From eeb88c679ff1e10ca521d8afc87d0169b5c15925 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Fri, 10 Mar 2023 11:23:24 -0800 Subject: [PATCH 01/18] Refactoring and bug fixes on top of unnest. The filter now is passed inside the unnest cursors. Added tests for scenarios such as 1. filter on unnested column which involves a left filter rewrite 2. filter on unnested virtual column which pushes the filter to the right only and involves no rewrite 3. not filters 4. SQL functions applied on top of unnested column 5. null present in first row of the column to be unnested --- .../apache/druid/query/UnnestDataSource.java | 28 +- .../UnnestColumnValueSelectorCursor.java | 59 ++-- .../druid/segment/UnnestDimensionCursor.java | 78 ++---- .../druid/segment/UnnestSegmentReference.java | 10 +- .../druid/segment/UnnestStorageAdapter.java | 38 ++- .../druid/query/QueryRunnerTestHelper.java | 3 +- .../groupby/UnnestGroupByQueryRunnerTest.java | 12 +- .../query/scan/UnnestScanQueryRunnerTest.java | 89 +----- .../query/topn/UnnestTopNQueryRunnerTest.java | 6 +- .../UnnestColumnValueSelectorCursorTest.java | 75 ++--- .../segment/UnnestStorageAdapterTest.java | 172 +----------- .../calcite/rel/DruidCorrelateUnnestRel.java | 3 +- .../sql/calcite/CalciteArraysQueryTest.java | 264 +++++++++++++++--- 13 files changed, 342 insertions(+), 495 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java index 2bb24784adfb..d46cd226a8a5 100644 --- a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java +++ b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java @@ -29,8 +29,6 @@ import org.apache.druid.segment.VirtualColumn; import org.apache.druid.utils.JvmUtils; -import javax.annotation.Nullable; -import java.util.LinkedHashSet; import java.util.List; import java.util.Objects; import java.util.Set; @@ -39,7 +37,6 @@ /** * The data source for representing an unnest operation. - * * An unnest data source has the following: * a base data source which is to be unnested * the column name of the MVD which will be unnested @@ -50,27 +47,23 @@ public class UnnestDataSource implements DataSource { private final DataSource base; private final VirtualColumn virtualColumn; - private final LinkedHashSet allowList; private UnnestDataSource( DataSource dataSource, - VirtualColumn virtualColumn, - LinkedHashSet allowList + VirtualColumn virtualColumn ) { this.base = dataSource; this.virtualColumn = virtualColumn; - this.allowList = allowList; } @JsonCreator public static UnnestDataSource create( @JsonProperty("base") DataSource base, - @JsonProperty("virtualColumn") VirtualColumn virtualColumn, - @Nullable @JsonProperty("allowList") LinkedHashSet allowList + @JsonProperty("virtualColumn") VirtualColumn virtualColumn ) { - return new UnnestDataSource(base, virtualColumn, allowList); + return new UnnestDataSource(base, virtualColumn); } @JsonProperty("base") @@ -85,12 +78,6 @@ public VirtualColumn getVirtualColumn() return virtualColumn; } - @JsonProperty("allowList") - public LinkedHashSet getAllowList() - { - return allowList; - } - @Override public Set getTableNames() { @@ -109,7 +96,7 @@ public DataSource withChildren(List children) if (children.size() != 1) { throw new IAE("Expected [1] child, got [%d]", children.size()); } - return new UnnestDataSource(children.get(0), virtualColumn, allowList); + return new UnnestDataSource(children.get(0), virtualColumn); } @Override @@ -146,17 +133,15 @@ public Function createSegmentMapFunction( baseSegment -> new UnnestSegmentReference( segmentMapFn.apply(baseSegment), - virtualColumn, - allowList + virtualColumn ) ); - } @Override public DataSource withUpdatedDataSource(DataSource newSource) { - return new UnnestDataSource(newSource, virtualColumn, allowList); + return new UnnestDataSource(newSource, virtualColumn); } @Override @@ -203,7 +188,6 @@ public String toString() return "UnnestDataSource{" + "base=" + base + ", column='" + virtualColumn + '\'' + - ", allowList=" + allowList + '}'; } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java index 1a3bbf4e0015..3b0a04857d25 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java @@ -22,15 +22,17 @@ import org.apache.druid.java.util.common.UOE; import org.apache.druid.query.BaseQuery; import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.filter.Filter; +import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.filter.BooleanValueMatcher; import org.joda.time.DateTime; import javax.annotation.Nullable; import java.util.Arrays; import java.util.Collections; -import java.util.LinkedHashSet; import java.util.List; /** @@ -65,18 +67,20 @@ public class UnnestColumnValueSelectorCursor implements Cursor private final ColumnValueSelector columnValueSelector; private final VirtualColumn unnestColumn; private final String outputName; - private final LinkedHashSet allowSet; private int index; private Object currentVal; private List unnestListForCurrentRow; private boolean needInitialization; + private ValueMatcher valueMatcher; + @Nullable + private final Filter allowFilter; public UnnestColumnValueSelectorCursor( Cursor cursor, ColumnSelectorFactory baseColumnSelectorFactory, VirtualColumn unnestColumn, String outputColumnName, - LinkedHashSet allowSet + @Nullable Filter allowFilter ) { this.baseCursor = cursor; @@ -89,7 +93,7 @@ public UnnestColumnValueSelectorCursor( this.index = 0; this.outputName = outputColumnName; this.needInitialization = true; - this.allowSet = allowSet; + this.allowFilter = allowFilter; } @Override @@ -194,11 +198,7 @@ public boolean isNull() public Object getObject() { if (!unnestListForCurrentRow.isEmpty()) { - if (allowSet == null || allowSet.isEmpty()) { - return unnestListForCurrentRow.get(index); - } else if (allowSet.contains((String) unnestListForCurrentRow.get(index))) { - return unnestListForCurrentRow.get(index); - } + return unnestListForCurrentRow.get(index); } return null; } @@ -253,9 +253,13 @@ public void advance() @Override public void advanceUninterruptibly() { - do { + while (true) { advanceAndUpdate(); - } while (matchAndProceed()); + boolean match = valueMatcher.matches(); + if (match || baseCursor.isDone()) { + return; + } + } } @Override @@ -310,12 +314,15 @@ private void getNextRow() private void initialize() { getNextRow(); - if (allowSet != null) { - if (!allowSet.isEmpty()) { - if (!allowSet.contains((String) unnestListForCurrentRow.get(index))) { - advance(); - } - } + if (allowFilter != null) { + this.valueMatcher = allowFilter.makeMatcher(getColumnSelectorFactory()); + } else { + this.valueMatcher = BooleanValueMatcher.of(true); + } + // If the first value the index is pointing to does not match the filter + // advance the index to the first value which will match + if (!valueMatcher.matches()) { + advance(); } needInitialization = false; } @@ -338,22 +345,4 @@ private void advanceAndUpdate() index++; } } - - /** - * This advances the unnest cursor in cases where an allowList is specified - * and the current value at the unnest cursor is not in the allowList. - * The cursor in such cases is moved till the next match is found. - * - * @return a boolean to indicate whether to stay or move cursor - */ - private boolean matchAndProceed() - { - boolean matchStatus; - if (allowSet == null || allowSet.isEmpty()) { - matchStatus = true; - } else { - matchStatus = allowSet.contains((String) unnestListForCurrentRow.get(index)); - } - return !baseCursor.isDone() && !matchStatus; - } } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java index ba91f27815d8..c99995fe261e 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java @@ -23,16 +23,16 @@ import org.apache.druid.query.BaseQuery; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.filter.BooleanValueMatcher; import org.joda.time.DateTime; import javax.annotation.Nullable; -import java.util.BitSet; -import java.util.LinkedHashSet; /** * The cursor to help unnest MVDs with dictionary encoding. @@ -79,8 +79,9 @@ public class UnnestDimensionCursor implements Cursor private final DimensionSelector dimSelector; private final VirtualColumn unnestColumn; private final String outputName; - private final LinkedHashSet allowSet; - private final BitSet allowedBitSet; + @Nullable + private final Filter allowFilter; + private ValueMatcher valueMatcher; private final ColumnSelectorFactory baseColumnSelectorFactory; private int index; @Nullable @@ -93,7 +94,7 @@ public UnnestDimensionCursor( ColumnSelectorFactory baseColumnSelectorFactory, VirtualColumn unnestColumn, String outputColumnName, - LinkedHashSet allowSet + @Nullable Filter allowFilter ) { this.baseCursor = cursor; @@ -106,8 +107,7 @@ public UnnestDimensionCursor( this.index = 0; this.outputName = outputColumnName; this.needInitialization = true; - this.allowSet = allowSet; - this.allowedBitSet = new BitSet(); + this.allowFilter = allowFilter; } @Override @@ -158,6 +158,9 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) @Override public boolean matches() { + if (indexedIntsForCurrentRow.size() <= 0) { + return false; + } return idForLookup == indexedIntsForCurrentRow.get(index); } @@ -188,14 +191,7 @@ public Object getObject() if (indexedIntsForCurrentRow == null || indexedIntsForCurrentRow.size() == 0) { return null; } - if (allowedBitSet.isEmpty()) { - if (allowSet == null || allowSet.isEmpty()) { - return lookupName(indexedIntsForCurrentRow.get(index)); - } - } else if (allowedBitSet.get(indexedIntsForCurrentRow.get(index))) { - return lookupName(indexedIntsForCurrentRow.get(index)); - } - return null; + return lookupName(indexedIntsForCurrentRow.get(index)); } @Override @@ -207,9 +203,6 @@ public Class classOfObject() @Override public int getValueCardinality() { - if (!allowedBitSet.isEmpty()) { - return allowedBitSet.cardinality(); - } return dimSelector.getValueCardinality(); } @@ -290,9 +283,13 @@ public void advance() @Override public void advanceUninterruptibly() { - do { + while (true) { advanceAndUpdate(); - } while (matchAndProceed()); + boolean match = valueMatcher.matches(); + if (match || baseCursor.isDone()) { + return; + } + } } @Override @@ -330,22 +327,19 @@ public void reset() @Nullable private void initialize() { - IdLookup idLookup = dimSelector.idLookup(); - this.indexIntsForRow = new SingleIndexInts(); - if (allowSet != null && !allowSet.isEmpty() && idLookup != null) { - for (String s : allowSet) { - if (idLookup.lookupId(s) >= 0) { - allowedBitSet.set(idLookup.lookupId(s)); - } - } + index = 0; + if (allowFilter != null) { + this.valueMatcher = allowFilter.makeMatcher(this.getColumnSelectorFactory()); + } else { + this.valueMatcher = BooleanValueMatcher.of(true); } + this.indexIntsForRow = new SingleIndexInts(); + if (dimSelector.getObject() != null) { this.indexedIntsForCurrentRow = dimSelector.getRow(); } - if (!allowedBitSet.isEmpty()) { - if (!allowedBitSet.get(indexedIntsForCurrentRow.get(index))) { - advance(); - } + if (!valueMatcher.matches() && !baseCursor.isDone()) { + advance(); } needInitialization = false; } @@ -362,6 +356,9 @@ private void advanceAndUpdate() index = 0; if (!baseCursor.isDone()) { baseCursor.advanceUninterruptibly(); + if (!baseCursor.isDone()) { + indexedIntsForCurrentRow = dimSelector.getRow(); + } } } else { if (index >= indexedIntsForCurrentRow.size() - 1) { @@ -378,23 +375,6 @@ private void advanceAndUpdate() } } - /** - * This advances the unnest cursor in cases where an allowList is specified - * and the current value at the unnest cursor is not in the allowList. - * The cursor in such cases is moved till the next match is found. - * - * @return a boolean to indicate whether to stay or move cursor - */ - private boolean matchAndProceed() - { - boolean matchStatus; - if ((allowSet == null || allowSet.isEmpty()) && allowedBitSet.isEmpty()) { - matchStatus = true; - } else { - matchStatus = allowedBitSet.get(indexedIntsForCurrentRow.get(index)); - } - return !baseCursor.isDone() && !matchStatus; - } // Helper class to help in returning // getRow from the dimensionSelector diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java b/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java index a5db64d2b2d3..8f3871390045 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java @@ -28,7 +28,6 @@ import javax.annotation.Nullable; import java.io.Closeable; import java.io.IOException; -import java.util.LinkedHashSet; import java.util.Optional; /** @@ -41,17 +40,15 @@ public class UnnestSegmentReference implements SegmentReference private final SegmentReference baseSegment; private final VirtualColumn unnestColumn; - private final LinkedHashSet allowSet; + public UnnestSegmentReference( SegmentReference baseSegment, - VirtualColumn unnestColumn, - LinkedHashSet allowList + VirtualColumn unnestColumn ) { this.baseSegment = baseSegment; this.unnestColumn = unnestColumn; - this.allowSet = allowList; } @Override @@ -103,8 +100,7 @@ public StorageAdapter asStorageAdapter() { return new UnnestStorageAdapter( baseSegment.asStorageAdapter(), - unnestColumn, - allowSet + unnestColumn ); } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 939333c75402..7c40db0792a5 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -62,18 +62,15 @@ public class UnnestStorageAdapter implements StorageAdapter private final StorageAdapter baseAdapter; private final VirtualColumn unnestColumn; private final String outputColumnName; - private final LinkedHashSet allowSet; public UnnestStorageAdapter( final StorageAdapter baseAdapter, - final VirtualColumn unnestColumn, - final LinkedHashSet allowSet + final VirtualColumn unnestColumn ) { this.baseAdapter = baseAdapter; this.unnestColumn = unnestColumn; this.outputColumnName = unnestColumn.getOutputName(); - this.allowSet = allowSet; } @Override @@ -87,7 +84,7 @@ public Sequence makeCursors( ) { final String inputColumn = getUnnestInputIfDirectAccess(); - final Pair filterPair = computeBaseAndPostCorrelateFilters( + final Pair filterPair = computeBaseAndPostUnnestFilters( filter, virtualColumns, inputColumn, @@ -121,7 +118,7 @@ public Sequence makeCursors( retVal.getColumnSelectorFactory(), unnestColumn, outputColumnName, - allowSet + filterPair.rhs ); } else { retVal = new UnnestColumnValueSelectorCursor( @@ -129,7 +126,7 @@ public Sequence makeCursors( retVal.getColumnSelectorFactory(), unnestColumn, outputColumnName, - allowSet + filterPair.rhs ); } } else { @@ -138,13 +135,16 @@ public Sequence makeCursors( retVal.getColumnSelectorFactory(), unnestColumn, outputColumnName, - allowSet + filterPair.rhs ); } + // This is needed at this moment for nested queries + // Future developer would want to move the virtual columns + // inside the UnnestCursor and wrap the columnSelectorFactory return PostJoinCursor.wrap( retVal, virtualColumns, - filterPair.rhs + null ); } ); @@ -260,9 +260,9 @@ public VirtualColumn getUnnestColumn() * @param inputColumn input column to unnest if it's a direct access; otherwise null * @param inputColumnCapabilites input column capabilities if known; otherwise null * - * @return pair of pre- and post-correlate filters + * @return pair of pre- and post-unnest filters */ - private Pair computeBaseAndPostCorrelateFilters( + private Pair computeBaseAndPostUnnestFilters( @Nullable final Filter queryFilter, final VirtualColumns queryVirtualColumns, @Nullable final String inputColumn, @@ -282,7 +282,7 @@ void add(@Nullable final Filter filter) final Set requiredColumns = filter.getRequiredColumns(); - // Run filter post-correlate if it refers to any virtual columns. + // Run filter post-unnest if it refers to any virtual columns. if (queryVirtualColumns.getVirtualColumns().length > 0) { for (String column : requiredColumns) { if (queryVirtualColumns.exists(column)) { @@ -293,13 +293,16 @@ void add(@Nullable final Filter filter) } if (requiredColumns.contains(outputColumnName)) { - // Try to move filter pre-correlate if possible. + // Rewrite filter post-unnest if possible. final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(filter, inputColumn, inputColumnCapabilites); if (newFilter != null) { + // Add the rewritten filter pre-unnest, so we get the benefit of any indexes, and so we avoid unnesting + // any rows that do not match this filter at all. preFilters.add(newFilter); - } else { - postFilters.add(filter); } + // This is needed as a filter on an MV String Dimension returns the entire row matching the filter + // Add original filter post-unnest no matter what: we need to filter out any extraneous unnested values. + postFilters.add(filter); } else { preFilters.add(filter); } @@ -308,11 +311,6 @@ void add(@Nullable final Filter filter) final FilterSplitter filterSplitter = new FilterSplitter(); - if (allowSet != null && !allowSet.isEmpty()) { - // Filter on input column if possible (it may be faster); otherwise use output column. - filterSplitter.add(new InDimFilter(inputColumn != null ? inputColumn : outputColumnName, allowSet)); - } - if (queryFilter instanceof AndFilter) { for (Filter filter : ((AndFilter) queryFilter).getFilters()) { filterSplitter.add(filter); diff --git a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java index aec318aa7a9e..2f4119af8f83 100644 --- a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java +++ b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java @@ -111,8 +111,7 @@ public class QueryRunnerTestHelper "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", null, ExprMacroTable.nil() - ), - null + ) ); public static final Granularity DAY_GRAN = Granularities.DAY; diff --git a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java index 826d612f678c..a09d2fc256e9 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java @@ -239,8 +239,7 @@ public void testGroupBy() "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", null, ExprMacroTable.nil() - ), - null + ) )) .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -453,8 +452,7 @@ public void testGroupByOnMissingColumn() "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", null, ExprMacroTable.nil() - ), - null + ) )) .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) .setDimensions( @@ -566,8 +564,7 @@ public void testGroupByOnUnnestedVirtualColumn() "mv_to_array(placementish)", ColumnType.STRING_ARRAY, TestExprMacroTable.INSTANCE - ), - null + ) ); GroupByQuery query = makeQueryBuilder() @@ -655,8 +652,7 @@ public void testGroupByOnUnnestedVirtualMultiColumn() "array(\"market\",\"quality\")", ColumnType.STRING, TestExprMacroTable.INSTANCE - ), - null + ) ); GroupByQuery query = makeQueryBuilder() diff --git a/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java index 160f06140c40..a0fa2b929e09 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java @@ -22,7 +22,6 @@ import com.google.common.collect.Lists; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.DefaultGenericQueryMetricsFactory; import org.apache.druid.query.Druids; import org.apache.druid.query.QueryPlus; @@ -46,9 +45,7 @@ import org.junit.runners.Parameterized; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -96,27 +93,6 @@ private Druids.ScanQueryBuilder newTestUnnestQuery() .legacy(legacy); } - private Druids.ScanQueryBuilder newTestUnnestQueryWithAllowSet() - { - List allowList = Arrays.asList("a", "b", "c"); - LinkedHashSet allowSet = new LinkedHashSet(allowList); - return Druids.newScanQueryBuilder() - .dataSource(UnnestDataSource.create( - new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), - new ExpressionVirtualColumn( - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, - "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", - null, - ExprMacroTable.nil() - ), - allowSet - )) - .columns(Collections.emptyList()) - .eternityInterval() - .limit(3) - .legacy(legacy); - } - @Test public void testScanOnUnnest() { @@ -188,8 +164,7 @@ public void testUnnestRunnerVirtualColumnsUsingSingleColumn() "mv_to_array(placementish)", ColumnType.STRING, TestExprMacroTable.INSTANCE - ), - null + ) )) .columns(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) .eternityInterval() @@ -258,8 +233,7 @@ public void testUnnestRunnerVirtualColumnsUsingMultipleColumn() "array(\"market\",\"quality\")", ColumnType.STRING, TestExprMacroTable.INSTANCE - ), - null + ) )) .columns(QueryRunnerTestHelper.MARKET_DIMENSION, QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) .eternityInterval() @@ -461,65 +435,6 @@ public void testUnnestRunnerWithOrdering() ScanQueryRunnerTest.verify(ascendingExpectedResults, results); } - @Test - public void testUnnestRunnerNonNullAllowSet() - { - ScanQuery query = newTestUnnestQueryWithAllowSet() - .intervals(I_0112_0114) - .columns(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) - .limit(3) - .build(); - - final QueryRunner queryRunner = QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn( - FACTORY, - new IncrementalIndexSegment( - index, - QueryRunnerTestHelper.SEGMENT_ID - ), - query, - "rtIndexvc" - ); - - Iterable results = queryRunner.run(QueryPlus.wrap(query)).toList(); - - String[] columnNames; - if (legacy) { - columnNames = new String[]{ - getTimestampName() + ":TIME", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST - }; - } else { - columnNames = new String[]{ - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST - }; - } - String[] values; - if (legacy) { - values = new String[]{ - "2011-01-12T00:00:00.000Z\ta", - "2011-01-12T00:00:00.000Z\tb", - "2011-01-13T00:00:00.000Z\ta" - }; - } else { - values = new String[]{ - "a", - "b", - "a" - }; - } - - final List>> events = ScanQueryRunnerTest.toEvents(columnNames, legacy, values); - List expectedResults = toExpected( - events, - legacy - ? Lists.newArrayList(getTimestampName(), QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) - : Collections.singletonList(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST), - 0, - 3 - ); - ScanQueryRunnerTest.verify(expectedResults, results); - } - private String getTimestampName() { diff --git a/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java index e822913489dd..c30022216bd4 100644 --- a/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java @@ -258,8 +258,7 @@ public void testTopNStringVirtualColumnUnnest() "mv_to_array(\"placementish\")", ColumnType.STRING_ARRAY, TestExprMacroTable.INSTANCE - ), - null + ) )) .granularity(QueryRunnerTestHelper.ALL_GRAN) .dimension(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) @@ -341,8 +340,7 @@ public void testTopNStringVirtualMultiColumnUnnest() "array(\"market\",\"quality\")", ColumnType.STRING, TestExprMacroTable.INSTANCE - ), - null + ) )) .granularity(QueryRunnerTestHelper.ALL_GRAN) .dimension(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java index 11b6b7e1d6a0..07364fc1a8cd 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java @@ -35,14 +35,11 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.LinkedHashSet; import java.util.List; public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandlingTest { private static String OUTPUT_NAME = "unnested-column"; - private static LinkedHashSet IGNORE_SET = null; - private static LinkedHashSet IGNORE_SET1 = new LinkedHashSet<>(Arrays.asList("b", "f")); @BeforeClass public static void setUpClass() @@ -74,7 +71,7 @@ public void test_list_unnest_cursors() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -108,7 +105,7 @@ public void test_list_unnest_cursors_user_supplied_list() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -140,7 +137,7 @@ public void test_list_unnest_cursors_user_supplied_list_only_nulls() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -177,7 +174,7 @@ public void test_list_unnest_cursors_user_supplied_list_mixed_with_nulls() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -211,7 +208,7 @@ public void test_list_unnest_cursors_user_supplied_strings_and_no_lists() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -241,7 +238,7 @@ public void test_list_unnest_cursors_user_supplied_strings_mixed_with_list() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -276,7 +273,7 @@ public void test_list_unnest_cursors_user_supplied_lists_three_levels() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", null, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -310,14 +307,14 @@ public void test_list_unnest_of_unnest_cursors_user_supplied_list_three_levels() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", null, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); UnnestColumnValueSelectorCursor parentCursor = new UnnestColumnValueSelectorCursor( childCursor, childCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"" + OUTPUT_NAME + "\"", null, ExprMacroTable.nil()), "tmp-out", - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = parentCursor.getColumnSelectorFactory() .makeColumnValueSelector("tmp-out"); @@ -352,7 +349,7 @@ public void test_list_unnest_cursors_user_supplied_list_with_nulls() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -390,7 +387,7 @@ public void test_list_unnest_cursors_user_supplied_list_with_dups() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -408,44 +405,6 @@ public void test_list_unnest_cursors_user_supplied_list_with_dups() Assert.assertEquals(k, 10); } - @Test - public void test_list_unnest_cursors_user_supplied_list_with_ignore_set() - { - List inputList = Arrays.asList( - Arrays.asList("a", "b", "c"), - Arrays.asList("e", "f", "g", "h", "i"), - Collections.singletonList("j") - ); - - List expectedResults = Arrays.asList("b", "f"); - - //Create base cursor - ListCursor listCursor = new ListCursor(inputList); - - //Create unnest cursor - UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor( - listCursor, - listCursor.getColumnSelectorFactory(), - new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - IGNORE_SET1 - ); - ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() - .makeColumnValueSelector(OUTPUT_NAME); - int k = 0; - while (!unnestCursor.isDone()) { - Object valueSelectorVal = unnestColumnValueSelector.getObject(); - if (valueSelectorVal == null) { - Assert.assertEquals(null, expectedResults.get(k)); - } else { - Assert.assertEquals(expectedResults.get(k), valueSelectorVal.toString()); - } - k++; - unnestCursor.advance(); - } - Assert.assertEquals(k, 2); - } - @Test public void test_list_unnest_cursors_user_supplied_list_double() { @@ -466,7 +425,7 @@ public void test_list_unnest_cursors_user_supplied_list_double() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -500,7 +459,7 @@ public void test_list_unnest_cursors_user_supplied_list_float() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -534,7 +493,7 @@ public void test_list_unnest_cursors_user_supplied_list_long() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -571,7 +530,7 @@ public void test_list_unnest_cursors_user_supplied_list_three_level_arrays_and_m listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", null, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -607,7 +566,7 @@ public void test_list_unnest_cursors_dimSelector() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); // should return a column value selector for this case BaseSingleValueDimensionSelector unnestDimSelector = (BaseSingleValueDimensionSelector) unnestCursor.getColumnSelectorFactory() @@ -650,7 +609,7 @@ public void test_list_unnest_cursors_user_supplied_list_of_integers() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index e064587a05ea..d852547e4ec4 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -46,7 +46,6 @@ import org.junit.Test; import java.util.Arrays; -import java.util.LinkedHashSet; import java.util.List; public class UnnestStorageAdapterTest extends InitializedNullHandlingTest @@ -56,13 +55,10 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest private static IncrementalIndexStorageAdapter INCREMENTAL_INDEX_STORAGE_ADAPTER; private static UnnestStorageAdapter UNNEST_STORAGE_ADAPTER; private static UnnestStorageAdapter UNNEST_STORAGE_ADAPTER1; - private static UnnestStorageAdapter UNNEST_STORAGE_ADAPTER2; - private static UnnestStorageAdapter UNNEST_STORAGE_ADAPTER3; private static List ADAPTERS; private static String COLUMNNAME = "multi-string1"; private static String OUTPUT_COLUMN_NAME = "unnested-multi-string1"; private static String OUTPUT_COLUMN_NAME1 = "unnested-multi-string1-again"; - private static LinkedHashSet IGNORE_SET = new LinkedHashSet<>(Arrays.asList("1", "3", "5")); @BeforeClass public static void setup() @@ -86,29 +82,17 @@ public static void setup() INCREMENTAL_INDEX_STORAGE_ADAPTER = new IncrementalIndexStorageAdapter(INCREMENTAL_INDEX); UNNEST_STORAGE_ADAPTER = new UnnestStorageAdapter( INCREMENTAL_INDEX_STORAGE_ADAPTER, - new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), - null + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()) ); + UNNEST_STORAGE_ADAPTER1 = new UnnestStorageAdapter( - INCREMENTAL_INDEX_STORAGE_ADAPTER, - new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), - IGNORE_SET - ); - UNNEST_STORAGE_ADAPTER2 = new UnnestStorageAdapter( UNNEST_STORAGE_ADAPTER, - new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME1, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), - null - ); - UNNEST_STORAGE_ADAPTER3 = new UnnestStorageAdapter( - UNNEST_STORAGE_ADAPTER1, - new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME1, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), - IGNORE_SET + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME1, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()) ); + ADAPTERS = ImmutableList.of( UNNEST_STORAGE_ADAPTER, - UNNEST_STORAGE_ADAPTER1, - UNNEST_STORAGE_ADAPTER2, - UNNEST_STORAGE_ADAPTER3 + UNNEST_STORAGE_ADAPTER1 ); } @@ -217,9 +201,9 @@ public void test_unnest_adapters_basic() @Test public void test_two_levels_of_unnest_adapters() { - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER2.makeCursors( + Sequence cursorSequence = UNNEST_STORAGE_ADAPTER1.makeCursors( null, - UNNEST_STORAGE_ADAPTER2.getInterval(), + UNNEST_STORAGE_ADAPTER1.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, @@ -256,148 +240,6 @@ public void test_two_levels_of_unnest_adapters() }); } - @Test - public void test_unnest_adapters_with_allowList() - { - final String columnName = "multi-string1"; - - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER1.makeCursors( - null, - UNNEST_STORAGE_ADAPTER1.getInterval(), - VirtualColumns.EMPTY, - Granularities.ALL, - false, - null - ); - - cursorSequence.accumulate(null, (accumulated, cursor) -> { - ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); - - DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME)); - ColumnValueSelector valueSelector = factory.makeColumnValueSelector(OUTPUT_COLUMN_NAME); - - int count = 0; - while (!cursor.isDone()) { - Object dimSelectorVal = dimSelector.getObject(); - Object valueSelectorVal = valueSelector.getObject(); - if (dimSelectorVal == null) { - Assert.assertNull(dimSelectorVal); - } else if (valueSelectorVal == null) { - Assert.assertNull(valueSelectorVal); - } - cursor.advance(); - count++; - } - /* - each row has 8 distinct entries. - allowlist has 3 entries also the value cardinality - unnest will have 3 distinct entries - */ - Assert.assertEquals(count, 3); - Assert.assertEquals(dimSelector.getValueCardinality(), 3); - return null; - }); - } - - @Test - public void test_two_levels_of_unnest_adapters_with_allowList() - { - final String columnName = "multi-string1"; - - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER3.makeCursors( - null, - UNNEST_STORAGE_ADAPTER3.getInterval(), - VirtualColumns.EMPTY, - Granularities.ALL, - false, - null - ); - UnnestStorageAdapter adapter = UNNEST_STORAGE_ADAPTER3; - assertColumnReadsIdentifier(adapter.getUnnestColumn(), columnName); - Assert.assertEquals( - adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).isDictionaryEncoded(), - ColumnCapabilities.Capable.TRUE - ); - Assert.assertNull(adapter.getMaxValue(OUTPUT_COLUMN_NAME)); - Assert.assertNull(adapter.getMinValue(OUTPUT_COLUMN_NAME)); - - cursorSequence.accumulate(null, (accumulated, cursor) -> { - ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); - - DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME1)); - ColumnValueSelector valueSelector = factory.makeColumnValueSelector(OUTPUT_COLUMN_NAME1); - - int count = 0; - while (!cursor.isDone()) { - Object dimSelectorVal = dimSelector.getObject(); - Object valueSelectorVal = valueSelector.getObject(); - if (dimSelectorVal == null) { - Assert.assertNull(dimSelectorVal); - } else if (valueSelectorVal == null) { - Assert.assertNull(valueSelectorVal); - } - cursor.advance(); - count++; - } - /* - each row has 8 distinct entries. - allowlist has 3 entries also the value cardinality - unnest will have 3 distinct entries - unnest of that unnest will have 3*3 = 9 entries - */ - Assert.assertEquals(count, 9); - Assert.assertEquals(dimSelector.getValueCardinality(), 3); - return null; - }); - } - - @Test - public void test_unnest_adapters_methods_with_allowList() - { - final String columnName = "multi-string1"; - - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER1.makeCursors( - null, - UNNEST_STORAGE_ADAPTER1.getInterval(), - VirtualColumns.EMPTY, - Granularities.ALL, - false, - null - ); - UnnestStorageAdapter adapter = UNNEST_STORAGE_ADAPTER1; - assertColumnReadsIdentifier(adapter.getUnnestColumn(), columnName); - Assert.assertEquals( - adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).isDictionaryEncoded(), - ColumnCapabilities.Capable.TRUE - ); - Assert.assertNull(adapter.getMaxValue(OUTPUT_COLUMN_NAME)); - Assert.assertNull(adapter.getMinValue(OUTPUT_COLUMN_NAME)); - - cursorSequence.accumulate(null, (accumulated, cursor) -> { - ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); - - DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME)); - IdLookup idlookUp = dimSelector.idLookup(); - Assert.assertFalse(dimSelector.isNull()); - int[] indices = new int[]{1, 3, 5}; - int count = 0; - while (!cursor.isDone()) { - Object dimSelectorVal = dimSelector.getObject(); - Assert.assertEquals(idlookUp.lookupId((String) dimSelectorVal), indices[count]); - // after unnest first entry in get row should equal the object - // and the row size will always be 1 - Assert.assertEquals(dimSelector.getRow().get(0), indices[count]); - Assert.assertEquals(dimSelector.getRow().size(), 1); - Assert.assertNotNull(dimSelector.makeValueMatcher(OUTPUT_COLUMN_NAME)); - cursor.advance(); - count++; - } - Assert.assertEquals(dimSelector.getValueCardinality(), 3); - Assert.assertEquals(count, 3); - return null; - }); - } - private static void assertColumnReadsIdentifier(final VirtualColumn column, final String identifier) { MatcherAssert.assertThat(column, CoreMatchers.instanceOf(ExpressionVirtualColumn.class)); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java index 10a12fbec3b3..5c5bfd8aae4e 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java @@ -172,8 +172,7 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) correlateRowSignature.getColumnName(correlateRowSignature.size() - 1), Calcites.getColumnTypeForRelDataType(rexNodeToUnnest.getType()), getPlannerContext().getExprMacroTable() - ), - null + ) ), correlateRowSignature, getPlannerContext(), diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 67d1971a16ef..f8976de7b053 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -2697,6 +2697,37 @@ public void testUnnestInline() ); } + @Test + public void testUnnestInlineWithCount() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT COUNT(*) FROM (select c from UNNEST(ARRAY[1,2,3]) as unnested(c))", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource( + InlineDataSource.fromIterable( + ImmutableList.of( + new Object[]{1L}, + new Object[]{2L}, + new Object[]{3L} + ), + RowSignature.builder().add("EXPR$0", ColumnType.LONG).build() + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .context(QUERY_CONTEXT_UNNEST) + .aggregators(aggregators(new CountAggregatorFactory("a0"))) + .build() + ), + ImmutableList.of( + new Object[]{3L} + ) + ); + } + @Test public void testUnnest() { @@ -2713,8 +2744,7 @@ public void testUnnest() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -2768,15 +2798,13 @@ public void testUnnestTwice() "j0.unnest", "string_to_array(\"dim1\",'\\u005C.')", ColumnType.STRING_ARRAY - ), - null + ) ), expressionVirtualColumn( "_j0.unnest", "\"dim3\"", ColumnType.STRING - ), - null + ) ) ) .intervals(querySegmentSpec(Filtration.eternity())) @@ -2835,15 +2863,13 @@ public void testUnnestTwiceWithFiltersAndExpressions() "j0.unnest", "string_to_array(\"dim1\",'\\u005C.')", ColumnType.STRING_ARRAY - ), - null + ) ), expressionVirtualColumn( "_j0.unnest", "\"dim3\"", ColumnType.STRING - ), - null + ) ) ) .intervals(querySegmentSpec(Filtration.eternity())) @@ -2898,8 +2924,7 @@ public void testUnnestWithGroupBy() GroupByQuery.builder() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .setInterval(querySegmentSpec(Filtration.eternity())) .setContext(QUERY_CONTEXT_UNNEST) @@ -2943,8 +2968,7 @@ public void testUnnestWithGroupByOrderBy() GroupByQuery.builder() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .setInterval(querySegmentSpec(Filtration.eternity())) .setContext(QUERY_CONTEXT_UNNEST) @@ -2999,8 +3023,7 @@ public void testUnnestWithGroupByOrderByWithLimit() new TopNQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .dimension(new DefaultDimensionSpec("j0.unnest", "_d0", ColumnType.STRING)) @@ -3038,8 +3061,7 @@ public void testUnnestWithGroupByHaving() GroupByQuery.builder() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .setInterval(querySegmentSpec(Filtration.eternity())) .setContext(QUERY_CONTEXT_UNNEST) @@ -3081,8 +3103,7 @@ public void testUnnestWithLimit() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3116,8 +3137,7 @@ public void testUnnestFirstQueryOnSelect() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3178,8 +3198,7 @@ public void testUnnestWithFilters() .context(QUERY_CONTEXT_UNNEST) .build() ), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3228,8 +3247,7 @@ public void testUnnestWithFiltersInsideAndOutside() .context(QUERY_CONTEXT_UNNEST) .build() ), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3262,8 +3280,7 @@ public void testUnnestWithFiltersOutside() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3305,8 +3322,7 @@ public void testUnnestWithInFilters() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .filters(new InDimFilter("dim2", ImmutableList.of("a", "b", "ab", "abc"), null)) @@ -3342,8 +3358,7 @@ public void testUnnestVirtualWithColumns() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY), - null + expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3386,8 +3401,7 @@ public void testUnnestWithGroupByOrderByOnVirtualColumn() "j0.unnest", "array(\"dim2\",\"dim4\")", ColumnType.STRING_ARRAY - ), - null + ) ) ) .setInterval(querySegmentSpec(Filtration.eternity())) @@ -3455,8 +3469,7 @@ public void testUnnestWithJoinOnTheLeft() "(\"dim2\" == \"j0.dim2\")", JoinType.INNER ), - expressionVirtualColumn("_j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("_j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3545,4 +3558,183 @@ public void testUnnestWithConstant() ) ); } + + @Test + public void testUnnestWithSQLFunctionOnUnnestedColumn() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT strlen(d3) FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .virtualColumns(expressionVirtualColumn("v0", "strlen(\"j0.unnest\")", ColumnType.LONG)) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("v0")) + .build() + ), + useDefault ? + ImmutableList.of( + new Object[]{1}, + new Object[]{1}, + new Object[]{1}, + new Object[]{1}, + new Object[]{1}, + new Object[]{0}, + new Object[]{0}, + new Object[]{0} + ) : + ImmutableList.of( + new Object[]{1}, + new Object[]{1}, + new Object[]{1}, + new Object[]{1}, + new Object[]{1}, + new Object[]{0}, + new Object[]{null}, + new Object[]{null} + ) + ); + } + + @Test + public void testUnnestWithINFiltersWithLeftRewrite() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('a','b')", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(new InDimFilter("j0.unnest", ImmutableSet.of("a", "b"), null)) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } + + @Test + public void testUnnestWithINFiltersWithNoLeftRewrite() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d45 FROM druid.numfoo, UNNEST(ARRAY[dim4,dim5]) as unnested (d45) where d45 IN ('a','b')", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(new InDimFilter("j0.unnest", ImmutableSet.of("a", "b"), null)) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"a"}, + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } + + @Test + public void testUnnestWithInvalidINFiltersOnUnnestedColumn() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('foo','bar')", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(new InDimFilter("j0.unnest", ImmutableSet.of("foo", "bar"), null)) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of() + ); + } + + @Test + public void testUnnestWithNotFiltersOnUnnestedColumn() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3!='d' ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(not(selector("j0.unnest", "d", null))) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + useDefault ? + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"c"}, + new Object[]{""}, + new Object[]{""}, + new Object[]{""} + ) : + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"c"}, + new Object[]{""}, + new Object[]{null}, + new Object[]{null} + ) + ); + } } From 586f3ee7d3583231bed616b474b76b4223b7123b Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Fri, 10 Mar 2023 17:42:33 -0800 Subject: [PATCH 02/18] Not pushing filters in now, will be done if needed later when we migrate the filter inside the data source --- .../UnnestColumnValueSelectorCursor.java | 34 ++----------- .../druid/segment/UnnestDimensionCursor.java | 35 ++----------- .../druid/segment/UnnestStorageAdapter.java | 11 ++-- .../UnnestColumnValueSelectorCursorTest.java | 51 +++++++------------ 4 files changed, 27 insertions(+), 104 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java index 3b0a04857d25..9179d55dc1f8 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java @@ -22,12 +22,9 @@ import org.apache.druid.java.util.common.UOE; import org.apache.druid.query.BaseQuery; import org.apache.druid.query.dimension.DimensionSpec; -import org.apache.druid.query.filter.Filter; -import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; -import org.apache.druid.segment.filter.BooleanValueMatcher; import org.joda.time.DateTime; import javax.annotation.Nullable; @@ -52,9 +49,6 @@ * unnestCursor.advance() -> 'e' *

*

- * The allowSet if available helps skip over elements which are not in the allowList by moving the cursor to - * the next available match. - *

* The index reference points to the index of each row that the unnest cursor is accessing through currentVal * The index ranges from 0 to the size of the list in each row which is held in the unnestListForCurrentRow *

@@ -71,16 +65,13 @@ public class UnnestColumnValueSelectorCursor implements Cursor private Object currentVal; private List unnestListForCurrentRow; private boolean needInitialization; - private ValueMatcher valueMatcher; - @Nullable - private final Filter allowFilter; + public UnnestColumnValueSelectorCursor( Cursor cursor, ColumnSelectorFactory baseColumnSelectorFactory, VirtualColumn unnestColumn, - String outputColumnName, - @Nullable Filter allowFilter + String outputColumnName ) { this.baseCursor = cursor; @@ -93,7 +84,6 @@ public UnnestColumnValueSelectorCursor( this.index = 0; this.outputName = outputColumnName; this.needInitialization = true; - this.allowFilter = allowFilter; } @Override @@ -253,13 +243,7 @@ public void advance() @Override public void advanceUninterruptibly() { - while (true) { - advanceAndUpdate(); - boolean match = valueMatcher.matches(); - if (match || baseCursor.isDone()) { - return; - } - } + advanceAndUpdate(); } @Override @@ -308,22 +292,10 @@ private void getNextRow() /** * This initializes the unnest cursor and creates data structures * to start iterating over the values to be unnested. - * This would also create a bitset for dictonary encoded columns to - * check for matching values specified in allowedList of UnnestDataSource. */ private void initialize() { getNextRow(); - if (allowFilter != null) { - this.valueMatcher = allowFilter.makeMatcher(getColumnSelectorFactory()); - } else { - this.valueMatcher = BooleanValueMatcher.of(true); - } - // If the first value the index is pointing to does not match the filter - // advance the index to the first value which will match - if (!valueMatcher.matches()) { - advance(); - } needInitialization = false; } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java index c99995fe261e..6c4c1a953439 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java @@ -23,13 +23,11 @@ import org.apache.druid.query.BaseQuery; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.DimensionSpec; -import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.data.IndexedInts; -import org.apache.druid.segment.filter.BooleanValueMatcher; import org.joda.time.DateTime; import javax.annotation.Nullable; @@ -58,15 +56,6 @@ *

* Total 5 advance calls above *

- * The allowSet, if available, helps skip over elements that are not in the allowList by moving the cursor to - * the next available match. The hashSet is converted into a bitset (during initialization) for efficiency. - * If allowSet is ['c', 'd'] then the advance moves over to the next available match - *

- * advance() -> 2 -> 'c' - * advance() -> 3 -> 'd' (advances base cursor first) - * advance() -> 2 -> 'c' - *

- * Total 3 advance calls in this case *

* The index reference points to the index of each row that the unnest cursor is accessing * The indexedInts for each row are held in the indexedIntsForCurrentRow object @@ -79,9 +68,6 @@ public class UnnestDimensionCursor implements Cursor private final DimensionSelector dimSelector; private final VirtualColumn unnestColumn; private final String outputName; - @Nullable - private final Filter allowFilter; - private ValueMatcher valueMatcher; private final ColumnSelectorFactory baseColumnSelectorFactory; private int index; @Nullable @@ -93,8 +79,7 @@ public UnnestDimensionCursor( Cursor cursor, ColumnSelectorFactory baseColumnSelectorFactory, VirtualColumn unnestColumn, - String outputColumnName, - @Nullable Filter allowFilter + String outputColumnName ) { this.baseCursor = cursor; @@ -107,7 +92,6 @@ public UnnestDimensionCursor( this.index = 0; this.outputName = outputColumnName; this.needInitialization = true; - this.allowFilter = allowFilter; } @Override @@ -283,13 +267,7 @@ public void advance() @Override public void advanceUninterruptibly() { - while (true) { - advanceAndUpdate(); - boolean match = valueMatcher.matches(); - if (match || baseCursor.isDone()) { - return; - } - } + advanceAndUpdate(); } @Override @@ -328,19 +306,12 @@ public void reset() private void initialize() { index = 0; - if (allowFilter != null) { - this.valueMatcher = allowFilter.makeMatcher(this.getColumnSelectorFactory()); - } else { - this.valueMatcher = BooleanValueMatcher.of(true); - } this.indexIntsForRow = new SingleIndexInts(); if (dimSelector.getObject() != null) { this.indexedIntsForCurrentRow = dimSelector.getRow(); } - if (!valueMatcher.matches() && !baseCursor.isDone()) { - advance(); - } + needInitialization = false; } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 7c40db0792a5..ee4b6a625412 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -117,16 +117,14 @@ public Sequence makeCursors( retVal, retVal.getColumnSelectorFactory(), unnestColumn, - outputColumnName, - filterPair.rhs + outputColumnName ); } else { retVal = new UnnestColumnValueSelectorCursor( retVal, retVal.getColumnSelectorFactory(), unnestColumn, - outputColumnName, - filterPair.rhs + outputColumnName ); } } else { @@ -134,8 +132,7 @@ public Sequence makeCursors( retVal, retVal.getColumnSelectorFactory(), unnestColumn, - outputColumnName, - filterPair.rhs + outputColumnName ); } // This is needed at this moment for nested queries @@ -144,7 +141,7 @@ public Sequence makeCursors( return PostJoinCursor.wrap( retVal, virtualColumns, - null + filterPair.rhs ); } ); diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java index 07364fc1a8cd..bafb50f7089a 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java @@ -70,8 +70,7 @@ public void test_list_unnest_cursors() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -104,8 +103,7 @@ public void test_list_unnest_cursors_user_supplied_list() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -136,8 +134,7 @@ public void test_list_unnest_cursors_user_supplied_list_only_nulls() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -173,8 +170,7 @@ public void test_list_unnest_cursors_user_supplied_list_mixed_with_nulls() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -207,8 +203,7 @@ public void test_list_unnest_cursors_user_supplied_strings_and_no_lists() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -237,8 +232,7 @@ public void test_list_unnest_cursors_user_supplied_strings_mixed_with_list() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -272,8 +266,7 @@ public void test_list_unnest_cursors_user_supplied_lists_three_levels() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", null, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -306,15 +299,13 @@ public void test_list_unnest_of_unnest_cursors_user_supplied_list_three_levels() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", null, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); UnnestColumnValueSelectorCursor parentCursor = new UnnestColumnValueSelectorCursor( childCursor, childCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"" + OUTPUT_NAME + "\"", null, ExprMacroTable.nil()), - "tmp-out", - null + "tmp-out" ); ColumnValueSelector unnestColumnValueSelector = parentCursor.getColumnSelectorFactory() .makeColumnValueSelector("tmp-out"); @@ -348,8 +339,7 @@ public void test_list_unnest_cursors_user_supplied_list_with_nulls() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -386,8 +376,7 @@ public void test_list_unnest_cursors_user_supplied_list_with_dups() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -424,8 +413,7 @@ public void test_list_unnest_cursors_user_supplied_list_double() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -458,8 +446,7 @@ public void test_list_unnest_cursors_user_supplied_list_float() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -492,8 +479,7 @@ public void test_list_unnest_cursors_user_supplied_list_long() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -529,8 +515,7 @@ public void test_list_unnest_cursors_user_supplied_list_three_level_arrays_and_m listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", null, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -565,8 +550,7 @@ public void test_list_unnest_cursors_dimSelector() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); // should return a column value selector for this case BaseSingleValueDimensionSelector unnestDimSelector = (BaseSingleValueDimensionSelector) unnestCursor.getColumnSelectorFactory() @@ -608,8 +592,7 @@ public void test_list_unnest_cursors_user_supplied_list_of_integers() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); From 75e8b776f6c7ddf68e4bdd0601df999d9c3629f8 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Mon, 13 Mar 2023 11:38:10 -0700 Subject: [PATCH 03/18] Removing stale comments and updating docs --- docs/querying/datasource.md | 6 ++---- .../java/org/apache/druid/segment/UnnestStorageAdapter.java | 4 ---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/docs/querying/datasource.md b/docs/querying/datasource.md index 442024dd75f5..e3eacf032bf3 100644 --- a/docs/querying/datasource.md +++ b/docs/querying/datasource.md @@ -412,15 +412,13 @@ The `unnest` datasource uses the following syntax: "type": "expression", "expression": "\"column_reference\"" }, - "outputName": "unnested_target_column", - "allowList": [] - }, + "outputName": "unnested_target_column" + } ``` * `dataSource.type`: Set this to `unnest`. * `dataSource.base`: Defines the datasource you want to unnest. * `dataSource.base.type`: The type of datasource you want to unnest, such as a table. * `dataSource.virtualColumn`: [Virtual column](virtual-columns.md) that references the nested values. The output name of this column is reused as the name of the column that contains unnested values. You can replace the source column with the unnested column by specifying the source column's name or a new column by specifying a different name. Outputting it to a new column can help you verify that you get the results that you expect but isn't required. -* `dataSource.allowList`: Optional. The subset of values you want to unnest. To learn more about how to use the `unnest` datasource, see the [unnest tutorial](../tutorials/tutorial-unnest-datasource.md). diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index ee4b6a625412..94c471d1a6e7 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -135,9 +135,6 @@ public Sequence makeCursors( outputColumnName ); } - // This is needed at this moment for nested queries - // Future developer would want to move the virtual columns - // inside the UnnestCursor and wrap the columnSelectorFactory return PostJoinCursor.wrap( retVal, virtualColumns, @@ -297,7 +294,6 @@ void add(@Nullable final Filter filter) // any rows that do not match this filter at all. preFilters.add(newFilter); } - // This is needed as a filter on an MV String Dimension returns the entire row matching the filter // Add original filter post-unnest no matter what: we need to filter out any extraneous unnested values. postFilters.add(filter); } else { From dda787248962a0af816616500e8a6c8761c8b5d7 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Mon, 13 Mar 2023 21:32:16 -0700 Subject: [PATCH 04/18] Temp changes for selector filter --- .../rule/CorrelateFilterRTransposeRule.java | 2 +- ...lateProjectOnFIlterRightTransposeRule.java | 85 +++++++++++ .../druid/sql/calcite/rule/DruidRules.java | 1 + .../sql/calcite/CalciteArraysQueryTest.java | 144 ++++++++++++++++++ 4 files changed, 231 insertions(+), 1 deletion(-) create mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterRTransposeRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterRTransposeRule.java index 66731ca78ade..00f472c5acc8 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterRTransposeRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterRTransposeRule.java @@ -84,7 +84,7 @@ public void onMatch(final RelOptRuleCall call) /** * Whether an expression refers to correlation variables. */ - private static boolean usesCorrelationId(final CorrelationId correlationId, final RexNode rexNode) + public static boolean usesCorrelationId(final CorrelationId correlationId, final RexNode rexNode) { class CorrelationVisitor extends RexVisitorImpl { diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java new file mode 100644 index 000000000000..e9782bcd1a7b --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.rule; + +import com.google.common.collect.ImmutableList; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Correlate; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.Uncollect; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlKind; + +/** + * Rule that pulls a {@link Filter} from the right-hand side of a {@link Correlate} above the Correlate in presence of an unneeded Project + * Allows filters on unnested fields to be added to queries that use {@link org.apache.druid.query.UnnestDataSource}. + * + * @see CorrelateFilterRTransposeRule similar, but for without a Project atop Filter + */ +public class CorrelateProjectOnFIlterRightTransposeRule extends RelOptRule +{ + private static final CorrelateProjectOnFIlterRightTransposeRule INSTANCE = new CorrelateProjectOnFIlterRightTransposeRule(); + + public CorrelateProjectOnFIlterRightTransposeRule() + { + super( + operand( + Correlate.class, + operand(RelNode.class, any()), + operand(Project.class, operand(Filter.class, operand(Uncollect.class, any()))) + )); + } + + @Override + public boolean matches(RelOptRuleCall call) + { + final Correlate correlate = call.rel(0); + final Filter right = call.rel(3); + + // Can't pull up filters that explicitly refer to the correlation variable. + return !CorrelateFilterRTransposeRule.usesCorrelationId(correlate.getCorrelationId(), right.getCondition()); + } + + public static CorrelateProjectOnFIlterRightTransposeRule instance() + { + return INSTANCE; + } + + @Override + public void onMatch(RelOptRuleCall call) + { + final Correlate correlate = call.rel(0); + final RelNode left = call.rel(1); + final Project rightP = call.rel(2); + final Filter rightF = call.rel(3); + + if (rightP.getProjects().size() <=1 && rightP.getChildExps().get(0).getKind() == SqlKind.CAST) { + call.transformTo( + call.builder() + .push(correlate.copy(correlate.getTraitSet(), ImmutableList.of(left, rightF.getInput()))) + .filter(RexUtil.shift(rightF.getCondition(), left.getRowType().getFieldCount())) + .build() + ); + } + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java index 519e9365b4a9..3bfe21d3c72d 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java @@ -121,6 +121,7 @@ public static List rules(PlannerContext plannerContext) retVal.add(ProjectCorrelateTransposeRule.INSTANCE); retVal.add(CorrelateFilterLTransposeRule.instance()); retVal.add(CorrelateFilterRTransposeRule.instance()); + retVal.add(CorrelateProjectOnFIlterRightTransposeRule.instance()); } return retVal; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index f8976de7b053..6ffb76e76a6b 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -3737,4 +3737,148 @@ public void testUnnestWithNotFiltersOnUnnestedColumn() ) ); } + + @Test + public void testUnnestWithSelectorFiltersOnAllColumns() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT * FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(selector("j0.unnest", "b", null)) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of( + "__time", + "cnt", + "d1", + "d2", + "dim1", + "dim2", + "dim3", + "dim4", + "dim5", + "dim6", + "f1", + "f2", + "j0.unnest", + "l1", + "l2", + "m1", + "m2", + "unique_dim1" + )) + .build() + ), + ImmutableList.of( + new Object[]{ + 946684800000L, + "", + "a", + "[\"a\",\"b\"]", + "a", + "aa", + "1", + 1.0D, + 0.0D, + 1.0F, + 0.0F, + 7L, + 0L, + 1L, + 1.0F, + 1.0D, + "\"AQAAAEAAAA==\"", + "b" + }, + new Object[]{ + 946771200000L, + "10.1", + "", + "[\"b\",\"c\"]", + "a", + "ab", + "2", + 1.7D, + 1.7D, + 0.1F, + 0.1F, + 325323L, + 325323L, + 1L, + 2.0F, + 2.0D, + "\"AQAAAQAAAAHNBA==\"", + "b" + } + ) + ); + } + + @Test + public void testUnnestWithSelectorFiltersOnSelectedColumn() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(selector("j0.unnest", "b", null)) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } + + @Test + public void testUnnestWithOrFiltersOnSelectedColumn() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(selector("j0.unnest", "b", null)) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } } From 3cf5e72e9c352758ef34a2743846f8b4df11e59b Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Mon, 13 Mar 2023 22:35:51 -0700 Subject: [PATCH 05/18] Handling rules for a case where selector filters adds an extra layer of project before the filter on top of uncollect --- ...lateProjectOnFIlterRightTransposeRule.java | 12 ++- .../sql/calcite/CalciteArraysQueryTest.java | 100 ++---------------- 2 files changed, 18 insertions(+), 94 deletions(-) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java index e9782bcd1a7b..37aa4ef2f146 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java @@ -72,8 +72,18 @@ public void onMatch(RelOptRuleCall call) final RelNode left = call.rel(1); final Project rightP = call.rel(2); final Filter rightF = call.rel(3); + final Uncollect uncollect = call.rel(4); - if (rightP.getProjects().size() <=1 && rightP.getChildExps().get(0).getKind() == SqlKind.CAST) { + // The project is top of Uncollect and can only refer to the unnested output column + // the project can be a cast if on a string column + // e.g. LogicalProject(subset=[rel#123:Subset#5.NONE.[]], d3=[CAST('b':VARCHAR):VARCHAR]) + // Or for numeric columns takes the shape + // e.g. LogicalProject(subset=[rel#126:Subset#5.NONE.[]], d3=[1.0E0:FLOAT]) + // The projection is always bound to be on a single column reference which will be output of the uncollect + // So we check if there is only a single projection and is it either a CAST or a LITERAL + + final SqlKind rightProjectKind = rightP.getChildExps().get(0).getKind(); + if (rightP.getProjects().size() == 1 && (rightProjectKind == SqlKind.CAST || rightProjectKind == SqlKind.LITERAL)) { call.transformTo( call.builder() .push(correlate.copy(correlate.getTraitSet(), ImmutableList.of(left, rightF.getInput()))) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 6ffb76e76a6b..593f91936f94 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -3738,99 +3738,13 @@ public void testUnnestWithNotFiltersOnUnnestedColumn() ); } - @Test - public void testUnnestWithSelectorFiltersOnAllColumns() - { - skipVectorize(); - cannotVectorize(); - testQuery( - "SELECT * FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' ", - QUERY_CONTEXT_UNNEST, - ImmutableList.of( - Druids.newScanQueryBuilder() - .dataSource(UnnestDataSource.create( - new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) - )) - .intervals(querySegmentSpec(Filtration.eternity())) - .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .filters(selector("j0.unnest", "b", null)) - .legacy(false) - .context(QUERY_CONTEXT_UNNEST) - .columns(ImmutableList.of( - "__time", - "cnt", - "d1", - "d2", - "dim1", - "dim2", - "dim3", - "dim4", - "dim5", - "dim6", - "f1", - "f2", - "j0.unnest", - "l1", - "l2", - "m1", - "m2", - "unique_dim1" - )) - .build() - ), - ImmutableList.of( - new Object[]{ - 946684800000L, - "", - "a", - "[\"a\",\"b\"]", - "a", - "aa", - "1", - 1.0D, - 0.0D, - 1.0F, - 0.0F, - 7L, - 0L, - 1L, - 1.0F, - 1.0D, - "\"AQAAAEAAAA==\"", - "b" - }, - new Object[]{ - 946771200000L, - "10.1", - "", - "[\"b\",\"c\"]", - "a", - "ab", - "2", - 1.7D, - 1.7D, - 0.1F, - 0.1F, - 325323L, - 325323L, - 1L, - 2.0F, - 2.0D, - "\"AQAAAQAAAAHNBA==\"", - "b" - } - ) - ); - } - @Test public void testUnnestWithSelectorFiltersOnSelectedColumn() { skipVectorize(); cannotVectorize(); testQuery( - "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' ", + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b'", QUERY_CONTEXT_UNNEST, ImmutableList.of( Druids.newScanQueryBuilder() @@ -3854,30 +3768,30 @@ public void testUnnestWithSelectorFiltersOnSelectedColumn() } @Test - public void testUnnestWithOrFiltersOnSelectedColumn() + public void testUnnestWithSelectorFiltersOnSelectedVirtualColumn() { skipVectorize(); cannotVectorize(); testQuery( - "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' ", + "SELECT d3 FROM druid.numfoo, UNNEST(ARRAY[m1,m2]) as unnested (d3) where d3=1 ", QUERY_CONTEXT_UNNEST, ImmutableList.of( Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + expressionVirtualColumn("j0.unnest", "array(\"m1\",\"m2\")", ColumnType.FLOAT_ARRAY) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .filters(selector("j0.unnest", "b", null)) + .filters(selector("j0.unnest", "1", null)) .legacy(false) .context(QUERY_CONTEXT_UNNEST) .columns(ImmutableList.of("j0.unnest")) .build() ), ImmutableList.of( - new Object[]{"b"}, - new Object[]{"b"} + new Object[]{1.0f}, + new Object[]{1.0f} ) ); } From 76aaed1b4753f92bea6f9b5255c0ebcf8ec626d3 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Tue, 14 Mar 2023 10:24:59 -0700 Subject: [PATCH 06/18] Trying to move filter inside unnest part 1 --- .../apache/druid/query/UnnestDataSource.java | 38 ++++-- .../druid/segment/UnnestSegmentReference.java | 10 +- .../druid/segment/UnnestStorageAdapter.java | 54 +++++--- .../druid/query/QueryRunnerTestHelper.java | 3 +- .../groupby/UnnestGroupByQueryRunnerTest.java | 12 +- .../query/scan/UnnestScanQueryRunnerTest.java | 6 +- .../query/topn/UnnestTopNQueryRunnerTest.java | 6 +- .../segment/UnnestStorageAdapterTest.java | 6 +- .../calcite/rel/DruidCorrelateUnnestRel.java | 26 +++- .../druid/sql/calcite/rel/DruidQuery.java | 2 +- .../druid/sql/calcite/rel/DruidUnnestRel.java | 20 ++- .../rule/CorrelateFilterLTransposeRule.java | 2 +- .../rule/CorrelateFilterRTransposeRule.java | 112 ---------------- ...lateProjectOnFIlterRightTransposeRule.java | 95 -------------- .../calcite/rule/DruidFilterUnnestRule.java | 105 +++++++++++++++ .../druid/sql/calcite/rule/DruidRules.java | 4 +- .../sql/calcite/CalciteArraysQueryTest.java | 121 ++++++++---------- 17 files changed, 298 insertions(+), 324 deletions(-) delete mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterRTransposeRule.java delete mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java create mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java diff --git a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java index d46cd226a8a5..776eaf9a4bbe 100644 --- a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java +++ b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java @@ -23,12 +23,14 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.collect.ImmutableList; import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.planning.DataSourceAnalysis; import org.apache.druid.segment.SegmentReference; import org.apache.druid.segment.UnnestSegmentReference; import org.apache.druid.segment.VirtualColumn; import org.apache.druid.utils.JvmUtils; +import javax.annotation.Nullable; import java.util.List; import java.util.Objects; import java.util.Set; @@ -47,23 +49,29 @@ public class UnnestDataSource implements DataSource { private final DataSource base; private final VirtualColumn virtualColumn; + @Nullable + private final DimFilter unnestFilter; private UnnestDataSource( DataSource dataSource, - VirtualColumn virtualColumn + VirtualColumn virtualColumn, + DimFilter unnestFilter ) { this.base = dataSource; this.virtualColumn = virtualColumn; + this.unnestFilter = unnestFilter; } @JsonCreator public static UnnestDataSource create( @JsonProperty("base") DataSource base, - @JsonProperty("virtualColumn") VirtualColumn virtualColumn + @JsonProperty("virtualColumn") VirtualColumn virtualColumn, + @Nullable @JsonProperty("unnestFilter") DimFilter unnestFilter + ) { - return new UnnestDataSource(base, virtualColumn); + return new UnnestDataSource(base, virtualColumn, unnestFilter); } @JsonProperty("base") @@ -78,6 +86,12 @@ public VirtualColumn getVirtualColumn() return virtualColumn; } + @JsonProperty("unnestFilter") + public DimFilter getUnnestFilter() + { + return unnestFilter; + } + @Override public Set getTableNames() { @@ -96,7 +110,7 @@ public DataSource withChildren(List children) if (children.size() != 1) { throw new IAE("Expected [1] child, got [%d]", children.size()); } - return new UnnestDataSource(children.get(0), virtualColumn); + return new UnnestDataSource(children.get(0), virtualColumn, unnestFilter); } @Override @@ -133,7 +147,8 @@ public Function createSegmentMapFunction( baseSegment -> new UnnestSegmentReference( segmentMapFn.apply(baseSegment), - virtualColumn + virtualColumn, + unnestFilter ) ); } @@ -141,7 +156,7 @@ public Function createSegmentMapFunction( @Override public DataSource withUpdatedDataSource(DataSource newSource) { - return new UnnestDataSource(newSource, virtualColumn); + return new UnnestDataSource(newSource, virtualColumn, unnestFilter); } @Override @@ -172,14 +187,18 @@ public boolean equals(Object o) return false; } UnnestDataSource that = (UnnestDataSource) o; - return virtualColumn.equals(that.virtualColumn) - && base.equals(that.base); + if (unnestFilter != null) { + return virtualColumn.equals(that.virtualColumn) && unnestFilter.equals(that.unnestFilter) + && base.equals(that.base); + } else { + return virtualColumn.equals(that.virtualColumn) && base.equals(that.base); + } } @Override public int hashCode() { - return Objects.hash(base, virtualColumn); + return Objects.hash(base, virtualColumn, unnestFilter); } @Override @@ -188,6 +207,7 @@ public String toString() return "UnnestDataSource{" + "base=" + base + ", column='" + virtualColumn + '\'' + + ", unnestFilter='" + unnestFilter + '\'' + '}'; } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java b/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java index 8f3871390045..7ad994ab7a89 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java @@ -21,6 +21,7 @@ import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.query.filter.DimFilter; import org.apache.druid.timeline.SegmentId; import org.apache.druid.utils.CloseableUtils; import org.joda.time.Interval; @@ -40,15 +41,19 @@ public class UnnestSegmentReference implements SegmentReference private final SegmentReference baseSegment; private final VirtualColumn unnestColumn; + @Nullable + private final DimFilter unnestFilter; public UnnestSegmentReference( SegmentReference baseSegment, - VirtualColumn unnestColumn + VirtualColumn unnestColumn, + DimFilter unnestFilter ) { this.baseSegment = baseSegment; this.unnestColumn = unnestColumn; + this.unnestFilter = unnestFilter; } @Override @@ -100,7 +105,8 @@ public StorageAdapter asStorageAdapter() { return new UnnestStorageAdapter( baseSegment.asStorageAdapter(), - unnestColumn + unnestColumn, + unnestFilter ); } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 94c471d1a6e7..858d8aa81192 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -27,6 +27,7 @@ import org.apache.druid.java.util.common.guava.Sequences; import org.apache.druid.query.QueryMetrics; import org.apache.druid.query.filter.BooleanFilter; +import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.segment.column.ColumnCapabilities; @@ -62,15 +63,19 @@ public class UnnestStorageAdapter implements StorageAdapter private final StorageAdapter baseAdapter; private final VirtualColumn unnestColumn; private final String outputColumnName; + @Nullable + private final DimFilter unnestFilter; public UnnestStorageAdapter( final StorageAdapter baseAdapter, - final VirtualColumn unnestColumn + final VirtualColumn unnestColumn, + final DimFilter unnestFilter ) { this.baseAdapter = baseAdapter; this.unnestColumn = unnestColumn; this.outputColumnName = unnestColumn.getOutputName(); + this.unnestFilter = unnestFilter; } @Override @@ -86,6 +91,7 @@ public Sequence makeCursors( final String inputColumn = getUnnestInputIfDirectAccess(); final Pair filterPair = computeBaseAndPostUnnestFilters( filter, + unnestFilter != null ? unnestFilter.toFilter() : null, virtualColumns, inputColumn, inputColumn == null || virtualColumns.exists(inputColumn) @@ -253,11 +259,11 @@ public VirtualColumn getUnnestColumn() * @param queryVirtualColumns query virtual columns passed to makeCursors * @param inputColumn input column to unnest if it's a direct access; otherwise null * @param inputColumnCapabilites input column capabilities if known; otherwise null - * * @return pair of pre- and post-unnest filters */ private Pair computeBaseAndPostUnnestFilters( @Nullable final Filter queryFilter, + @Nullable final Filter unnestFilter, final VirtualColumns queryVirtualColumns, @Nullable final String inputColumn, @Nullable final ColumnCapabilities inputColumnCapabilites @@ -268,7 +274,22 @@ class FilterSplitter final List preFilters = new ArrayList<>(); final List postFilters = new ArrayList<>(); - void add(@Nullable final Filter filter) + void addPostFilterWithPreFilterIfRewritePossible(@Nullable final Filter filter) + { + if (filter == null) { + return; + } + final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(filter, inputColumn, inputColumnCapabilites); + if (newFilter != null) { + // Add the rewritten filter pre-unnest, so we get the benefit of any indexes, and so we avoid unnesting + // any rows that do not match this filter at all. + preFilters.add(newFilter); + } + // Add original filter post-unnest no matter what: we need to filter out any extraneous unnested values. + postFilters.add(filter); + } + + void addPreFilter(@Nullable final Filter filter) { if (filter == null) { return; @@ -285,20 +306,7 @@ void add(@Nullable final Filter filter) } } } - - if (requiredColumns.contains(outputColumnName)) { - // Rewrite filter post-unnest if possible. - final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(filter, inputColumn, inputColumnCapabilites); - if (newFilter != null) { - // Add the rewritten filter pre-unnest, so we get the benefit of any indexes, and so we avoid unnesting - // any rows that do not match this filter at all. - preFilters.add(newFilter); - } - // Add original filter post-unnest no matter what: we need to filter out any extraneous unnested values. - postFilters.add(filter); - } else { - preFilters.add(filter); - } + preFilters.add(filter); } } @@ -306,10 +314,18 @@ void add(@Nullable final Filter filter) if (queryFilter instanceof AndFilter) { for (Filter filter : ((AndFilter) queryFilter).getFilters()) { - filterSplitter.add(filter); + filterSplitter.addPreFilter(filter); + } + } else { + filterSplitter.addPreFilter(queryFilter); + } + + if (unnestFilter instanceof AndFilter) { + for (Filter filter : ((AndFilter) unnestFilter).getFilters()) { + filterSplitter.addPostFilterWithPreFilterIfRewritePossible(filter); } } else { - filterSplitter.add(queryFilter); + filterSplitter.addPostFilterWithPreFilterIfRewritePossible(unnestFilter); } return Pair.of( diff --git a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java index 2f4119af8f83..aec318aa7a9e 100644 --- a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java +++ b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java @@ -111,7 +111,8 @@ public class QueryRunnerTestHelper "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", null, ExprMacroTable.nil() - ) + ), + null ); public static final Granularity DAY_GRAN = Granularities.DAY; diff --git a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java index a09d2fc256e9..826d612f678c 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java @@ -239,7 +239,8 @@ public void testGroupBy() "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", null, ExprMacroTable.nil() - ) + ), + null )) .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -452,7 +453,8 @@ public void testGroupByOnMissingColumn() "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", null, ExprMacroTable.nil() - ) + ), + null )) .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) .setDimensions( @@ -564,7 +566,8 @@ public void testGroupByOnUnnestedVirtualColumn() "mv_to_array(placementish)", ColumnType.STRING_ARRAY, TestExprMacroTable.INSTANCE - ) + ), + null ); GroupByQuery query = makeQueryBuilder() @@ -652,7 +655,8 @@ public void testGroupByOnUnnestedVirtualMultiColumn() "array(\"market\",\"quality\")", ColumnType.STRING, TestExprMacroTable.INSTANCE - ) + ), + null ); GroupByQuery query = makeQueryBuilder() diff --git a/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java index a0fa2b929e09..5c18e4784f12 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java @@ -164,7 +164,8 @@ public void testUnnestRunnerVirtualColumnsUsingSingleColumn() "mv_to_array(placementish)", ColumnType.STRING, TestExprMacroTable.INSTANCE - ) + ), + null )) .columns(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) .eternityInterval() @@ -233,7 +234,8 @@ public void testUnnestRunnerVirtualColumnsUsingMultipleColumn() "array(\"market\",\"quality\")", ColumnType.STRING, TestExprMacroTable.INSTANCE - ) + ), + null )) .columns(QueryRunnerTestHelper.MARKET_DIMENSION, QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) .eternityInterval() diff --git a/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java index c30022216bd4..e822913489dd 100644 --- a/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java @@ -258,7 +258,8 @@ public void testTopNStringVirtualColumnUnnest() "mv_to_array(\"placementish\")", ColumnType.STRING_ARRAY, TestExprMacroTable.INSTANCE - ) + ), + null )) .granularity(QueryRunnerTestHelper.ALL_GRAN) .dimension(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) @@ -340,7 +341,8 @@ public void testTopNStringVirtualMultiColumnUnnest() "array(\"market\",\"quality\")", ColumnType.STRING, TestExprMacroTable.INSTANCE - ) + ), + null )) .granularity(QueryRunnerTestHelper.ALL_GRAN) .dimension(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index d852547e4ec4..e47f09e54e1e 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -82,12 +82,14 @@ public static void setup() INCREMENTAL_INDEX_STORAGE_ADAPTER = new IncrementalIndexStorageAdapter(INCREMENTAL_INDEX); UNNEST_STORAGE_ADAPTER = new UnnestStorageAdapter( INCREMENTAL_INDEX_STORAGE_ADAPTER, - new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()) + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), + null ); UNNEST_STORAGE_ADAPTER1 = new UnnestStorageAdapter( UNNEST_STORAGE_ADAPTER, - new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME1, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()) + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME1, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), + null ); ADAPTERS = ImmutableList.of( diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java index 5c5bfd8aae4e..5f0483e29149 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java @@ -31,6 +31,7 @@ import org.apache.calcite.rel.RelWriter; import org.apache.calcite.rel.core.Correlate; import org.apache.calcite.rel.core.CorrelationId; +import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexCall; @@ -44,10 +45,12 @@ import org.apache.druid.query.QueryDataSource; import org.apache.druid.query.TableDataSource; import org.apache.druid.query.UnnestDataSource; +import org.apache.druid.query.filter.DimFilter; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.expression.Expressions; import org.apache.druid.sql.calcite.expression.builtin.MultiValueStringToArrayOperatorConversion; +import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.calcite.planner.Calcites; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.table.RowSignatures; @@ -63,7 +66,7 @@ * Each correlate can be perceived as a join with the join type being inner * the left of a correlate as seen in the rule {@link org.apache.druid.sql.calcite.rule.DruidCorrelateUnnestRule} * is the {@link DruidQueryRel} while the right will always be an {@link DruidUnnestRel}. - * + *

* Since this is a subclass of DruidRel it is automatically considered by other rules that involves DruidRels. * Some example being SELECT_PROJECT and SORT_PROJECT rules in {@link org.apache.druid.sql.calcite.rule.DruidRules.DruidQueryRule} */ @@ -136,11 +139,13 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) final DruidUnnestRel unnestDatasourceRel = (DruidUnnestRel) right; final DataSource leftDataSource; final RowSignature leftDataSourceSignature; + final Filter unnestFilter = unnestDatasourceRel.getUnnestFilter(); if (right.getRowType().getFieldNames().size() != 1) { throw new CannotBuildQueryException("Cannot perform correlated join + UNNEST with more than one column"); } + if (computeLeftRequiresSubquery(leftDruidRel)) { // Left side is doing more than simple scan: generate a subquery. leftDataSource = new QueryDataSource(leftQuery.getQuery()); @@ -164,6 +169,20 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) // Final output row signature. final RowSignature correlateRowSignature = getCorrelateRowSignature(correlateRel, leftQuery); + final DimFilter unnestFilterOnDataSource; + if (unnestFilter != null) { + RowSignature filterRowSignature = RowSignatures.fromRelDataType(ImmutableList.of(correlateRowSignature.getColumnName( + correlateRowSignature.size() - 1)), unnestFilter.getInput().getRowType()); + unnestFilterOnDataSource = Filtration.create(DruidQuery.getDimFilter( + getPlannerContext(), + filterRowSignature, + null, + unnestFilter + )) + .optimizeFilterOnly(filterRowSignature).getDimFilter(); + } else { + unnestFilterOnDataSource = null; + } return partialQuery.build( UnnestDataSource.create( @@ -172,7 +191,8 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) correlateRowSignature.getColumnName(correlateRowSignature.size() - 1), Calcites.getColumnTypeForRelDataType(rexNodeToUnnest.getType()), getPlannerContext().getExprMacroTable() - ) + ), + unnestFilterOnDataSource ), correlateRowSignature, getPlannerContext(), @@ -282,7 +302,7 @@ public Set getDataSourceNames() /** * Computes whether a particular left-side rel requires a subquery, or if we can operate on its underlying * datasource directly. - * + *

* Stricter than {@link DruidJoinQueryRel#computeLeftRequiresSubquery}: this method only allows scans (not mappings). * This is OK because any mapping or other simple projection would have been pulled above the {@link Correlate} by * {@link org.apache.druid.sql.calcite.rule.DruidCorrelateUnnestRule}. diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java index 116acb0c5c27..2a093e6a3012 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java @@ -332,7 +332,7 @@ private static DimFilter computeHavingFilter( } @Nonnull - private static DimFilter getDimFilter( + public static DimFilter getDimFilter( final PlannerContext plannerContext, final RowSignature rowSignature, @Nullable final VirtualColumnRegistry virtualColumnRegistry, diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestRel.java index f7809e2faee6..de4294a73782 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestRel.java @@ -24,6 +24,7 @@ import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelWriter; +import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Uncollect; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rel.logical.LogicalValues; @@ -56,16 +57,24 @@ public class DruidUnnestRel extends DruidRel * {@link org.apache.calcite.rex.RexFieldAccess}. */ private final RexNode inputRexNode; + private final Filter unnestFilter; private DruidUnnestRel( final RelOptCluster cluster, final RelTraitSet traits, final RexNode inputRexNode, + final Filter unnestFilter, final PlannerContext plannerContext ) { super(cluster, traits, plannerContext); this.inputRexNode = inputRexNode; + this.unnestFilter = unnestFilter; + } + + public Filter getUnnestFilter() + { + return unnestFilter; } public static DruidUnnestRel create( @@ -83,6 +92,7 @@ public static DruidUnnestRel create( cluster, traits, unnestRexNode, + null, plannerContext ); } @@ -100,6 +110,7 @@ public RelNode accept(RexShuttle shuttle) getCluster(), getTraitSet(), newInputRexNode, + unnestFilter, getPlannerContext() ); } @@ -125,6 +136,11 @@ public DruidUnnestRel withPartialQuery(PartialDruidQuery newQueryBuilder) throw new UnsupportedOperationException(); } + public DruidUnnestRel withFilter(Filter f) + { + return new DruidUnnestRel(getCluster(), getTraitSet(), inputRexNode, f, getPlannerContext()); + } + /** * Returns a new rel with a new input. The output type is unchanged. */ @@ -134,6 +150,7 @@ public DruidUnnestRel withUnnestRexNode(final RexNode newInputRexNode) getCluster(), getTraitSet(), newInputRexNode, + unnestFilter, getPlannerContext() ); } @@ -160,6 +177,7 @@ public DruidUnnestRel asDruidConvention() getCluster(), getTraitSet().replace(DruidConvention.instance()), inputRexNode, + unnestFilter, getPlannerContext() ); } @@ -167,7 +185,7 @@ public DruidUnnestRel asDruidConvention() @Override public RelWriter explainTerms(RelWriter pw) { - return pw.item("expr", inputRexNode); + return pw.item("expr", inputRexNode).item("filter", unnestFilter); } @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterLTransposeRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterLTransposeRule.java index 8e2b8d8d72fb..7b15c2cab735 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterLTransposeRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterLTransposeRule.java @@ -30,7 +30,7 @@ * Rule that pulls a {@link Filter} from the left-hand side of a {@link Correlate} above the Correlate. * Allows subquery elimination. * - * @see CorrelateFilterRTransposeRule similar, but for right-hand side filters + * @see DruidFilterUnnestRule similar, but for right-hand side filters */ public class CorrelateFilterLTransposeRule extends RelOptRule { diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterRTransposeRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterRTransposeRule.java deleted file mode 100644 index 00f472c5acc8..000000000000 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterRTransposeRule.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.sql.calcite.rule; - -import com.google.common.collect.ImmutableList; -import org.apache.calcite.plan.RelOptRule; -import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.Correlate; -import org.apache.calcite.rel.core.CorrelationId; -import org.apache.calcite.rel.core.Filter; -import org.apache.calcite.rex.RexCorrelVariable; -import org.apache.calcite.rex.RexNode; -import org.apache.calcite.rex.RexUtil; -import org.apache.calcite.rex.RexVisitorImpl; - -/** - * Rule that pulls a {@link Filter} from the right-hand side of a {@link Correlate} above the Correlate. - * Allows filters on unnested fields to be added to queries that use {@link org.apache.druid.query.UnnestDataSource}. - * - * @see CorrelateFilterLTransposeRule similar, but for left-hand side filters - */ -public class CorrelateFilterRTransposeRule extends RelOptRule -{ - private static final CorrelateFilterRTransposeRule INSTANCE = new CorrelateFilterRTransposeRule(); - - public CorrelateFilterRTransposeRule() - { - super( - operand( - Correlate.class, - operand(RelNode.class, any()), - operand(Filter.class, any()) - )); - } - - public static CorrelateFilterRTransposeRule instance() - { - return INSTANCE; - } - - @Override - public boolean matches(RelOptRuleCall call) - { - final Correlate correlate = call.rel(0); - final Filter right = call.rel(2); - - // Can't pull up filters that explicitly refer to the correlation variable. - return !usesCorrelationId(correlate.getCorrelationId(), right.getCondition()); - } - - @Override - public void onMatch(final RelOptRuleCall call) - { - final Correlate correlate = call.rel(0); - final RelNode left = call.rel(1); - final Filter right = call.rel(2); - - call.transformTo( - call.builder() - .push(correlate.copy(correlate.getTraitSet(), ImmutableList.of(left, right.getInput()))) - .filter(RexUtil.shift(right.getCondition(), left.getRowType().getFieldCount())) - .build() - ); - } - - /** - * Whether an expression refers to correlation variables. - */ - public static boolean usesCorrelationId(final CorrelationId correlationId, final RexNode rexNode) - { - class CorrelationVisitor extends RexVisitorImpl - { - private boolean found = false; - - public CorrelationVisitor() - { - super(true); - } - - @Override - public Void visitCorrelVariable(RexCorrelVariable correlVariable) - { - if (correlVariable.id.equals(correlationId)) { - found = true; - } - return null; - } - } - - final CorrelationVisitor visitor = new CorrelationVisitor(); - rexNode.accept(visitor); - return visitor.found; - } -} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java deleted file mode 100644 index 37aa4ef2f146..000000000000 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.sql.calcite.rule; - -import com.google.common.collect.ImmutableList; -import org.apache.calcite.plan.RelOptRule; -import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.Correlate; -import org.apache.calcite.rel.core.Filter; -import org.apache.calcite.rel.core.Project; -import org.apache.calcite.rel.core.Uncollect; -import org.apache.calcite.rex.RexUtil; -import org.apache.calcite.sql.SqlKind; - -/** - * Rule that pulls a {@link Filter} from the right-hand side of a {@link Correlate} above the Correlate in presence of an unneeded Project - * Allows filters on unnested fields to be added to queries that use {@link org.apache.druid.query.UnnestDataSource}. - * - * @see CorrelateFilterRTransposeRule similar, but for without a Project atop Filter - */ -public class CorrelateProjectOnFIlterRightTransposeRule extends RelOptRule -{ - private static final CorrelateProjectOnFIlterRightTransposeRule INSTANCE = new CorrelateProjectOnFIlterRightTransposeRule(); - - public CorrelateProjectOnFIlterRightTransposeRule() - { - super( - operand( - Correlate.class, - operand(RelNode.class, any()), - operand(Project.class, operand(Filter.class, operand(Uncollect.class, any()))) - )); - } - - @Override - public boolean matches(RelOptRuleCall call) - { - final Correlate correlate = call.rel(0); - final Filter right = call.rel(3); - - // Can't pull up filters that explicitly refer to the correlation variable. - return !CorrelateFilterRTransposeRule.usesCorrelationId(correlate.getCorrelationId(), right.getCondition()); - } - - public static CorrelateProjectOnFIlterRightTransposeRule instance() - { - return INSTANCE; - } - - @Override - public void onMatch(RelOptRuleCall call) - { - final Correlate correlate = call.rel(0); - final RelNode left = call.rel(1); - final Project rightP = call.rel(2); - final Filter rightF = call.rel(3); - final Uncollect uncollect = call.rel(4); - - // The project is top of Uncollect and can only refer to the unnested output column - // the project can be a cast if on a string column - // e.g. LogicalProject(subset=[rel#123:Subset#5.NONE.[]], d3=[CAST('b':VARCHAR):VARCHAR]) - // Or for numeric columns takes the shape - // e.g. LogicalProject(subset=[rel#126:Subset#5.NONE.[]], d3=[1.0E0:FLOAT]) - // The projection is always bound to be on a single column reference which will be output of the uncollect - // So we check if there is only a single projection and is it either a CAST or a LITERAL - - final SqlKind rightProjectKind = rightP.getChildExps().get(0).getKind(); - if (rightP.getProjects().size() == 1 && (rightProjectKind == SqlKind.CAST || rightProjectKind == SqlKind.LITERAL)) { - call.transformTo( - call.builder() - .push(correlate.copy(correlate.getTraitSet(), ImmutableList.of(left, rightF.getInput()))) - .filter(RexUtil.shift(rightF.getCondition(), left.getRowType().getFieldCount())) - .build() - ); - } - } -} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java new file mode 100644 index 000000000000..38dd9a476092 --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.rule; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.sql.SqlKind; +import org.apache.druid.sql.calcite.rel.DruidUnnestRel; + +public class DruidFilterUnnestRule extends RelOptRule +{ + private static final DruidFilterUnnestRule INSTANCE = new DruidFilterUnnestRule(); + + private DruidFilterUnnestRule() + { + super( + operand( + Filter.class, + operand(DruidUnnestRel.class, any()) + ) + ); + } + + public static DruidFilterUnnestRule instance() + { + return INSTANCE; + } + + @Override + public void onMatch(RelOptRuleCall call) + { + final Filter filter = call.rel(0); + final DruidUnnestRel unnestDatasourceRel = call.rel(1); + DruidUnnestRel newRel = unnestDatasourceRel.withFilter(filter); + call.transformTo(newRel); + } + + // This is for a special case of handling selector filters + // on top of UnnestDataSourceRel when Calcite adds an extra + // LogicalProject on the LogicalFilter. For e.g. #122 here + // SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' + // 126:LogicalProject(d3=[$17]) + // 124:LogicalCorrelate(subset=[rel#125:Subset#6.NONE.[]], correlation=[$cor0], joinType=[inner], requiredColumns=[{3}]) + // 8:LogicalTableScan(subset=[rel#114:Subset#0.NONE.[]], table=[[druid, numfoo]]) + // 122:LogicalProject(subset=[rel#123:Subset#5.NONE.[]], d3=[CAST('b':VARCHAR):VARCHAR]) + // 120:LogicalFilter(subset=[rel#121:Subset#4.NONE.[]], condition=[=($0, 'b')]) + // 118:Uncollect(subset=[rel#119:Subset#3.NONE.[]]) + // 116:LogicalProject(subset=[rel#117:Subset#2.NONE.[]], EXPR$0=[MV_TO_ARRAY($cor0.dim3)]) + // 9:LogicalValues(subset=[rel#115:Subset#1.NONE.[0]], tuples=[[{ 0 }]]) + + // This logical project does a type cast only which Druid already has information about + // So we can skip this LogicalProject + // Extensive unit tests can be found in {@link CalciteArraysQueryTest} + + static class DruidProjectOnCorrelateRule extends RelOptRule + { + private static final DruidProjectOnCorrelateRule INSTANCE = new DruidProjectOnCorrelateRule(); + + private DruidProjectOnCorrelateRule() + { + super( + operand( + Project.class, + operand(DruidUnnestRel.class, any()) + ) + ); + } + + public static DruidProjectOnCorrelateRule instance() + { + return INSTANCE; + } + + @Override + public void onMatch(RelOptRuleCall call) + { + final Project rightP = call.rel(0); + final SqlKind rightProjectKind = rightP.getChildExps().get(0).getKind(); + final DruidUnnestRel unnestDatasourceRel = call.rel(1); + + if (rightP.getProjects().size() == 1 && (rightProjectKind == SqlKind.CAST || rightProjectKind == SqlKind.LITERAL)) { + call.transformTo(unnestDatasourceRel); + } + } + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java index 3bfe21d3c72d..42110f2bccc4 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java @@ -120,8 +120,8 @@ public static List rules(PlannerContext plannerContext) retVal.add(new DruidCorrelateUnnestRule(plannerContext)); retVal.add(ProjectCorrelateTransposeRule.INSTANCE); retVal.add(CorrelateFilterLTransposeRule.instance()); - retVal.add(CorrelateFilterRTransposeRule.instance()); - retVal.add(CorrelateProjectOnFIlterRightTransposeRule.instance()); + retVal.add(DruidFilterUnnestRule.instance()); + retVal.add(DruidFilterUnnestRule.DruidProjectOnCorrelateRule.instance()); } return retVal; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 593f91936f94..f1668a51d17d 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -2744,7 +2744,8 @@ public void testUnnest() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + null )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -2798,13 +2799,15 @@ public void testUnnestTwice() "j0.unnest", "string_to_array(\"dim1\",'\\u005C.')", ColumnType.STRING_ARRAY - ) + ), + null ), expressionVirtualColumn( "_j0.unnest", "\"dim3\"", ColumnType.STRING - ) + ), + null ) ) .intervals(querySegmentSpec(Filtration.eternity())) @@ -2863,13 +2866,15 @@ public void testUnnestTwiceWithFiltersAndExpressions() "j0.unnest", "string_to_array(\"dim1\",'\\u005C.')", ColumnType.STRING_ARRAY - ) + ), + in("j0.unnest", ImmutableList.of("1", "2"), null) ), expressionVirtualColumn( "_j0.unnest", "\"dim3\"", ColumnType.STRING - ) + ), + new LikeDimFilter("_j0.unnest", "_", null, null) ) ) .intervals(querySegmentSpec(Filtration.eternity())) @@ -2890,10 +2895,6 @@ public void testUnnestTwiceWithFiltersAndExpressions() ColumnType.STRING ) ) - .filters(and( - in("j0.unnest", ImmutableList.of("1", "2"), null), - new LikeDimFilter("_j0.unnest", "_", null, null) - )) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) .context(QUERY_CONTEXT_UNNEST) @@ -2924,7 +2925,8 @@ public void testUnnestWithGroupBy() GroupByQuery.builder() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + null )) .setInterval(querySegmentSpec(Filtration.eternity())) .setContext(QUERY_CONTEXT_UNNEST) @@ -2968,7 +2970,8 @@ public void testUnnestWithGroupByOrderBy() GroupByQuery.builder() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + null )) .setInterval(querySegmentSpec(Filtration.eternity())) .setContext(QUERY_CONTEXT_UNNEST) @@ -3023,7 +3026,8 @@ public void testUnnestWithGroupByOrderByWithLimit() new TopNQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + null )) .intervals(querySegmentSpec(Filtration.eternity())) .dimension(new DefaultDimensionSpec("j0.unnest", "_d0", ColumnType.STRING)) @@ -3061,7 +3065,8 @@ public void testUnnestWithGroupByHaving() GroupByQuery.builder() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + null )) .setInterval(querySegmentSpec(Filtration.eternity())) .setContext(QUERY_CONTEXT_UNNEST) @@ -3103,7 +3108,8 @@ public void testUnnestWithLimit() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + null )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3137,7 +3143,8 @@ public void testUnnestFirstQueryOnSelect() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + null )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3198,7 +3205,8 @@ public void testUnnestWithFilters() .context(QUERY_CONTEXT_UNNEST) .build() ), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + null )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3247,11 +3255,11 @@ public void testUnnestWithFiltersInsideAndOutside() .context(QUERY_CONTEXT_UNNEST) .build() ), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + not(selector("j0.unnest", "b", null)) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .filters(not(selector("j0.unnest", "b", null))) .legacy(false) .context(QUERY_CONTEXT_UNNEST) .columns(ImmutableList.of("j0.unnest")) @@ -3280,16 +3288,16 @@ public void testUnnestWithFiltersOutside() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + or( + new LikeDimFilter("j0.unnest", "_", null, null), + in("j0.unnest", ImmutableList.of("a", "c"), null) + ) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .filters( and( - or( - new LikeDimFilter("j0.unnest", "_", null, null), - in("j0.unnest", ImmutableList.of("a", "c"), null) - ), selector("dim2", "a", null), not(selector("dim1", "foo", null)) ) @@ -3322,7 +3330,8 @@ public void testUnnestWithInFilters() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + null )) .intervals(querySegmentSpec(Filtration.eternity())) .filters(new InDimFilter("dim2", ImmutableList.of("a", "b", "ab", "abc"), null)) @@ -3358,7 +3367,8 @@ public void testUnnestVirtualWithColumns() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY) + expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY), + null )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3401,7 +3411,8 @@ public void testUnnestWithGroupByOrderByOnVirtualColumn() "j0.unnest", "array(\"dim2\",\"dim4\")", ColumnType.STRING_ARRAY - ) + ), + null ) ) .setInterval(querySegmentSpec(Filtration.eternity())) @@ -3469,7 +3480,8 @@ public void testUnnestWithJoinOnTheLeft() "(\"dim2\" == \"j0.dim2\")", JoinType.INNER ), - expressionVirtualColumn("_j0.unnest", "\"dim3\"", ColumnType.STRING) + expressionVirtualColumn("_j0.unnest", "\"dim3\"", ColumnType.STRING), + null )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3571,7 +3583,8 @@ public void testUnnestWithSQLFunctionOnUnnestedColumn() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + null )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3611,17 +3624,18 @@ public void testUnnestWithINFiltersWithLeftRewrite() skipVectorize(); cannotVectorize(); testQuery( - "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('a','b')", + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('a','b') and m1 < 10", QUERY_CONTEXT_UNNEST, ImmutableList.of( Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + new InDimFilter("j0.unnest", ImmutableSet.of("a", "b"), null) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .filters(new InDimFilter("j0.unnest", ImmutableSet.of("a", "b"), null)) + .filters(bound("m1", null, "10", false, true, null, StringComparators.NUMERIC)) .legacy(false) .context(QUERY_CONTEXT_UNNEST) .columns(ImmutableList.of("j0.unnest")) @@ -3647,11 +3661,11 @@ public void testUnnestWithINFiltersWithNoLeftRewrite() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY) + expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY), + new InDimFilter("j0.unnest", ImmutableSet.of("a", "b"), null) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .filters(new InDimFilter("j0.unnest", ImmutableSet.of("a", "b"), null)) .legacy(false) .context(QUERY_CONTEXT_UNNEST) .columns(ImmutableList.of("j0.unnest")) @@ -3680,11 +3694,11 @@ public void testUnnestWithInvalidINFiltersOnUnnestedColumn() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + new InDimFilter("j0.unnest", ImmutableSet.of("foo", "bar"), null) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .filters(new InDimFilter("j0.unnest", ImmutableSet.of("foo", "bar"), null)) .legacy(false) .context(QUERY_CONTEXT_UNNEST) .columns(ImmutableList.of("j0.unnest")) @@ -3706,11 +3720,11 @@ public void testUnnestWithNotFiltersOnUnnestedColumn() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + not(selector("j0.unnest", "d", null)) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .filters(not(selector("j0.unnest", "d", null))) .legacy(false) .context(QUERY_CONTEXT_UNNEST) .columns(ImmutableList.of("j0.unnest")) @@ -3750,11 +3764,11 @@ public void testUnnestWithSelectorFiltersOnSelectedColumn() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + selector("j0.unnest", "b", null) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .filters(selector("j0.unnest", "b", null)) .legacy(false) .context(QUERY_CONTEXT_UNNEST) .columns(ImmutableList.of("j0.unnest")) @@ -3766,33 +3780,4 @@ public void testUnnestWithSelectorFiltersOnSelectedColumn() ) ); } - - @Test - public void testUnnestWithSelectorFiltersOnSelectedVirtualColumn() - { - skipVectorize(); - cannotVectorize(); - testQuery( - "SELECT d3 FROM druid.numfoo, UNNEST(ARRAY[m1,m2]) as unnested (d3) where d3=1 ", - QUERY_CONTEXT_UNNEST, - ImmutableList.of( - Druids.newScanQueryBuilder() - .dataSource(UnnestDataSource.create( - new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "array(\"m1\",\"m2\")", ColumnType.FLOAT_ARRAY) - )) - .intervals(querySegmentSpec(Filtration.eternity())) - .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .filters(selector("j0.unnest", "1", null)) - .legacy(false) - .context(QUERY_CONTEXT_UNNEST) - .columns(ImmutableList.of("j0.unnest")) - .build() - ), - ImmutableList.of( - new Object[]{1.0f}, - new Object[]{1.0f} - ) - ); - } } From 6227595ec5dda13849b7b4f837315a1e4cfa504f Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Tue, 14 Mar 2023 16:27:59 -0700 Subject: [PATCH 07/18] Some cleanup after merging with master --- .../java/org/apache/druid/segment/UnnestStorageAdapter.java | 3 --- .../apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index b06fb0f7298a..11c38e539a4c 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -260,10 +260,7 @@ public VirtualColumn getUnnestColumn() * @param queryVirtualColumns query virtual columns passed to makeCursors * @param inputColumn input column to unnest if it's a direct access; otherwise null * @param inputColumnCapabilites input column capabilities if known; otherwise null -<<<<<<< HEAD -======= * ->>>>>>> upstream/master * @return pair of pre- and post-unnest filters */ private Pair computeBaseAndPostUnnestFilters( diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java index 38dd9a476092..d0d63876867d 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java @@ -68,7 +68,7 @@ public void onMatch(RelOptRuleCall call) // 9:LogicalValues(subset=[rel#115:Subset#1.NONE.[0]], tuples=[[{ 0 }]]) // This logical project does a type cast only which Druid already has information about - // So we can skip this LogicalProject + // So we can skip this LogicalProject only if it is a CAST for strings or LITERALS for other types // Extensive unit tests can be found in {@link CalciteArraysQueryTest} static class DruidProjectOnCorrelateRule extends RelOptRule From 65a6397558b32a266af6c059553617556034c109 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Tue, 14 Mar 2023 16:33:49 -0700 Subject: [PATCH 08/18] checkstyle fix and 1 test case with selector on virtual column through the project LITERAL path --- .../sql/calcite/CalciteArraysQueryTest.java | 31 ++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 4361839eb81b..00df921be03a 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -3780,4 +3780,33 @@ public void testUnnestWithSelectorFiltersOnSelectedColumn() ) ); } -} \ No newline at end of file + + @Test + public void testUnnestWithSelectorFiltersOnVirtualColumn() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d12 FROM druid.numfoo, UNNEST(ARRAY[m1,m2]) as unnested (d12) where d12=1", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "array(\"m1\",\"m2\")", ColumnType.FLOAT_ARRAY), + selector("j0.unnest", "1", null) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{1.0f}, + new Object[]{1.0f} + ) + ); + } +} From 7e68d4cb91724073b576f12b35dec7d930fab9ba Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Tue, 14 Mar 2023 18:27:42 -0700 Subject: [PATCH 09/18] Adding support for OR filters on unnested column and other columns --- .../druid/segment/UnnestStorageAdapter.java | 19 ++++- .../sql/calcite/CalciteArraysQueryTest.java | 71 +++++++++++++++++++ 2 files changed, 88 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 11c38e539a4c..6ace0fc14a6b 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -260,7 +260,6 @@ public VirtualColumn getUnnestColumn() * @param queryVirtualColumns query virtual columns passed to makeCursors * @param inputColumn input column to unnest if it's a direct access; otherwise null * @param inputColumnCapabilites input column capabilities if known; otherwise null - * * @return pair of pre- and post-unnest filters */ private Pair computeBaseAndPostUnnestFilters( @@ -308,7 +307,23 @@ void addPreFilter(@Nullable final Filter filter) } } } - preFilters.add(filter); + // this happens with an or filter when calcite plans both filters atop Correlate + // For example: + // SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' or m1 < 2 + // Plans to: + // 116:LogicalProject(d3=[$17]) + // 114:LogicalFilter(subset=[rel#115:Subset#6.NONE.[]], condition=[OR(=($17, 'b'), <($14, 2))]) + // 112:LogicalCorrelate(subset=[rel#113:Subset#5.NONE.[]], correlation=[$cor0], joinType=[inner], requiredColumns=[{3}]) + // 8:LogicalTableScan(subset=[rel#104:Subset#0.NONE.[]], table=[[druid, numfoo]]) + // 108:Uncollect(subset=[rel#109:Subset#3.NONE.[]]) + // 106:LogicalProject(subset=[rel#107:Subset#2.NONE.[]], EXPR$0=[MV_TO_ARRAY($cor0.dim3)]) + // 9:LogicalValues(subset=[rel#105:Subset#1.NONE.[0]], tuples=[[{ 0 }]]) + // Run filter post-unnest if it refers to the outputColumnName + if (requiredColumns.contains(outputColumnName)) { + postFilters.add(filter); + } else { + preFilters.add(filter); + } } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 00df921be03a..beab714ef1b2 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -3809,4 +3809,75 @@ public void testUnnestWithSelectorFiltersOnVirtualColumn() ) ); } + + @Test + public void testUnnestWithMultipleAndFiltersOnSelectedColumns() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' and m1 < 10 and m2 < 10", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + selector("j0.unnest", "b", null) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .filters( + and( + bound("m1", null, "10", false, true, null, StringComparators.NUMERIC), + bound("m2", null, "10", false, true, null, StringComparators.NUMERIC) + ) + ) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } + + @Test + public void testUnnestWithMultipleOrFiltersOnSelectedColumns() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' or m1 < 2 ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + null + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .filters( + or( + selector("j0.unnest", "b", null), + bound("m1", null, "2", false, true, null, StringComparators.NUMERIC) + ) + ) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } } From 1bd289bafd0937279e2cb5b9a84c2c7c0462e6bc Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Tue, 14 Mar 2023 21:17:51 -0700 Subject: [PATCH 10/18] Redesigning or filters for unnest, now an or will be optimized by creating a new or on input dimension with the entire filter appearing post unnest also --- .../druid/segment/UnnestStorageAdapter.java | 71 ++++++++++-------- .../sql/calcite/CalciteArraysQueryTest.java | 72 +++++++++++++++++++ 2 files changed, 115 insertions(+), 28 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 6ace0fc14a6b..6e2a4c9523f0 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -19,6 +19,7 @@ package org.apache.druid.segment; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import org.apache.druid.java.util.common.Pair; @@ -39,6 +40,7 @@ import org.apache.druid.segment.filter.Filters; import org.apache.druid.segment.filter.LikeFilter; import org.apache.druid.segment.filter.NotFilter; +import org.apache.druid.segment.filter.OrFilter; import org.apache.druid.segment.filter.SelectorFilter; import org.apache.druid.segment.join.PostJoinCursor; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; @@ -275,16 +277,18 @@ class FilterSplitter final List preFilters = new ArrayList<>(); final List postFilters = new ArrayList<>(); - void addPostFilterWithPreFilterIfRewritePossible(@Nullable final Filter filter) + void addPostFilterWithPreFilterIfRewritePossible(@Nullable final Filter filter, boolean skipPreFilters) { if (filter == null) { return; } - final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(filter, inputColumn, inputColumnCapabilites); - if (newFilter != null) { - // Add the rewritten filter pre-unnest, so we get the benefit of any indexes, and so we avoid unnesting - // any rows that do not match this filter at all. - preFilters.add(newFilter); + if (!skipPreFilters) { + final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(filter, inputColumn, inputColumnCapabilites); + if (newFilter != null) { + // Add the rewritten filter pre-unnest, so we get the benefit of any indexes, and so we avoid unnesting + // any rows that do not match this filter at all. + preFilters.add(newFilter); + } } // Add original filter post-unnest no matter what: we need to filter out any extraneous unnested values. postFilters.add(filter); @@ -307,42 +311,53 @@ void addPreFilter(@Nullable final Filter filter) } } } - // this happens with an or filter when calcite plans both filters atop Correlate - // For example: - // SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' or m1 < 2 - // Plans to: - // 116:LogicalProject(d3=[$17]) - // 114:LogicalFilter(subset=[rel#115:Subset#6.NONE.[]], condition=[OR(=($17, 'b'), <($14, 2))]) - // 112:LogicalCorrelate(subset=[rel#113:Subset#5.NONE.[]], correlation=[$cor0], joinType=[inner], requiredColumns=[{3}]) - // 8:LogicalTableScan(subset=[rel#104:Subset#0.NONE.[]], table=[[druid, numfoo]]) - // 108:Uncollect(subset=[rel#109:Subset#3.NONE.[]]) - // 106:LogicalProject(subset=[rel#107:Subset#2.NONE.[]], EXPR$0=[MV_TO_ARRAY($cor0.dim3)]) - // 9:LogicalValues(subset=[rel#105:Subset#1.NONE.[0]], tuples=[[{ 0 }]]) - // Run filter post-unnest if it refers to the outputColumnName - if (requiredColumns.contains(outputColumnName)) { - postFilters.add(filter); - } else { - preFilters.add(filter); - } + preFilters.add(filter); + } } final FilterSplitter filterSplitter = new FilterSplitter(); - if (queryFilter instanceof AndFilter) { - for (Filter filter : ((AndFilter) queryFilter).getFilters()) { - filterSplitter.addPreFilter(filter); + // non-unnest case + // OR CASE + List preFilterList = new ArrayList<>(); + List postFilterList = new ArrayList<>(); + if (queryFilter instanceof OrFilter) { + for (Filter filter : ((OrFilter) queryFilter).getFilters()) { + if (filter.getRequiredColumns().contains(outputColumnName)) { + final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(filter, inputColumn, inputColumnCapabilites); + if (newFilter != null) { + preFilterList.add(newFilter); + postFilterList.add(newFilter); + } else { + postFilterList.add(filter); + } + } else { + preFilterList.add(filter); + } } + } + if (!preFilterList.isEmpty()) { + OrFilter orFilter = new OrFilter(preFilterList); + filterSplitter.addPreFilter(orFilter); } else { filterSplitter.addPreFilter(queryFilter); } + if (!postFilterList.isEmpty()) { + AndFilter andFilter = new AndFilter(postFilterList); + andFilter = new AndFilter(ImmutableList.of(andFilter, queryFilter)); + filterSplitter.addPostFilterWithPreFilterIfRewritePossible(andFilter, true); + } + + + // unnest case if (unnestFilter instanceof AndFilter) { for (Filter filter : ((AndFilter) unnestFilter).getFilters()) { - filterSplitter.addPostFilterWithPreFilterIfRewritePossible(filter); + filterSplitter.addPostFilterWithPreFilterIfRewritePossible(filter, false); } } else { - filterSplitter.addPostFilterWithPreFilterIfRewritePossible(unnestFilter); + filterSplitter.addPostFilterWithPreFilterIfRewritePossible(unnestFilter, false); } return Pair.of( diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index beab714ef1b2..239626937862 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -3810,6 +3810,38 @@ public void testUnnestWithSelectorFiltersOnVirtualColumn() ); } + @Test + public void testUnnestWithSelectorFiltersOnVirtualStringColumn() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d45 FROM druid.numfoo, UNNEST(ARRAY[dim4,dim5]) as unnested (d45) where d45 IN ('a','ab')", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY), + new InDimFilter("j0.unnest", ImmutableSet.of("a", "ab"), null) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"a"}, + new Object[]{"ab"}, + new Object[]{"a"}, + new Object[]{"ab"} + ) + ); + } + @Test public void testUnnestWithMultipleAndFiltersOnSelectedColumns() { @@ -3852,6 +3884,11 @@ public void testUnnestWithMultipleOrFiltersOnSelectedColumns() cannotVectorize(); testQuery( "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' or m1 < 2 ", + // go over each filter in the OR + // d3 -> dim3 + // recreate an or filter with dim3 replacing d3 + // push this or filter to base cursor + // original filter goes to post QUERY_CONTEXT_UNNEST, ImmutableList.of( Druids.newScanQueryBuilder() @@ -3880,4 +3917,39 @@ public void testUnnestWithMultipleOrFiltersOnSelectedColumns() ) ); } + + @Test + public void testUnnestWithMultipleOrFiltersOnSelectedVirtualColumns() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d45 FROM druid.numfoo, UNNEST(ARRAY[dim4,dim5]) as unnested (d45) where d45 IN ('a','aa') or m1 < 2 ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY), + null + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .filters( + or( + bound("m1", null, "2", false, true, null, StringComparators.NUMERIC), + new InDimFilter("j0.unnest", ImmutableSet.of("a", "aa"), null) + ) + ) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"aa"} + ) + ); + } } From f930890c551cbc4d12c6d1fe3841233b0bca5461 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Wed, 15 Mar 2023 14:09:25 -0700 Subject: [PATCH 11/18] Refactoring for OR filter case and adding more comments and examples --- .../apache/druid/query/UnnestDataSource.java | 32 +++-- .../druid/segment/UnnestStorageAdapter.java | 125 ++++++++++++------ .../calcite/rule/DruidFilterUnnestRule.java | 8 +- .../druid/sql/calcite/rule/DruidRules.java | 2 +- .../sql/calcite/CalciteArraysQueryTest.java | 73 ++++++++++ 5 files changed, 178 insertions(+), 62 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java index 49fb9ed18631..acd984b64422 100644 --- a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java +++ b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java @@ -181,6 +181,17 @@ public DataSourceAnalysis getAnalysis() return current.getAnalysis(); } + + @Override + public String toString() + { + return "UnnestDataSource{" + + "base=" + base + + ", column='" + virtualColumn + '\'' + + ", unnestFilter='" + unnestFilter + '\'' + + '}'; + } + @Override public boolean equals(Object o) { @@ -191,12 +202,10 @@ public boolean equals(Object o) return false; } UnnestDataSource that = (UnnestDataSource) o; - if (unnestFilter != null) { - return virtualColumn.equals(that.virtualColumn) && unnestFilter.equals(that.unnestFilter) - && base.equals(that.base); - } else { - return virtualColumn.equals(that.virtualColumn) && base.equals(that.base); - } + return base.equals(that.base) && virtualColumn.equals(that.virtualColumn) && Objects.equals( + unnestFilter, + that.unnestFilter + ); } @Override @@ -204,17 +213,6 @@ public int hashCode() { return Objects.hash(base, virtualColumn, unnestFilter); } - - @Override - public String toString() - { - return "UnnestDataSource{" + - "base=" + base + - ", column='" + virtualColumn + '\'' + - ", unnestFilter='" + unnestFilter + '\'' + - '}'; - } - } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 6e2a4c9523f0..ad75b9d9fc90 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -19,7 +19,6 @@ package org.apache.druid.segment; -import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import org.apache.druid.java.util.common.Pair; @@ -272,10 +271,48 @@ private Pair computeBaseAndPostUnnestFilters( @Nullable final ColumnCapabilities inputColumnCapabilites ) { + /* + The goal of this function is to take a filter from the top of Correlate (queryFilter) + and a filter from the top of Uncollect (here unnest filter) and then do a rewrite + to generate filters to be passed to base cursor (pre-filters) and unnest cursor (post-filters) + based on the following scenarios: + + 1. If there is an AND filter between unnested column and left e.g. select * from foo, UNNEST(dim3) as u(d3) where d3 IN (a,b) and m1 < 10 + query filter -> m1 < 10 + unnest filter -> d3 IN (a,b) + + Output should be: + pre-filter -> dim3 IN (a,b) AND m1 < 10 + post-filter -> d3 IN (a,b) + + 2. There is an AND filter between unnested column and left e.g. select * from foo, UNNEST(ARRAY[dim1,dim2]) as u(d12) where d12 IN (a,b) and m1 < 10 + query filter -> m1 < 10 + unnest filter -> d12 IN (a,b) + + Output should be: + pre-filter -> m1 < 10 (as unnest is on a virtual column it cannot be added to the pre-filter) + post-filter -> d12 IN (a,b) + + 3. There is an OR filter involving unnested and left column e.g. select * from foo, UNNEST(dim3) as u(d3) where d3 IN (a,b) or m1 < 10 + query filter -> d3 IN (a,b) or m1 < 10 + unnest filter -> null + + Output should be: + query filter -> dim3 IN (a,b) or m1 < 10 + post filter -> d3 IN (a,b) or m1 < 10 + + 4. There is an OR filter involving unnested and left column e.g. select * from foo, UNNEST(ARRAY[dim1,dim2]) as u(d12) where d12 IN (a,b) or m1 < 10 + query filter -> d12 IN (a,b) or m1 < 10 + unnest filter -> null + + Output should be: + query filter -> null (as the filter cannot be re-written due to presence of virtual columns) + post filter -> d12 IN (a,b) or m1 < 10 + */ class FilterSplitter { - final List preFilters = new ArrayList<>(); - final List postFilters = new ArrayList<>(); + final List filtersOnLeftDataSource = new ArrayList<>(); + final List filtersOnUncollect = new ArrayList<>(); void addPostFilterWithPreFilterIfRewritePossible(@Nullable final Filter filter, boolean skipPreFilters) { @@ -287,11 +324,11 @@ void addPostFilterWithPreFilterIfRewritePossible(@Nullable final Filter filter, if (newFilter != null) { // Add the rewritten filter pre-unnest, so we get the benefit of any indexes, and so we avoid unnesting // any rows that do not match this filter at all. - preFilters.add(newFilter); + filtersOnLeftDataSource.add(newFilter); } } // Add original filter post-unnest no matter what: we need to filter out any extraneous unnested values. - postFilters.add(filter); + filtersOnUncollect.add(filter); } void addPreFilter(@Nullable final Filter filter) @@ -302,56 +339,63 @@ void addPreFilter(@Nullable final Filter filter) final Set requiredColumns = filter.getRequiredColumns(); - // Run filter post-unnest if it refers to any virtual columns. + // Run filter post-unnest if it refers to any virtual columns. This is a conservative judgement call + // that perhaps forces the code to use a ValueMatcher where an index would've been available, + // which can have real performance implications. This is an interim choice made to value correctness + // over performance. When we need to optimize this performance, we should be able to + // create a VirtualColumnDatasource that contains all the virtual columns, in which case the query + // itself would stop carrying them and everything should be able to be pushed down. if (queryVirtualColumns.getVirtualColumns().length > 0) { for (String column : requiredColumns) { if (queryVirtualColumns.exists(column)) { - postFilters.add(filter); + filtersOnUncollect.add(filter); return; } } } - preFilters.add(filter); + filtersOnLeftDataSource.add(filter); } } final FilterSplitter filterSplitter = new FilterSplitter(); - // non-unnest case - // OR CASE - List preFilterList = new ArrayList<>(); - List postFilterList = new ArrayList<>(); - if (queryFilter instanceof OrFilter) { - for (Filter filter : ((OrFilter) queryFilter).getFilters()) { - if (filter.getRequiredColumns().contains(outputColumnName)) { - final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(filter, inputColumn, inputColumnCapabilites); - if (newFilter != null) { - preFilterList.add(newFilter); - postFilterList.add(newFilter); - } else { - postFilterList.add(filter); + if (queryFilter != null) { + List preFilterList = new ArrayList<>(); + final int origFilterSize; + if (queryFilter.getRequiredColumns().contains(outputColumnName)) { + // outside filter contains unnested column + // requires check for OR + if (queryFilter instanceof OrFilter) { + origFilterSize = ((OrFilter) queryFilter).getFilters().size(); + for (Filter filter : ((OrFilter) queryFilter).getFilters()) { + if (filter.getRequiredColumns().contains(outputColumnName)) { + final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible( + filter, + inputColumn, + inputColumnCapabilites + ); + if (newFilter != null) { + preFilterList.add(newFilter); + } + } else { + preFilterList.add(filter); + } + } + if (preFilterList.size() == origFilterSize) { + // there has been successful rewrites + final OrFilter preOrFilter = new OrFilter(preFilterList); + filterSplitter.addPreFilter(preOrFilter); } - } else { - preFilterList.add(filter); + // add the entire query filter to unnest filter to be used in Value matcher + filterSplitter.addPostFilterWithPreFilterIfRewritePossible(queryFilter, true); } + } else { + // normal case without any filter on unnested column + // add everything to pre-filters + filterSplitter.addPreFilter(queryFilter); } } - if (!preFilterList.isEmpty()) { - OrFilter orFilter = new OrFilter(preFilterList); - filterSplitter.addPreFilter(orFilter); - } else { - filterSplitter.addPreFilter(queryFilter); - } - - if (!postFilterList.isEmpty()) { - AndFilter andFilter = new AndFilter(postFilterList); - andFilter = new AndFilter(ImmutableList.of(andFilter, queryFilter)); - filterSplitter.addPostFilterWithPreFilterIfRewritePossible(andFilter, true); - } - - - // unnest case if (unnestFilter instanceof AndFilter) { for (Filter filter : ((AndFilter) unnestFilter).getFilters()) { filterSplitter.addPostFilterWithPreFilterIfRewritePossible(filter, false); @@ -361,11 +405,12 @@ void addPreFilter(@Nullable final Filter filter) } return Pair.of( - Filters.maybeAnd(filterSplitter.preFilters).orElse(null), - Filters.maybeAnd(filterSplitter.postFilters).orElse(null) + Filters.maybeAnd(filterSplitter.filtersOnLeftDataSource).orElse(null), + Filters.maybeAnd(filterSplitter.filtersOnUncollect).orElse(null) ); } + /** * Returns the input of {@link #unnestColumn}, if it's a direct access; otherwise returns null. */ diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java index d0d63876867d..ebce45fe38c6 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java @@ -71,11 +71,11 @@ public void onMatch(RelOptRuleCall call) // So we can skip this LogicalProject only if it is a CAST for strings or LITERALS for other types // Extensive unit tests can be found in {@link CalciteArraysQueryTest} - static class DruidProjectOnCorrelateRule extends RelOptRule + static class DruidProjectOnUnnestRule extends RelOptRule { - private static final DruidProjectOnCorrelateRule INSTANCE = new DruidProjectOnCorrelateRule(); + private static final DruidProjectOnUnnestRule INSTANCE = new DruidProjectOnUnnestRule(); - private DruidProjectOnCorrelateRule() + private DruidProjectOnUnnestRule() { super( operand( @@ -85,7 +85,7 @@ private DruidProjectOnCorrelateRule() ); } - public static DruidProjectOnCorrelateRule instance() + public static DruidProjectOnUnnestRule instance() { return INSTANCE; } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java index 42110f2bccc4..52841fd99a01 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java @@ -121,7 +121,7 @@ public static List rules(PlannerContext plannerContext) retVal.add(ProjectCorrelateTransposeRule.INSTANCE); retVal.add(CorrelateFilterLTransposeRule.instance()); retVal.add(DruidFilterUnnestRule.instance()); - retVal.add(DruidFilterUnnestRule.DruidProjectOnCorrelateRule.instance()); + retVal.add(DruidFilterUnnestRule.DruidProjectOnUnnestRule.instance()); } return retVal; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 239626937862..5e0826a32550 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -3918,6 +3918,76 @@ public void testUnnestWithMultipleOrFiltersOnSelectedColumns() ); } + @Test + public void testUnnestWithMultipleOrFiltersOnUnnestedColumns() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' or d3='d' ", + // go over each filter in the OR + // d3 -> dim3 + // recreate an or filter with dim3 replacing d3 + // push this or filter to base cursor + // original filter goes to post + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + new InDimFilter("j0.unnest", ImmutableSet.of("b", "d"), null) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"d"} + ) + ); + } + + @Test + public void testUnnestWithMultipleOrFiltersOnSelectedNonUnnestedColumns() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where m1 < 2 or m2 < 2 ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + null + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .filters( + or( + bound("m1", null, "2", false, true, null, StringComparators.NUMERIC), + bound("m2", null, "2", false, true, null, StringComparators.NUMERIC) + ) + ) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"} + ) + ); + } + @Test public void testUnnestWithMultipleOrFiltersOnSelectedVirtualColumns() { @@ -3947,6 +4017,9 @@ public void testUnnestWithMultipleOrFiltersOnSelectedVirtualColumns() .build() ), ImmutableList.of( + new Object[]{"a"}, + new Object[]{"aa"}, + new Object[]{"a"}, new Object[]{"a"}, new Object[]{"aa"} ) From 606396da139ee5c74407a5af1e99f7d44a0ed4a7 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Wed, 15 Mar 2023 14:23:29 -0700 Subject: [PATCH 12/18] A change in the comments to be in sync with code --- .../druid/segment/UnnestStorageAdapter.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index ad75b9d9fc90..59922456abce 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -274,7 +274,7 @@ private Pair computeBaseAndPostUnnestFilters( /* The goal of this function is to take a filter from the top of Correlate (queryFilter) and a filter from the top of Uncollect (here unnest filter) and then do a rewrite - to generate filters to be passed to base cursor (pre-filters) and unnest cursor (post-filters) + to generate filters to be passed to base cursor (filtersOnLeftDataSource) and unnest cursor (filtersOnUncollect) based on the following scenarios: 1. If there is an AND filter between unnested column and left e.g. select * from foo, UNNEST(dim3) as u(d3) where d3 IN (a,b) and m1 < 10 @@ -282,32 +282,32 @@ to generate filters to be passed to base cursor (pre-filters) and unnest cursor unnest filter -> d3 IN (a,b) Output should be: - pre-filter -> dim3 IN (a,b) AND m1 < 10 - post-filter -> d3 IN (a,b) + filtersOnLeftDataSource -> dim3 IN (a,b) AND m1 < 10 + filtersOnUncollect -> d3 IN (a,b) 2. There is an AND filter between unnested column and left e.g. select * from foo, UNNEST(ARRAY[dim1,dim2]) as u(d12) where d12 IN (a,b) and m1 < 10 query filter -> m1 < 10 unnest filter -> d12 IN (a,b) Output should be: - pre-filter -> m1 < 10 (as unnest is on a virtual column it cannot be added to the pre-filter) - post-filter -> d12 IN (a,b) + filtersOnLeftDataSource -> m1 < 10 (as unnest is on a virtual column it cannot be added to the pre-filter) + filtersOnUncollect -> d12 IN (a,b) 3. There is an OR filter involving unnested and left column e.g. select * from foo, UNNEST(dim3) as u(d3) where d3 IN (a,b) or m1 < 10 query filter -> d3 IN (a,b) or m1 < 10 unnest filter -> null Output should be: - query filter -> dim3 IN (a,b) or m1 < 10 - post filter -> d3 IN (a,b) or m1 < 10 + filtersOnLeftDataSource -> dim3 IN (a,b) or m1 < 10 + filtersOnUncollect -> d3 IN (a,b) or m1 < 10 4. There is an OR filter involving unnested and left column e.g. select * from foo, UNNEST(ARRAY[dim1,dim2]) as u(d12) where d12 IN (a,b) or m1 < 10 query filter -> d12 IN (a,b) or m1 < 10 unnest filter -> null Output should be: - query filter -> null (as the filter cannot be re-written due to presence of virtual columns) - post filter -> d12 IN (a,b) or m1 < 10 + filtersOnLeftDataSource -> null (as the filter cannot be re-written due to presence of virtual columns) + filtersOnUncollect -> d12 IN (a,b) or m1 < 10 */ class FilterSplitter { From 59ce1ba2999ac355cbda5d3080ec8f60e56a6cf1 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Wed, 15 Mar 2023 17:45:21 -0700 Subject: [PATCH 13/18] New test cases with or filters and slight refactoring bu removing AndFilter instanceof checks --- .../druid/segment/UnnestStorageAdapter.java | 9 +- .../rule/CorrelateFilterLTransposeRule.java | 1 - .../sql/calcite/CalciteArraysQueryTest.java | 90 ++++++++++++++++--- 3 files changed, 81 insertions(+), 19 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 59922456abce..b88ce0d7f47f 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -34,7 +34,6 @@ import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.ListIndexed; -import org.apache.druid.segment.filter.AndFilter; import org.apache.druid.segment.filter.BoundFilter; import org.apache.druid.segment.filter.Filters; import org.apache.druid.segment.filter.LikeFilter; @@ -396,13 +395,7 @@ void addPreFilter(@Nullable final Filter filter) filterSplitter.addPreFilter(queryFilter); } } - if (unnestFilter instanceof AndFilter) { - for (Filter filter : ((AndFilter) unnestFilter).getFilters()) { - filterSplitter.addPostFilterWithPreFilterIfRewritePossible(filter, false); - } - } else { - filterSplitter.addPostFilterWithPreFilterIfRewritePossible(unnestFilter, false); - } + filterSplitter.addPostFilterWithPreFilterIfRewritePossible(unnestFilter, false); return Pair.of( Filters.maybeAnd(filterSplitter.filtersOnLeftDataSource).orElse(null), diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterLTransposeRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterLTransposeRule.java index 7b15c2cab735..3fd4baf6f5b0 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterLTransposeRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterLTransposeRule.java @@ -30,7 +30,6 @@ * Rule that pulls a {@link Filter} from the left-hand side of a {@link Correlate} above the Correlate. * Allows subquery elimination. * - * @see DruidFilterUnnestRule similar, but for right-hand side filters */ public class CorrelateFilterLTransposeRule extends RelOptRule { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 5e0826a32550..35f8fe9e7996 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -3884,11 +3884,6 @@ public void testUnnestWithMultipleOrFiltersOnSelectedColumns() cannotVectorize(); testQuery( "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' or m1 < 2 ", - // go over each filter in the OR - // d3 -> dim3 - // recreate an or filter with dim3 replacing d3 - // push this or filter to base cursor - // original filter goes to post QUERY_CONTEXT_UNNEST, ImmutableList.of( Druids.newScanQueryBuilder() @@ -3918,6 +3913,39 @@ public void testUnnestWithMultipleOrFiltersOnSelectedColumns() ); } + @Test + public void testUnnestWithMultipleAndFiltersOnSelectedUnnestedColumns() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('a','b') and d3 < 'e' ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + and( + new InDimFilter("j0.unnest", ImmutableSet.of("a", "b"), null), + bound("j0.unnest", null, "e", false, true, null, StringComparators.LEXICOGRAPHIC) + ) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } + @Test public void testUnnestWithMultipleOrFiltersOnUnnestedColumns() { @@ -3925,11 +3953,6 @@ public void testUnnestWithMultipleOrFiltersOnUnnestedColumns() cannotVectorize(); testQuery( "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' or d3='d' ", - // go over each filter in the OR - // d3 -> dim3 - // recreate an or filter with dim3 replacing d3 - // push this or filter to base cursor - // original filter goes to post QUERY_CONTEXT_UNNEST, ImmutableList.of( Druids.newScanQueryBuilder() @@ -3953,6 +3976,53 @@ public void testUnnestWithMultipleOrFiltersOnUnnestedColumns() ); } + @Test + public void testUnnestWithMultipleOrFiltersOnVariationsOfUnnestedColumns() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where strlen(d3) < 2 or d3='d' ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + or( + new ExpressionDimFilter("(strlen(\"j0.unnest\") < 2)", TestExprMacroTable.INSTANCE), + selector("j0.unnest", "d", null) + ) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + useDefault ? + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"c"}, + new Object[]{"d"}, + new Object[]{""}, + new Object[]{""}, + new Object[]{""} + ) : + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"c"}, + new Object[]{"d"}, + new Object[]{""} + ) + ); + } + @Test public void testUnnestWithMultipleOrFiltersOnSelectedNonUnnestedColumns() { From 9f5b276ebe2728db792f4b29a6a2be02aea797a8 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Wed, 15 Mar 2023 19:02:39 -0700 Subject: [PATCH 14/18] Minor nits in comment + new test --- .../druid/segment/UnnestStorageAdapter.java | 35 +++++++++--------- .../sql/calcite/CalciteArraysQueryTest.java | 36 +++++++++++++++++++ 2 files changed, 54 insertions(+), 17 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index b88ce0d7f47f..245e56a63629 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -257,6 +257,7 @@ public VirtualColumn getUnnestColumn() * Split queryFilter into pre- and post-correlate filters. * * @param queryFilter query filter passed to makeCursors + * @param unnestFilter filter on unnested column passed to PostUnnestCursor * @param queryVirtualColumns query virtual columns passed to makeCursors * @param inputColumn input column to unnest if it's a direct access; otherwise null * @param inputColumnCapabilites input column capabilities if known; otherwise null @@ -273,7 +274,7 @@ private Pair computeBaseAndPostUnnestFilters( /* The goal of this function is to take a filter from the top of Correlate (queryFilter) and a filter from the top of Uncollect (here unnest filter) and then do a rewrite - to generate filters to be passed to base cursor (filtersOnLeftDataSource) and unnest cursor (filtersOnUncollect) + to generate filters to be passed to base cursor (filtersPushedDownToBaseCursor) and unnest cursor (filtersForPostUnnestCursor) based on the following scenarios: 1. If there is an AND filter between unnested column and left e.g. select * from foo, UNNEST(dim3) as u(d3) where d3 IN (a,b) and m1 < 10 @@ -281,37 +282,37 @@ to generate filters to be passed to base cursor (filtersOnLeftDataSource) and un unnest filter -> d3 IN (a,b) Output should be: - filtersOnLeftDataSource -> dim3 IN (a,b) AND m1 < 10 - filtersOnUncollect -> d3 IN (a,b) + filtersPushedDownToBaseCursor -> dim3 IN (a,b) AND m1 < 10 + filtersForPostUnnestCursor -> d3 IN (a,b) 2. There is an AND filter between unnested column and left e.g. select * from foo, UNNEST(ARRAY[dim1,dim2]) as u(d12) where d12 IN (a,b) and m1 < 10 query filter -> m1 < 10 unnest filter -> d12 IN (a,b) Output should be: - filtersOnLeftDataSource -> m1 < 10 (as unnest is on a virtual column it cannot be added to the pre-filter) - filtersOnUncollect -> d12 IN (a,b) + filtersPushedDownToBaseCursor -> m1 < 10 (as unnest is on a virtual column it cannot be added to the pre-filter) + filtersForPostUnnestCursor -> d12 IN (a,b) 3. There is an OR filter involving unnested and left column e.g. select * from foo, UNNEST(dim3) as u(d3) where d3 IN (a,b) or m1 < 10 query filter -> d3 IN (a,b) or m1 < 10 unnest filter -> null Output should be: - filtersOnLeftDataSource -> dim3 IN (a,b) or m1 < 10 - filtersOnUncollect -> d3 IN (a,b) or m1 < 10 + filtersPushedDownToBaseCursor -> dim3 IN (a,b) or m1 < 10 + filtersForPostUnnestCursor -> d3 IN (a,b) or m1 < 10 4. There is an OR filter involving unnested and left column e.g. select * from foo, UNNEST(ARRAY[dim1,dim2]) as u(d12) where d12 IN (a,b) or m1 < 10 query filter -> d12 IN (a,b) or m1 < 10 unnest filter -> null Output should be: - filtersOnLeftDataSource -> null (as the filter cannot be re-written due to presence of virtual columns) - filtersOnUncollect -> d12 IN (a,b) or m1 < 10 + filtersPushedDownToBaseCursor -> null (as the filter cannot be re-written due to presence of virtual columns) + filtersForPostUnnestCursor -> d12 IN (a,b) or m1 < 10 */ class FilterSplitter { - final List filtersOnLeftDataSource = new ArrayList<>(); - final List filtersOnUncollect = new ArrayList<>(); + final List filtersPushedDownToBaseCursor = new ArrayList<>(); + final List filtersForPostUnnestCursor = new ArrayList<>(); void addPostFilterWithPreFilterIfRewritePossible(@Nullable final Filter filter, boolean skipPreFilters) { @@ -323,11 +324,11 @@ void addPostFilterWithPreFilterIfRewritePossible(@Nullable final Filter filter, if (newFilter != null) { // Add the rewritten filter pre-unnest, so we get the benefit of any indexes, and so we avoid unnesting // any rows that do not match this filter at all. - filtersOnLeftDataSource.add(newFilter); + filtersPushedDownToBaseCursor.add(newFilter); } } // Add original filter post-unnest no matter what: we need to filter out any extraneous unnested values. - filtersOnUncollect.add(filter); + filtersForPostUnnestCursor.add(filter); } void addPreFilter(@Nullable final Filter filter) @@ -347,12 +348,12 @@ void addPreFilter(@Nullable final Filter filter) if (queryVirtualColumns.getVirtualColumns().length > 0) { for (String column : requiredColumns) { if (queryVirtualColumns.exists(column)) { - filtersOnUncollect.add(filter); + filtersForPostUnnestCursor.add(filter); return; } } } - filtersOnLeftDataSource.add(filter); + filtersPushedDownToBaseCursor.add(filter); } } @@ -398,8 +399,8 @@ void addPreFilter(@Nullable final Filter filter) filterSplitter.addPostFilterWithPreFilterIfRewritePossible(unnestFilter, false); return Pair.of( - Filters.maybeAnd(filterSplitter.filtersOnLeftDataSource).orElse(null), - Filters.maybeAnd(filterSplitter.filtersOnUncollect).orElse(null) + Filters.maybeAnd(filterSplitter.filtersPushedDownToBaseCursor).orElse(null), + Filters.maybeAnd(filterSplitter.filtersForPostUnnestCursor).orElse(null) ); } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 35f8fe9e7996..6d42c3f54d1e 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -4095,4 +4095,40 @@ public void testUnnestWithMultipleOrFiltersOnSelectedVirtualColumns() ) ); } + + @Test + public void testUnnestWithMultipleOrFiltersOnUnnestedColumnsAndOnOriginalColumn() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' or dim3='d' ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + null + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters( + or( + selector("j0.unnest", "b", null), + selector("dim3", "d", null) + ) + ) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"d"} + ) + ); + } } From 3838d6c007a04b951abbd83cb79e69ec29ccf507 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Wed, 15 Mar 2023 20:17:12 -0700 Subject: [PATCH 15/18] More tests now in storage adapter to check filers and improve coverage --- .../druid/segment/UnnestStorageAdapter.java | 8 +- .../segment/UnnestStorageAdapterTest.java | 116 ++++++++++++++++++ 2 files changed, 120 insertions(+), 4 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 245e56a63629..3814baffb1ef 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -89,7 +89,7 @@ public Sequence makeCursors( @Nullable QueryMetrics queryMetrics ) { - final String inputColumn = getUnnestInputIfDirectAccess(); + final String inputColumn = getUnnestInputIfDirectAccess(unnestColumn); final Pair filterPair = computeBaseAndPostUnnestFilters( filter, unnestFilter != null ? unnestFilter.toFilter() : null, @@ -263,7 +263,7 @@ public VirtualColumn getUnnestColumn() * @param inputColumnCapabilites input column capabilities if known; otherwise null * @return pair of pre- and post-unnest filters */ - private Pair computeBaseAndPostUnnestFilters( + public Pair computeBaseAndPostUnnestFilters( @Nullable final Filter queryFilter, @Nullable final Filter unnestFilter, final VirtualColumns queryVirtualColumns, @@ -409,7 +409,7 @@ void addPreFilter(@Nullable final Filter filter) * Returns the input of {@link #unnestColumn}, if it's a direct access; otherwise returns null. */ @Nullable - private String getUnnestInputIfDirectAccess() + public String getUnnestInputIfDirectAccess(VirtualColumn unnestColumn) { if (unnestColumn instanceof ExpressionVirtualColumn) { return ((ExpressionVirtualColumn) unnestColumn).getParsedExpression().get().getBindingIfIdentifier(); @@ -420,7 +420,7 @@ private String getUnnestInputIfDirectAccess() /** * Rewrites a filter on {@link #outputColumnName} to operate on the input column from - * {@link #getUnnestInputIfDirectAccess()}, if possible. + * if possible. */ @Nullable private Filter rewriteFilterOnUnnestColumnIfPossible( diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index e47f09e54e1e..6684de923474 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -21,13 +21,18 @@ import com.google.common.collect.ImmutableList; import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.filter.Filter; +import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.filter.OrFilter; +import org.apache.druid.segment.filter.SelectorFilter; import org.apache.druid.segment.generator.GeneratorBasicSchemas; import org.apache.druid.segment.generator.GeneratorSchemaInfo; import org.apache.druid.segment.generator.SegmentGenerator; @@ -55,6 +60,7 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest private static IncrementalIndexStorageAdapter INCREMENTAL_INDEX_STORAGE_ADAPTER; private static UnnestStorageAdapter UNNEST_STORAGE_ADAPTER; private static UnnestStorageAdapter UNNEST_STORAGE_ADAPTER1; + private static UnnestStorageAdapter UNNEST_STORAGE_ADAPTER2; private static List ADAPTERS; private static String COLUMNNAME = "multi-string1"; private static String OUTPUT_COLUMN_NAME = "unnested-multi-string1"; @@ -92,6 +98,13 @@ public static void setup() null ); + UNNEST_STORAGE_ADAPTER2 = new UnnestStorageAdapter( + INCREMENTAL_INDEX_STORAGE_ADAPTER, + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), + new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null) + ); + + ADAPTERS = ImmutableList.of( UNNEST_STORAGE_ADAPTER, UNNEST_STORAGE_ADAPTER1 @@ -247,4 +260,107 @@ private static void assertColumnReadsIdentifier(final VirtualColumn column, fina MatcherAssert.assertThat(column, CoreMatchers.instanceOf(ExpressionVirtualColumn.class)); Assert.assertEquals("\"" + identifier + "\"", ((ExpressionVirtualColumn) column).getExpression()); } + + @Test + public void test_unnest_adapters_with_no_base_filter_active_unnest_filter() + { + + Sequence cursorSequence = UNNEST_STORAGE_ADAPTER2.makeCursors( + null, + UNNEST_STORAGE_ADAPTER2.getInterval(), + VirtualColumns.EMPTY, + Granularities.ALL, + false, + null + ); + + cursorSequence.accumulate(null, (accumulated, cursor) -> { + ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); + + DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME)); + int count = 0; + while (!cursor.isDone()) { + Object dimSelectorVal = dimSelector.getObject(); + if (dimSelectorVal == null) { + Assert.assertNull(dimSelectorVal); + } + cursor.advance(); + count++; + } + Assert.assertEquals(1, count); + Filter unnestFilter = new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null).toFilter(); + VirtualColumn vc = new ExpressionVirtualColumn( + OUTPUT_COLUMN_NAME, + "\"" + COLUMNNAME + "\"", + null, + ExprMacroTable.nil() + ); + final String inputColumn = UNNEST_STORAGE_ADAPTER2.getUnnestInputIfDirectAccess(vc); + Pair filterPair = UNNEST_STORAGE_ADAPTER2.computeBaseAndPostUnnestFilters( + null, + unnestFilter, + VirtualColumns.EMPTY, + inputColumn, + INCREMENTAL_INDEX_STORAGE_ADAPTER.getColumnCapabilities(inputColumn) + ); + SelectorFilter left = ((SelectorFilter) filterPair.lhs); + SelectorFilter right = ((SelectorFilter) filterPair.rhs); + Assert.assertEquals(inputColumn, left.getDimension()); + Assert.assertEquals(OUTPUT_COLUMN_NAME, right.getDimension()); + Assert.assertEquals(right.getValue(), left.getValue()); + return null; + }); + } + + @Test + public void test_unnest_adapters_with_base_or_filter_no_unnest_filter() + { + VirtualColumn vc = new ExpressionVirtualColumn( + OUTPUT_COLUMN_NAME, + "\"" + COLUMNNAME + "\"", + null, + ExprMacroTable.nil() + ); + final String inputColumn = UNNEST_STORAGE_ADAPTER.getUnnestInputIfDirectAccess(vc); + final OrFilter baseFilter = new OrFilter(ImmutableList.of( + new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null).toFilter(), + new SelectorDimFilter(inputColumn, "2", null).toFilter() + )); + Sequence cursorSequence = UNNEST_STORAGE_ADAPTER.makeCursors( + baseFilter, + UNNEST_STORAGE_ADAPTER.getInterval(), + VirtualColumns.EMPTY, + Granularities.ALL, + false, + null + ); + + cursorSequence.accumulate(null, (accumulated, cursor) -> { + ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); + + DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME)); + int count = 0; + while (!cursor.isDone()) { + Object dimSelectorVal = dimSelector.getObject(); + if (dimSelectorVal == null) { + Assert.assertNull(dimSelectorVal); + } + cursor.advance(); + count++; + } + + Pair filterPair = UNNEST_STORAGE_ADAPTER2.computeBaseAndPostUnnestFilters( + baseFilter, + null, + VirtualColumns.EMPTY, + inputColumn, + INCREMENTAL_INDEX_STORAGE_ADAPTER.getColumnCapabilities(inputColumn) + ); + OrFilter left = ((OrFilter) filterPair.lhs); + OrFilter right = ((OrFilter) filterPair.rhs); + Assert.assertEquals("(multi-string1 = 1 || multi-string1 = 2)", left.toString()); + Assert.assertEquals("(unnested-multi-string1 = 1 || multi-string1 = 2)", right.toString()); + return null; + }); + } } From b3c02e7f5065cb4929bcfa3dd3bb319e397b4f3f Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Thu, 16 Mar 2023 10:26:39 -0700 Subject: [PATCH 16/18] Using matches in the rule now --- .../segment/UnnestStorageAdapterTest.java | 1 + .../calcite/rule/DruidFilterUnnestRule.java | 14 ++++--- .../sql/calcite/CalciteArraysQueryTest.java | 37 +++++++++++++++++++ 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index 6684de923474..646eea9d4904 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -274,6 +274,7 @@ public void test_unnest_adapters_with_no_base_filter_active_unnest_filter() null ); + cursorSequence.accumulate(null, (accumulated, cursor) -> { ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java index ebce45fe38c6..c732caaa2aed 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidFilterUnnestRule.java @@ -91,15 +91,19 @@ public static DruidProjectOnUnnestRule instance() } @Override - public void onMatch(RelOptRuleCall call) + public boolean matches(RelOptRuleCall call) { final Project rightP = call.rel(0); final SqlKind rightProjectKind = rightP.getChildExps().get(0).getKind(); - final DruidUnnestRel unnestDatasourceRel = call.rel(1); + // allow rule to trigger only if there's a string CAST or numeric literal cast + return rightP.getProjects().size() == 1 && (rightProjectKind == SqlKind.CAST || rightProjectKind == SqlKind.LITERAL); + } - if (rightP.getProjects().size() == 1 && (rightProjectKind == SqlKind.CAST || rightProjectKind == SqlKind.LITERAL)) { - call.transformTo(unnestDatasourceRel); - } + @Override + public void onMatch(RelOptRuleCall call) + { + final DruidUnnestRel unnestDatasourceRel = call.rel(1); + call.transformTo(unnestDatasourceRel); } } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 6d42c3f54d1e..80d7a5636cc6 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -4131,4 +4131,41 @@ public void testUnnestWithMultipleOrFiltersOnUnnestedColumnsAndOnOriginalColumn( ) ); } + + @Test + public void testUnnestWithMultipleOrFiltersOnUnnestedColumnsAndOnOriginalColumnDiffOrdering() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT dim3, d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where dim3='b' or d3='a' ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + null + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters( + or( + selector("dim3", "b", null), + selector("j0.unnest", "a", null) + ) + ) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("dim3", "j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"[\"a\",\"b\"]", "a"}, + new Object[]{"[\"a\",\"b\"]", "b"}, + new Object[]{"[\"b\",\"c\"]", "b"}, + new Object[]{"[\"b\",\"c\"]", "c"} + ) + ); + } } From 96524df4863ad3b58c43793a89c9659fd76c4cbe Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Fri, 17 Mar 2023 13:10:46 -0700 Subject: [PATCH 17/18] Fixing a NPE --- .../java/org/apache/druid/segment/UnnestDimensionCursor.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java index 6c4c1a953439..d69eca6109d1 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java @@ -142,6 +142,9 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) @Override public boolean matches() { + if (indexedIntsForCurrentRow == null) { + return false; + } if (indexedIntsForCurrentRow.size() <= 0) { return false; } From b446228a09039af9b0f0db55d4e9e1e152d79edb Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Tue, 21 Mar 2023 19:00:03 -0700 Subject: [PATCH 18/18] Refactoring in tests to allow validating the filters pushed down to the base cursor while using unnest with or and regular filters --- .../druid/segment/UnnestStorageAdapter.java | 11 ++ .../druid/segment/join/PostJoinCursor.java | 10 ++ .../segment/UnnestStorageAdapterTest.java | 163 +++++++++++------- 3 files changed, 118 insertions(+), 66 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 3814baffb1ef..8506a99d026b 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -60,6 +60,11 @@ */ public class UnnestStorageAdapter implements StorageAdapter { + public StorageAdapter getBaseAdapter() + { + return baseAdapter; + } + private final StorageAdapter baseAdapter; private final VirtualColumn unnestColumn; private final String outputColumnName; @@ -175,6 +180,12 @@ public Iterable getAvailableMetrics() return baseAdapter.getAvailableMetrics(); } + @Nullable + public Filter getUnnestFilter() + { + return unnestFilter.toFilter(); + } + @Override public int getDimensionCardinality(String column) { diff --git a/processing/src/main/java/org/apache/druid/segment/join/PostJoinCursor.java b/processing/src/main/java/org/apache/druid/segment/join/PostJoinCursor.java index d003da0bb320..11370fc167b2 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/PostJoinCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/join/PostJoinCursor.java @@ -41,6 +41,9 @@ public class PostJoinCursor implements Cursor @Nullable private final ValueMatcher valueMatcher; + @Nullable + private final Filter postJoinFilter; + private PostJoinCursor(Cursor baseCursor, VirtualColumns virtualColumns, @Nullable Filter filter) { this.baseCursor = baseCursor; @@ -52,6 +55,7 @@ private PostJoinCursor(Cursor baseCursor, VirtualColumns virtualColumns, @Nullab } else { this.valueMatcher = filter.makeMatcher(this.columnSelectorFactory); } + this.postJoinFilter = filter; } public static PostJoinCursor wrap( @@ -86,6 +90,12 @@ public DateTime getTime() return baseCursor.getTime(); } + @Nullable + public Filter getPostJoinFilter() + { + return postJoinFilter; + } + @Override public void advance() { diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index 646eea9d4904..f34dcc2b6b2e 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -21,23 +21,24 @@ import com.google.common.collect.ImmutableList; import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.query.QueryMetrics; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.filter.OrFilter; -import org.apache.druid.segment.filter.SelectorFilter; import org.apache.druid.segment.generator.GeneratorBasicSchemas; import org.apache.druid.segment.generator.GeneratorSchemaInfo; import org.apache.druid.segment.generator.SegmentGenerator; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter; +import org.apache.druid.segment.join.PostJoinCursor; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.testing.InitializedNullHandlingTest; import org.apache.druid.timeline.DataSegment; @@ -45,11 +46,13 @@ import org.apache.druid.utils.CloseableUtils; import org.hamcrest.CoreMatchers; import org.hamcrest.MatcherAssert; +import org.joda.time.Interval; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; +import javax.annotation.Nullable; import java.util.Arrays; import java.util.List; @@ -262,83 +265,86 @@ private static void assertColumnReadsIdentifier(final VirtualColumn column, fina } @Test - public void test_unnest_adapters_with_no_base_filter_active_unnest_filter() + public void test_pushdown_or_filters_unnested_and_original_dimension_with_unnest_adapters() { + final UnnestStorageAdapter unnestStorageAdapter = new UnnestStorageAdapter( + new TestStorageAdapter(INCREMENTAL_INDEX), + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), + null + ); - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER2.makeCursors( - null, - UNNEST_STORAGE_ADAPTER2.getInterval(), + final VirtualColumn vc = unnestStorageAdapter.getUnnestColumn(); + + final String inputColumn = unnestStorageAdapter.getUnnestInputIfDirectAccess(vc); + + final OrFilter baseFilter = new OrFilter(ImmutableList.of( + new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null).toFilter(), + new SelectorDimFilter(inputColumn, "2", null).toFilter() + )); + + final OrFilter expectedPushDownFilter = new OrFilter(ImmutableList.of( + new SelectorDimFilter(inputColumn, "1", null).toFilter(), + new SelectorDimFilter(inputColumn, "2", null).toFilter() + )); + + final Sequence cursorSequence = unnestStorageAdapter.makeCursors( + baseFilter, + unnestStorageAdapter.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null ); + final TestStorageAdapter base = (TestStorageAdapter) unnestStorageAdapter.getBaseAdapter(); + final Filter pushDownFilter = base.getPushDownFilter(); + Assert.assertEquals(expectedPushDownFilter, pushDownFilter); cursorSequence.accumulate(null, (accumulated, cursor) -> { - ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); - - DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME)); - int count = 0; - while (!cursor.isDone()) { - Object dimSelectorVal = dimSelector.getObject(); - if (dimSelectorVal == null) { - Assert.assertNull(dimSelectorVal); - } - cursor.advance(); - count++; - } - Assert.assertEquals(1, count); - Filter unnestFilter = new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null).toFilter(); - VirtualColumn vc = new ExpressionVirtualColumn( - OUTPUT_COLUMN_NAME, - "\"" + COLUMNNAME + "\"", - null, - ExprMacroTable.nil() - ); - final String inputColumn = UNNEST_STORAGE_ADAPTER2.getUnnestInputIfDirectAccess(vc); - Pair filterPair = UNNEST_STORAGE_ADAPTER2.computeBaseAndPostUnnestFilters( - null, - unnestFilter, - VirtualColumns.EMPTY, - inputColumn, - INCREMENTAL_INDEX_STORAGE_ADAPTER.getColumnCapabilities(inputColumn) - ); - SelectorFilter left = ((SelectorFilter) filterPair.lhs); - SelectorFilter right = ((SelectorFilter) filterPair.rhs); - Assert.assertEquals(inputColumn, left.getDimension()); - Assert.assertEquals(OUTPUT_COLUMN_NAME, right.getDimension()); - Assert.assertEquals(right.getValue(), left.getValue()); + Assert.assertEquals(cursor.getClass(), PostJoinCursor.class); + final Filter postFilter = ((PostJoinCursor) cursor).getPostJoinFilter(); + // OR-case so base filter should match the postJoinFilter + Assert.assertEquals(baseFilter, postFilter); return null; }); } @Test - public void test_unnest_adapters_with_base_or_filter_no_unnest_filter() + public void test_pushdown_filters_unnested_dimension_with_unnest_adapters() { - VirtualColumn vc = new ExpressionVirtualColumn( - OUTPUT_COLUMN_NAME, - "\"" + COLUMNNAME + "\"", - null, - ExprMacroTable.nil() + final UnnestStorageAdapter unnestStorageAdapter = new UnnestStorageAdapter( + new TestStorageAdapter(INCREMENTAL_INDEX), + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), + new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null) ); - final String inputColumn = UNNEST_STORAGE_ADAPTER.getUnnestInputIfDirectAccess(vc); - final OrFilter baseFilter = new OrFilter(ImmutableList.of( - new SelectorDimFilter(OUTPUT_COLUMN_NAME, "1", null).toFilter(), - new SelectorDimFilter(inputColumn, "2", null).toFilter() - )); - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER.makeCursors( - baseFilter, - UNNEST_STORAGE_ADAPTER.getInterval(), + + final VirtualColumn vc = unnestStorageAdapter.getUnnestColumn(); + + final String inputColumn = unnestStorageAdapter.getUnnestInputIfDirectAccess(vc); + + final Filter expectedPushDownFilter = + new SelectorDimFilter(inputColumn, "1", null).toFilter(); + + + final Sequence cursorSequence = unnestStorageAdapter.makeCursors( + null, + unnestStorageAdapter.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null ); + final TestStorageAdapter base = (TestStorageAdapter) unnestStorageAdapter.getBaseAdapter(); + final Filter pushDownFilter = base.getPushDownFilter(); + + Assert.assertEquals(expectedPushDownFilter, pushDownFilter); cursorSequence.accumulate(null, (accumulated, cursor) -> { - ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); + Assert.assertEquals(cursor.getClass(), PostJoinCursor.class); + final Filter postFilter = ((PostJoinCursor) cursor).getPostJoinFilter(); + Assert.assertEquals(unnestStorageAdapter.getUnnestFilter(), postFilter); + ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME)); int count = 0; while (!cursor.isDone()) { @@ -349,19 +355,44 @@ public void test_unnest_adapters_with_base_or_filter_no_unnest_filter() cursor.advance(); count++; } - - Pair filterPair = UNNEST_STORAGE_ADAPTER2.computeBaseAndPostUnnestFilters( - baseFilter, - null, - VirtualColumns.EMPTY, - inputColumn, - INCREMENTAL_INDEX_STORAGE_ADAPTER.getColumnCapabilities(inputColumn) - ); - OrFilter left = ((OrFilter) filterPair.lhs); - OrFilter right = ((OrFilter) filterPair.rhs); - Assert.assertEquals("(multi-string1 = 1 || multi-string1 = 2)", left.toString()); - Assert.assertEquals("(unnested-multi-string1 = 1 || multi-string1 = 2)", right.toString()); + Assert.assertEquals(1, count); return null; }); } } + +/** + * Class to test the flow of pushing down filters into the base cursor + * while using the UnnestStorageAdapter. This class keeps a reference of the filter + * which is pushed down to the cursor which serves as a checkpoint to validate + * if the right filter is being pushed down + */ +class TestStorageAdapter extends IncrementalIndexStorageAdapter +{ + + private Filter pushDownFilter; + + public TestStorageAdapter(IncrementalIndex index) + { + super(index); + } + + public Filter getPushDownFilter() + { + return pushDownFilter; + } + + @Override + public Sequence makeCursors( + @Nullable final Filter filter, + final Interval interval, + final VirtualColumns virtualColumns, + final Granularity gran, + final boolean descending, + @Nullable QueryMetrics queryMetrics + ) + { + this.pushDownFilter = filter; + return super.makeCursors(filter, interval, virtualColumns, gran, descending, queryMetrics); + } +}