From 2085351a755df118f438751343d3e8bc4276498a Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Fri, 10 Mar 2023 11:23:24 -0800 Subject: [PATCH 1/7] Refactoring and bug fixes on top of unnest. The filter now is passed inside the unnest cursors. Added tests for scenarios such as 1. filter on unnested column which involves a left filter rewrite 2. filter on unnested virtual column which pushes the filter to the right only and involves no rewrite 3. not filters 4. SQL functions applied on top of unnested column 5. null present in first row of the column to be unnested --- .../apache/druid/query/UnnestDataSource.java | 28 +- .../UnnestColumnValueSelectorCursor.java | 59 ++-- .../druid/segment/UnnestDimensionCursor.java | 78 ++---- .../druid/segment/UnnestSegmentReference.java | 10 +- .../druid/segment/UnnestStorageAdapter.java | 38 ++- .../druid/query/QueryRunnerTestHelper.java | 3 +- .../groupby/UnnestGroupByQueryRunnerTest.java | 12 +- .../query/scan/UnnestScanQueryRunnerTest.java | 89 +----- .../query/topn/UnnestTopNQueryRunnerTest.java | 6 +- .../UnnestColumnValueSelectorCursorTest.java | 75 ++--- .../segment/UnnestStorageAdapterTest.java | 172 +----------- .../calcite/rel/DruidCorrelateUnnestRel.java | 3 +- .../sql/calcite/CalciteArraysQueryTest.java | 264 +++++++++++++++--- 13 files changed, 342 insertions(+), 495 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java index 2bb24784adfb..d46cd226a8a5 100644 --- a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java +++ b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java @@ -29,8 +29,6 @@ import org.apache.druid.segment.VirtualColumn; import org.apache.druid.utils.JvmUtils; -import javax.annotation.Nullable; -import java.util.LinkedHashSet; import java.util.List; import java.util.Objects; import java.util.Set; @@ -39,7 +37,6 @@ /** * The data source for representing an unnest operation. - * * An unnest data source has the following: * a base data source which is to be unnested * the column name of the MVD which will be unnested @@ -50,27 +47,23 @@ public class UnnestDataSource implements DataSource { private final DataSource base; private final VirtualColumn virtualColumn; - private final LinkedHashSet allowList; private UnnestDataSource( DataSource dataSource, - VirtualColumn virtualColumn, - LinkedHashSet allowList + VirtualColumn virtualColumn ) { this.base = dataSource; this.virtualColumn = virtualColumn; - this.allowList = allowList; } @JsonCreator public static UnnestDataSource create( @JsonProperty("base") DataSource base, - @JsonProperty("virtualColumn") VirtualColumn virtualColumn, - @Nullable @JsonProperty("allowList") LinkedHashSet allowList + @JsonProperty("virtualColumn") VirtualColumn virtualColumn ) { - return new UnnestDataSource(base, virtualColumn, allowList); + return new UnnestDataSource(base, virtualColumn); } @JsonProperty("base") @@ -85,12 +78,6 @@ public VirtualColumn getVirtualColumn() return virtualColumn; } - @JsonProperty("allowList") - public LinkedHashSet getAllowList() - { - return allowList; - } - @Override public Set getTableNames() { @@ -109,7 +96,7 @@ public DataSource withChildren(List children) if (children.size() != 1) { throw new IAE("Expected [1] child, got [%d]", children.size()); } - return new UnnestDataSource(children.get(0), virtualColumn, allowList); + return new UnnestDataSource(children.get(0), virtualColumn); } @Override @@ -146,17 +133,15 @@ public Function createSegmentMapFunction( baseSegment -> new UnnestSegmentReference( segmentMapFn.apply(baseSegment), - virtualColumn, - allowList + virtualColumn ) ); - } @Override public DataSource withUpdatedDataSource(DataSource newSource) { - return new UnnestDataSource(newSource, virtualColumn, allowList); + return new UnnestDataSource(newSource, virtualColumn); } @Override @@ -203,7 +188,6 @@ public String toString() return "UnnestDataSource{" + "base=" + base + ", column='" + virtualColumn + '\'' + - ", allowList=" + allowList + '}'; } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java index 1a3bbf4e0015..3b0a04857d25 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java @@ -22,15 +22,17 @@ import org.apache.druid.java.util.common.UOE; import org.apache.druid.query.BaseQuery; import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.filter.Filter; +import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.filter.BooleanValueMatcher; import org.joda.time.DateTime; import javax.annotation.Nullable; import java.util.Arrays; import java.util.Collections; -import java.util.LinkedHashSet; import java.util.List; /** @@ -65,18 +67,20 @@ public class UnnestColumnValueSelectorCursor implements Cursor private final ColumnValueSelector columnValueSelector; private final VirtualColumn unnestColumn; private final String outputName; - private final LinkedHashSet allowSet; private int index; private Object currentVal; private List unnestListForCurrentRow; private boolean needInitialization; + private ValueMatcher valueMatcher; + @Nullable + private final Filter allowFilter; public UnnestColumnValueSelectorCursor( Cursor cursor, ColumnSelectorFactory baseColumnSelectorFactory, VirtualColumn unnestColumn, String outputColumnName, - LinkedHashSet allowSet + @Nullable Filter allowFilter ) { this.baseCursor = cursor; @@ -89,7 +93,7 @@ public UnnestColumnValueSelectorCursor( this.index = 0; this.outputName = outputColumnName; this.needInitialization = true; - this.allowSet = allowSet; + this.allowFilter = allowFilter; } @Override @@ -194,11 +198,7 @@ public boolean isNull() public Object getObject() { if (!unnestListForCurrentRow.isEmpty()) { - if (allowSet == null || allowSet.isEmpty()) { - return unnestListForCurrentRow.get(index); - } else if (allowSet.contains((String) unnestListForCurrentRow.get(index))) { - return unnestListForCurrentRow.get(index); - } + return unnestListForCurrentRow.get(index); } return null; } @@ -253,9 +253,13 @@ public void advance() @Override public void advanceUninterruptibly() { - do { + while (true) { advanceAndUpdate(); - } while (matchAndProceed()); + boolean match = valueMatcher.matches(); + if (match || baseCursor.isDone()) { + return; + } + } } @Override @@ -310,12 +314,15 @@ private void getNextRow() private void initialize() { getNextRow(); - if (allowSet != null) { - if (!allowSet.isEmpty()) { - if (!allowSet.contains((String) unnestListForCurrentRow.get(index))) { - advance(); - } - } + if (allowFilter != null) { + this.valueMatcher = allowFilter.makeMatcher(getColumnSelectorFactory()); + } else { + this.valueMatcher = BooleanValueMatcher.of(true); + } + // If the first value the index is pointing to does not match the filter + // advance the index to the first value which will match + if (!valueMatcher.matches()) { + advance(); } needInitialization = false; } @@ -338,22 +345,4 @@ private void advanceAndUpdate() index++; } } - - /** - * This advances the unnest cursor in cases where an allowList is specified - * and the current value at the unnest cursor is not in the allowList. - * The cursor in such cases is moved till the next match is found. - * - * @return a boolean to indicate whether to stay or move cursor - */ - private boolean matchAndProceed() - { - boolean matchStatus; - if (allowSet == null || allowSet.isEmpty()) { - matchStatus = true; - } else { - matchStatus = allowSet.contains((String) unnestListForCurrentRow.get(index)); - } - return !baseCursor.isDone() && !matchStatus; - } } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java index ba91f27815d8..c99995fe261e 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java @@ -23,16 +23,16 @@ import org.apache.druid.query.BaseQuery; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.filter.BooleanValueMatcher; import org.joda.time.DateTime; import javax.annotation.Nullable; -import java.util.BitSet; -import java.util.LinkedHashSet; /** * The cursor to help unnest MVDs with dictionary encoding. @@ -79,8 +79,9 @@ public class UnnestDimensionCursor implements Cursor private final DimensionSelector dimSelector; private final VirtualColumn unnestColumn; private final String outputName; - private final LinkedHashSet allowSet; - private final BitSet allowedBitSet; + @Nullable + private final Filter allowFilter; + private ValueMatcher valueMatcher; private final ColumnSelectorFactory baseColumnSelectorFactory; private int index; @Nullable @@ -93,7 +94,7 @@ public UnnestDimensionCursor( ColumnSelectorFactory baseColumnSelectorFactory, VirtualColumn unnestColumn, String outputColumnName, - LinkedHashSet allowSet + @Nullable Filter allowFilter ) { this.baseCursor = cursor; @@ -106,8 +107,7 @@ public UnnestDimensionCursor( this.index = 0; this.outputName = outputColumnName; this.needInitialization = true; - this.allowSet = allowSet; - this.allowedBitSet = new BitSet(); + this.allowFilter = allowFilter; } @Override @@ -158,6 +158,9 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) @Override public boolean matches() { + if (indexedIntsForCurrentRow.size() <= 0) { + return false; + } return idForLookup == indexedIntsForCurrentRow.get(index); } @@ -188,14 +191,7 @@ public Object getObject() if (indexedIntsForCurrentRow == null || indexedIntsForCurrentRow.size() == 0) { return null; } - if (allowedBitSet.isEmpty()) { - if (allowSet == null || allowSet.isEmpty()) { - return lookupName(indexedIntsForCurrentRow.get(index)); - } - } else if (allowedBitSet.get(indexedIntsForCurrentRow.get(index))) { - return lookupName(indexedIntsForCurrentRow.get(index)); - } - return null; + return lookupName(indexedIntsForCurrentRow.get(index)); } @Override @@ -207,9 +203,6 @@ public Class classOfObject() @Override public int getValueCardinality() { - if (!allowedBitSet.isEmpty()) { - return allowedBitSet.cardinality(); - } return dimSelector.getValueCardinality(); } @@ -290,9 +283,13 @@ public void advance() @Override public void advanceUninterruptibly() { - do { + while (true) { advanceAndUpdate(); - } while (matchAndProceed()); + boolean match = valueMatcher.matches(); + if (match || baseCursor.isDone()) { + return; + } + } } @Override @@ -330,22 +327,19 @@ public void reset() @Nullable private void initialize() { - IdLookup idLookup = dimSelector.idLookup(); - this.indexIntsForRow = new SingleIndexInts(); - if (allowSet != null && !allowSet.isEmpty() && idLookup != null) { - for (String s : allowSet) { - if (idLookup.lookupId(s) >= 0) { - allowedBitSet.set(idLookup.lookupId(s)); - } - } + index = 0; + if (allowFilter != null) { + this.valueMatcher = allowFilter.makeMatcher(this.getColumnSelectorFactory()); + } else { + this.valueMatcher = BooleanValueMatcher.of(true); } + this.indexIntsForRow = new SingleIndexInts(); + if (dimSelector.getObject() != null) { this.indexedIntsForCurrentRow = dimSelector.getRow(); } - if (!allowedBitSet.isEmpty()) { - if (!allowedBitSet.get(indexedIntsForCurrentRow.get(index))) { - advance(); - } + if (!valueMatcher.matches() && !baseCursor.isDone()) { + advance(); } needInitialization = false; } @@ -362,6 +356,9 @@ private void advanceAndUpdate() index = 0; if (!baseCursor.isDone()) { baseCursor.advanceUninterruptibly(); + if (!baseCursor.isDone()) { + indexedIntsForCurrentRow = dimSelector.getRow(); + } } } else { if (index >= indexedIntsForCurrentRow.size() - 1) { @@ -378,23 +375,6 @@ private void advanceAndUpdate() } } - /** - * This advances the unnest cursor in cases where an allowList is specified - * and the current value at the unnest cursor is not in the allowList. - * The cursor in such cases is moved till the next match is found. - * - * @return a boolean to indicate whether to stay or move cursor - */ - private boolean matchAndProceed() - { - boolean matchStatus; - if ((allowSet == null || allowSet.isEmpty()) && allowedBitSet.isEmpty()) { - matchStatus = true; - } else { - matchStatus = allowedBitSet.get(indexedIntsForCurrentRow.get(index)); - } - return !baseCursor.isDone() && !matchStatus; - } // Helper class to help in returning // getRow from the dimensionSelector diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java b/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java index a5db64d2b2d3..8f3871390045 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java @@ -28,7 +28,6 @@ import javax.annotation.Nullable; import java.io.Closeable; import java.io.IOException; -import java.util.LinkedHashSet; import java.util.Optional; /** @@ -41,17 +40,15 @@ public class UnnestSegmentReference implements SegmentReference private final SegmentReference baseSegment; private final VirtualColumn unnestColumn; - private final LinkedHashSet allowSet; + public UnnestSegmentReference( SegmentReference baseSegment, - VirtualColumn unnestColumn, - LinkedHashSet allowList + VirtualColumn unnestColumn ) { this.baseSegment = baseSegment; this.unnestColumn = unnestColumn; - this.allowSet = allowList; } @Override @@ -103,8 +100,7 @@ public StorageAdapter asStorageAdapter() { return new UnnestStorageAdapter( baseSegment.asStorageAdapter(), - unnestColumn, - allowSet + unnestColumn ); } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 939333c75402..7c40db0792a5 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -62,18 +62,15 @@ public class UnnestStorageAdapter implements StorageAdapter private final StorageAdapter baseAdapter; private final VirtualColumn unnestColumn; private final String outputColumnName; - private final LinkedHashSet allowSet; public UnnestStorageAdapter( final StorageAdapter baseAdapter, - final VirtualColumn unnestColumn, - final LinkedHashSet allowSet + final VirtualColumn unnestColumn ) { this.baseAdapter = baseAdapter; this.unnestColumn = unnestColumn; this.outputColumnName = unnestColumn.getOutputName(); - this.allowSet = allowSet; } @Override @@ -87,7 +84,7 @@ public Sequence makeCursors( ) { final String inputColumn = getUnnestInputIfDirectAccess(); - final Pair filterPair = computeBaseAndPostCorrelateFilters( + final Pair filterPair = computeBaseAndPostUnnestFilters( filter, virtualColumns, inputColumn, @@ -121,7 +118,7 @@ public Sequence makeCursors( retVal.getColumnSelectorFactory(), unnestColumn, outputColumnName, - allowSet + filterPair.rhs ); } else { retVal = new UnnestColumnValueSelectorCursor( @@ -129,7 +126,7 @@ public Sequence makeCursors( retVal.getColumnSelectorFactory(), unnestColumn, outputColumnName, - allowSet + filterPair.rhs ); } } else { @@ -138,13 +135,16 @@ public Sequence makeCursors( retVal.getColumnSelectorFactory(), unnestColumn, outputColumnName, - allowSet + filterPair.rhs ); } + // This is needed at this moment for nested queries + // Future developer would want to move the virtual columns + // inside the UnnestCursor and wrap the columnSelectorFactory return PostJoinCursor.wrap( retVal, virtualColumns, - filterPair.rhs + null ); } ); @@ -260,9 +260,9 @@ public VirtualColumn getUnnestColumn() * @param inputColumn input column to unnest if it's a direct access; otherwise null * @param inputColumnCapabilites input column capabilities if known; otherwise null * - * @return pair of pre- and post-correlate filters + * @return pair of pre- and post-unnest filters */ - private Pair computeBaseAndPostCorrelateFilters( + private Pair computeBaseAndPostUnnestFilters( @Nullable final Filter queryFilter, final VirtualColumns queryVirtualColumns, @Nullable final String inputColumn, @@ -282,7 +282,7 @@ void add(@Nullable final Filter filter) final Set requiredColumns = filter.getRequiredColumns(); - // Run filter post-correlate if it refers to any virtual columns. + // Run filter post-unnest if it refers to any virtual columns. if (queryVirtualColumns.getVirtualColumns().length > 0) { for (String column : requiredColumns) { if (queryVirtualColumns.exists(column)) { @@ -293,13 +293,16 @@ void add(@Nullable final Filter filter) } if (requiredColumns.contains(outputColumnName)) { - // Try to move filter pre-correlate if possible. + // Rewrite filter post-unnest if possible. final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(filter, inputColumn, inputColumnCapabilites); if (newFilter != null) { + // Add the rewritten filter pre-unnest, so we get the benefit of any indexes, and so we avoid unnesting + // any rows that do not match this filter at all. preFilters.add(newFilter); - } else { - postFilters.add(filter); } + // This is needed as a filter on an MV String Dimension returns the entire row matching the filter + // Add original filter post-unnest no matter what: we need to filter out any extraneous unnested values. + postFilters.add(filter); } else { preFilters.add(filter); } @@ -308,11 +311,6 @@ void add(@Nullable final Filter filter) final FilterSplitter filterSplitter = new FilterSplitter(); - if (allowSet != null && !allowSet.isEmpty()) { - // Filter on input column if possible (it may be faster); otherwise use output column. - filterSplitter.add(new InDimFilter(inputColumn != null ? inputColumn : outputColumnName, allowSet)); - } - if (queryFilter instanceof AndFilter) { for (Filter filter : ((AndFilter) queryFilter).getFilters()) { filterSplitter.add(filter); diff --git a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java index aec318aa7a9e..2f4119af8f83 100644 --- a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java +++ b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java @@ -111,8 +111,7 @@ public class QueryRunnerTestHelper "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", null, ExprMacroTable.nil() - ), - null + ) ); public static final Granularity DAY_GRAN = Granularities.DAY; diff --git a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java index 826d612f678c..a09d2fc256e9 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java @@ -239,8 +239,7 @@ public void testGroupBy() "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", null, ExprMacroTable.nil() - ), - null + ) )) .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -453,8 +452,7 @@ public void testGroupByOnMissingColumn() "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", null, ExprMacroTable.nil() - ), - null + ) )) .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) .setDimensions( @@ -566,8 +564,7 @@ public void testGroupByOnUnnestedVirtualColumn() "mv_to_array(placementish)", ColumnType.STRING_ARRAY, TestExprMacroTable.INSTANCE - ), - null + ) ); GroupByQuery query = makeQueryBuilder() @@ -655,8 +652,7 @@ public void testGroupByOnUnnestedVirtualMultiColumn() "array(\"market\",\"quality\")", ColumnType.STRING, TestExprMacroTable.INSTANCE - ), - null + ) ); GroupByQuery query = makeQueryBuilder() diff --git a/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java index 160f06140c40..a0fa2b929e09 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java @@ -22,7 +22,6 @@ import com.google.common.collect.Lists; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.DefaultGenericQueryMetricsFactory; import org.apache.druid.query.Druids; import org.apache.druid.query.QueryPlus; @@ -46,9 +45,7 @@ import org.junit.runners.Parameterized; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -96,27 +93,6 @@ private Druids.ScanQueryBuilder newTestUnnestQuery() .legacy(legacy); } - private Druids.ScanQueryBuilder newTestUnnestQueryWithAllowSet() - { - List allowList = Arrays.asList("a", "b", "c"); - LinkedHashSet allowSet = new LinkedHashSet(allowList); - return Druids.newScanQueryBuilder() - .dataSource(UnnestDataSource.create( - new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), - new ExpressionVirtualColumn( - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, - "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", - null, - ExprMacroTable.nil() - ), - allowSet - )) - .columns(Collections.emptyList()) - .eternityInterval() - .limit(3) - .legacy(legacy); - } - @Test public void testScanOnUnnest() { @@ -188,8 +164,7 @@ public void testUnnestRunnerVirtualColumnsUsingSingleColumn() "mv_to_array(placementish)", ColumnType.STRING, TestExprMacroTable.INSTANCE - ), - null + ) )) .columns(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) .eternityInterval() @@ -258,8 +233,7 @@ public void testUnnestRunnerVirtualColumnsUsingMultipleColumn() "array(\"market\",\"quality\")", ColumnType.STRING, TestExprMacroTable.INSTANCE - ), - null + ) )) .columns(QueryRunnerTestHelper.MARKET_DIMENSION, QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) .eternityInterval() @@ -461,65 +435,6 @@ public void testUnnestRunnerWithOrdering() ScanQueryRunnerTest.verify(ascendingExpectedResults, results); } - @Test - public void testUnnestRunnerNonNullAllowSet() - { - ScanQuery query = newTestUnnestQueryWithAllowSet() - .intervals(I_0112_0114) - .columns(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) - .limit(3) - .build(); - - final QueryRunner queryRunner = QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn( - FACTORY, - new IncrementalIndexSegment( - index, - QueryRunnerTestHelper.SEGMENT_ID - ), - query, - "rtIndexvc" - ); - - Iterable results = queryRunner.run(QueryPlus.wrap(query)).toList(); - - String[] columnNames; - if (legacy) { - columnNames = new String[]{ - getTimestampName() + ":TIME", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST - }; - } else { - columnNames = new String[]{ - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST - }; - } - String[] values; - if (legacy) { - values = new String[]{ - "2011-01-12T00:00:00.000Z\ta", - "2011-01-12T00:00:00.000Z\tb", - "2011-01-13T00:00:00.000Z\ta" - }; - } else { - values = new String[]{ - "a", - "b", - "a" - }; - } - - final List>> events = ScanQueryRunnerTest.toEvents(columnNames, legacy, values); - List expectedResults = toExpected( - events, - legacy - ? Lists.newArrayList(getTimestampName(), QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) - : Collections.singletonList(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST), - 0, - 3 - ); - ScanQueryRunnerTest.verify(expectedResults, results); - } - private String getTimestampName() { diff --git a/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java index e822913489dd..c30022216bd4 100644 --- a/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java @@ -258,8 +258,7 @@ public void testTopNStringVirtualColumnUnnest() "mv_to_array(\"placementish\")", ColumnType.STRING_ARRAY, TestExprMacroTable.INSTANCE - ), - null + ) )) .granularity(QueryRunnerTestHelper.ALL_GRAN) .dimension(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) @@ -341,8 +340,7 @@ public void testTopNStringVirtualMultiColumnUnnest() "array(\"market\",\"quality\")", ColumnType.STRING, TestExprMacroTable.INSTANCE - ), - null + ) )) .granularity(QueryRunnerTestHelper.ALL_GRAN) .dimension(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java index 11b6b7e1d6a0..07364fc1a8cd 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java @@ -35,14 +35,11 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.LinkedHashSet; import java.util.List; public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandlingTest { private static String OUTPUT_NAME = "unnested-column"; - private static LinkedHashSet IGNORE_SET = null; - private static LinkedHashSet IGNORE_SET1 = new LinkedHashSet<>(Arrays.asList("b", "f")); @BeforeClass public static void setUpClass() @@ -74,7 +71,7 @@ public void test_list_unnest_cursors() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -108,7 +105,7 @@ public void test_list_unnest_cursors_user_supplied_list() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -140,7 +137,7 @@ public void test_list_unnest_cursors_user_supplied_list_only_nulls() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -177,7 +174,7 @@ public void test_list_unnest_cursors_user_supplied_list_mixed_with_nulls() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -211,7 +208,7 @@ public void test_list_unnest_cursors_user_supplied_strings_and_no_lists() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -241,7 +238,7 @@ public void test_list_unnest_cursors_user_supplied_strings_mixed_with_list() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -276,7 +273,7 @@ public void test_list_unnest_cursors_user_supplied_lists_three_levels() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", null, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -310,14 +307,14 @@ public void test_list_unnest_of_unnest_cursors_user_supplied_list_three_levels() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", null, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); UnnestColumnValueSelectorCursor parentCursor = new UnnestColumnValueSelectorCursor( childCursor, childCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"" + OUTPUT_NAME + "\"", null, ExprMacroTable.nil()), "tmp-out", - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = parentCursor.getColumnSelectorFactory() .makeColumnValueSelector("tmp-out"); @@ -352,7 +349,7 @@ public void test_list_unnest_cursors_user_supplied_list_with_nulls() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -390,7 +387,7 @@ public void test_list_unnest_cursors_user_supplied_list_with_dups() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -408,44 +405,6 @@ public void test_list_unnest_cursors_user_supplied_list_with_dups() Assert.assertEquals(k, 10); } - @Test - public void test_list_unnest_cursors_user_supplied_list_with_ignore_set() - { - List inputList = Arrays.asList( - Arrays.asList("a", "b", "c"), - Arrays.asList("e", "f", "g", "h", "i"), - Collections.singletonList("j") - ); - - List expectedResults = Arrays.asList("b", "f"); - - //Create base cursor - ListCursor listCursor = new ListCursor(inputList); - - //Create unnest cursor - UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor( - listCursor, - listCursor.getColumnSelectorFactory(), - new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - IGNORE_SET1 - ); - ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() - .makeColumnValueSelector(OUTPUT_NAME); - int k = 0; - while (!unnestCursor.isDone()) { - Object valueSelectorVal = unnestColumnValueSelector.getObject(); - if (valueSelectorVal == null) { - Assert.assertEquals(null, expectedResults.get(k)); - } else { - Assert.assertEquals(expectedResults.get(k), valueSelectorVal.toString()); - } - k++; - unnestCursor.advance(); - } - Assert.assertEquals(k, 2); - } - @Test public void test_list_unnest_cursors_user_supplied_list_double() { @@ -466,7 +425,7 @@ public void test_list_unnest_cursors_user_supplied_list_double() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -500,7 +459,7 @@ public void test_list_unnest_cursors_user_supplied_list_float() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -534,7 +493,7 @@ public void test_list_unnest_cursors_user_supplied_list_long() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -571,7 +530,7 @@ public void test_list_unnest_cursors_user_supplied_list_three_level_arrays_and_m listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", null, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -607,7 +566,7 @@ public void test_list_unnest_cursors_dimSelector() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); // should return a column value selector for this case BaseSingleValueDimensionSelector unnestDimSelector = (BaseSingleValueDimensionSelector) unnestCursor.getColumnSelectorFactory() @@ -650,7 +609,7 @@ public void test_list_unnest_cursors_user_supplied_list_of_integers() listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, - IGNORE_SET + null ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index e064587a05ea..d852547e4ec4 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -46,7 +46,6 @@ import org.junit.Test; import java.util.Arrays; -import java.util.LinkedHashSet; import java.util.List; public class UnnestStorageAdapterTest extends InitializedNullHandlingTest @@ -56,13 +55,10 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest private static IncrementalIndexStorageAdapter INCREMENTAL_INDEX_STORAGE_ADAPTER; private static UnnestStorageAdapter UNNEST_STORAGE_ADAPTER; private static UnnestStorageAdapter UNNEST_STORAGE_ADAPTER1; - private static UnnestStorageAdapter UNNEST_STORAGE_ADAPTER2; - private static UnnestStorageAdapter UNNEST_STORAGE_ADAPTER3; private static List ADAPTERS; private static String COLUMNNAME = "multi-string1"; private static String OUTPUT_COLUMN_NAME = "unnested-multi-string1"; private static String OUTPUT_COLUMN_NAME1 = "unnested-multi-string1-again"; - private static LinkedHashSet IGNORE_SET = new LinkedHashSet<>(Arrays.asList("1", "3", "5")); @BeforeClass public static void setup() @@ -86,29 +82,17 @@ public static void setup() INCREMENTAL_INDEX_STORAGE_ADAPTER = new IncrementalIndexStorageAdapter(INCREMENTAL_INDEX); UNNEST_STORAGE_ADAPTER = new UnnestStorageAdapter( INCREMENTAL_INDEX_STORAGE_ADAPTER, - new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), - null + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()) ); + UNNEST_STORAGE_ADAPTER1 = new UnnestStorageAdapter( - INCREMENTAL_INDEX_STORAGE_ADAPTER, - new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), - IGNORE_SET - ); - UNNEST_STORAGE_ADAPTER2 = new UnnestStorageAdapter( UNNEST_STORAGE_ADAPTER, - new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME1, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), - null - ); - UNNEST_STORAGE_ADAPTER3 = new UnnestStorageAdapter( - UNNEST_STORAGE_ADAPTER1, - new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME1, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), - IGNORE_SET + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME1, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()) ); + ADAPTERS = ImmutableList.of( UNNEST_STORAGE_ADAPTER, - UNNEST_STORAGE_ADAPTER1, - UNNEST_STORAGE_ADAPTER2, - UNNEST_STORAGE_ADAPTER3 + UNNEST_STORAGE_ADAPTER1 ); } @@ -217,9 +201,9 @@ public void test_unnest_adapters_basic() @Test public void test_two_levels_of_unnest_adapters() { - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER2.makeCursors( + Sequence cursorSequence = UNNEST_STORAGE_ADAPTER1.makeCursors( null, - UNNEST_STORAGE_ADAPTER2.getInterval(), + UNNEST_STORAGE_ADAPTER1.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, @@ -256,148 +240,6 @@ public void test_two_levels_of_unnest_adapters() }); } - @Test - public void test_unnest_adapters_with_allowList() - { - final String columnName = "multi-string1"; - - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER1.makeCursors( - null, - UNNEST_STORAGE_ADAPTER1.getInterval(), - VirtualColumns.EMPTY, - Granularities.ALL, - false, - null - ); - - cursorSequence.accumulate(null, (accumulated, cursor) -> { - ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); - - DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME)); - ColumnValueSelector valueSelector = factory.makeColumnValueSelector(OUTPUT_COLUMN_NAME); - - int count = 0; - while (!cursor.isDone()) { - Object dimSelectorVal = dimSelector.getObject(); - Object valueSelectorVal = valueSelector.getObject(); - if (dimSelectorVal == null) { - Assert.assertNull(dimSelectorVal); - } else if (valueSelectorVal == null) { - Assert.assertNull(valueSelectorVal); - } - cursor.advance(); - count++; - } - /* - each row has 8 distinct entries. - allowlist has 3 entries also the value cardinality - unnest will have 3 distinct entries - */ - Assert.assertEquals(count, 3); - Assert.assertEquals(dimSelector.getValueCardinality(), 3); - return null; - }); - } - - @Test - public void test_two_levels_of_unnest_adapters_with_allowList() - { - final String columnName = "multi-string1"; - - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER3.makeCursors( - null, - UNNEST_STORAGE_ADAPTER3.getInterval(), - VirtualColumns.EMPTY, - Granularities.ALL, - false, - null - ); - UnnestStorageAdapter adapter = UNNEST_STORAGE_ADAPTER3; - assertColumnReadsIdentifier(adapter.getUnnestColumn(), columnName); - Assert.assertEquals( - adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).isDictionaryEncoded(), - ColumnCapabilities.Capable.TRUE - ); - Assert.assertNull(adapter.getMaxValue(OUTPUT_COLUMN_NAME)); - Assert.assertNull(adapter.getMinValue(OUTPUT_COLUMN_NAME)); - - cursorSequence.accumulate(null, (accumulated, cursor) -> { - ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); - - DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME1)); - ColumnValueSelector valueSelector = factory.makeColumnValueSelector(OUTPUT_COLUMN_NAME1); - - int count = 0; - while (!cursor.isDone()) { - Object dimSelectorVal = dimSelector.getObject(); - Object valueSelectorVal = valueSelector.getObject(); - if (dimSelectorVal == null) { - Assert.assertNull(dimSelectorVal); - } else if (valueSelectorVal == null) { - Assert.assertNull(valueSelectorVal); - } - cursor.advance(); - count++; - } - /* - each row has 8 distinct entries. - allowlist has 3 entries also the value cardinality - unnest will have 3 distinct entries - unnest of that unnest will have 3*3 = 9 entries - */ - Assert.assertEquals(count, 9); - Assert.assertEquals(dimSelector.getValueCardinality(), 3); - return null; - }); - } - - @Test - public void test_unnest_adapters_methods_with_allowList() - { - final String columnName = "multi-string1"; - - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER1.makeCursors( - null, - UNNEST_STORAGE_ADAPTER1.getInterval(), - VirtualColumns.EMPTY, - Granularities.ALL, - false, - null - ); - UnnestStorageAdapter adapter = UNNEST_STORAGE_ADAPTER1; - assertColumnReadsIdentifier(adapter.getUnnestColumn(), columnName); - Assert.assertEquals( - adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).isDictionaryEncoded(), - ColumnCapabilities.Capable.TRUE - ); - Assert.assertNull(adapter.getMaxValue(OUTPUT_COLUMN_NAME)); - Assert.assertNull(adapter.getMinValue(OUTPUT_COLUMN_NAME)); - - cursorSequence.accumulate(null, (accumulated, cursor) -> { - ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); - - DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME)); - IdLookup idlookUp = dimSelector.idLookup(); - Assert.assertFalse(dimSelector.isNull()); - int[] indices = new int[]{1, 3, 5}; - int count = 0; - while (!cursor.isDone()) { - Object dimSelectorVal = dimSelector.getObject(); - Assert.assertEquals(idlookUp.lookupId((String) dimSelectorVal), indices[count]); - // after unnest first entry in get row should equal the object - // and the row size will always be 1 - Assert.assertEquals(dimSelector.getRow().get(0), indices[count]); - Assert.assertEquals(dimSelector.getRow().size(), 1); - Assert.assertNotNull(dimSelector.makeValueMatcher(OUTPUT_COLUMN_NAME)); - cursor.advance(); - count++; - } - Assert.assertEquals(dimSelector.getValueCardinality(), 3); - Assert.assertEquals(count, 3); - return null; - }); - } - private static void assertColumnReadsIdentifier(final VirtualColumn column, final String identifier) { MatcherAssert.assertThat(column, CoreMatchers.instanceOf(ExpressionVirtualColumn.class)); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java index 10a12fbec3b3..5c5bfd8aae4e 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java @@ -172,8 +172,7 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) correlateRowSignature.getColumnName(correlateRowSignature.size() - 1), Calcites.getColumnTypeForRelDataType(rexNodeToUnnest.getType()), getPlannerContext().getExprMacroTable() - ), - null + ) ), correlateRowSignature, getPlannerContext(), diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 67d1971a16ef..f8976de7b053 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -2697,6 +2697,37 @@ public void testUnnestInline() ); } + @Test + public void testUnnestInlineWithCount() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT COUNT(*) FROM (select c from UNNEST(ARRAY[1,2,3]) as unnested(c))", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource( + InlineDataSource.fromIterable( + ImmutableList.of( + new Object[]{1L}, + new Object[]{2L}, + new Object[]{3L} + ), + RowSignature.builder().add("EXPR$0", ColumnType.LONG).build() + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .context(QUERY_CONTEXT_UNNEST) + .aggregators(aggregators(new CountAggregatorFactory("a0"))) + .build() + ), + ImmutableList.of( + new Object[]{3L} + ) + ); + } + @Test public void testUnnest() { @@ -2713,8 +2744,7 @@ public void testUnnest() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -2768,15 +2798,13 @@ public void testUnnestTwice() "j0.unnest", "string_to_array(\"dim1\",'\\u005C.')", ColumnType.STRING_ARRAY - ), - null + ) ), expressionVirtualColumn( "_j0.unnest", "\"dim3\"", ColumnType.STRING - ), - null + ) ) ) .intervals(querySegmentSpec(Filtration.eternity())) @@ -2835,15 +2863,13 @@ public void testUnnestTwiceWithFiltersAndExpressions() "j0.unnest", "string_to_array(\"dim1\",'\\u005C.')", ColumnType.STRING_ARRAY - ), - null + ) ), expressionVirtualColumn( "_j0.unnest", "\"dim3\"", ColumnType.STRING - ), - null + ) ) ) .intervals(querySegmentSpec(Filtration.eternity())) @@ -2898,8 +2924,7 @@ public void testUnnestWithGroupBy() GroupByQuery.builder() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .setInterval(querySegmentSpec(Filtration.eternity())) .setContext(QUERY_CONTEXT_UNNEST) @@ -2943,8 +2968,7 @@ public void testUnnestWithGroupByOrderBy() GroupByQuery.builder() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .setInterval(querySegmentSpec(Filtration.eternity())) .setContext(QUERY_CONTEXT_UNNEST) @@ -2999,8 +3023,7 @@ public void testUnnestWithGroupByOrderByWithLimit() new TopNQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .dimension(new DefaultDimensionSpec("j0.unnest", "_d0", ColumnType.STRING)) @@ -3038,8 +3061,7 @@ public void testUnnestWithGroupByHaving() GroupByQuery.builder() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .setInterval(querySegmentSpec(Filtration.eternity())) .setContext(QUERY_CONTEXT_UNNEST) @@ -3081,8 +3103,7 @@ public void testUnnestWithLimit() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3116,8 +3137,7 @@ public void testUnnestFirstQueryOnSelect() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3178,8 +3198,7 @@ public void testUnnestWithFilters() .context(QUERY_CONTEXT_UNNEST) .build() ), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3228,8 +3247,7 @@ public void testUnnestWithFiltersInsideAndOutside() .context(QUERY_CONTEXT_UNNEST) .build() ), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3262,8 +3280,7 @@ public void testUnnestWithFiltersOutside() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3305,8 +3322,7 @@ public void testUnnestWithInFilters() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .filters(new InDimFilter("dim2", ImmutableList.of("a", "b", "ab", "abc"), null)) @@ -3342,8 +3358,7 @@ public void testUnnestVirtualWithColumns() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY), - null + expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3386,8 +3401,7 @@ public void testUnnestWithGroupByOrderByOnVirtualColumn() "j0.unnest", "array(\"dim2\",\"dim4\")", ColumnType.STRING_ARRAY - ), - null + ) ) ) .setInterval(querySegmentSpec(Filtration.eternity())) @@ -3455,8 +3469,7 @@ public void testUnnestWithJoinOnTheLeft() "(\"dim2\" == \"j0.dim2\")", JoinType.INNER ), - expressionVirtualColumn("_j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("_j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3545,4 +3558,183 @@ public void testUnnestWithConstant() ) ); } + + @Test + public void testUnnestWithSQLFunctionOnUnnestedColumn() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT strlen(d3) FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .virtualColumns(expressionVirtualColumn("v0", "strlen(\"j0.unnest\")", ColumnType.LONG)) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("v0")) + .build() + ), + useDefault ? + ImmutableList.of( + new Object[]{1}, + new Object[]{1}, + new Object[]{1}, + new Object[]{1}, + new Object[]{1}, + new Object[]{0}, + new Object[]{0}, + new Object[]{0} + ) : + ImmutableList.of( + new Object[]{1}, + new Object[]{1}, + new Object[]{1}, + new Object[]{1}, + new Object[]{1}, + new Object[]{0}, + new Object[]{null}, + new Object[]{null} + ) + ); + } + + @Test + public void testUnnestWithINFiltersWithLeftRewrite() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('a','b')", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(new InDimFilter("j0.unnest", ImmutableSet.of("a", "b"), null)) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } + + @Test + public void testUnnestWithINFiltersWithNoLeftRewrite() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d45 FROM druid.numfoo, UNNEST(ARRAY[dim4,dim5]) as unnested (d45) where d45 IN ('a','b')", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(new InDimFilter("j0.unnest", ImmutableSet.of("a", "b"), null)) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"a"}, + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } + + @Test + public void testUnnestWithInvalidINFiltersOnUnnestedColumn() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('foo','bar')", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(new InDimFilter("j0.unnest", ImmutableSet.of("foo", "bar"), null)) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of() + ); + } + + @Test + public void testUnnestWithNotFiltersOnUnnestedColumn() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3!='d' ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(not(selector("j0.unnest", "d", null))) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + useDefault ? + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"c"}, + new Object[]{""}, + new Object[]{""}, + new Object[]{""} + ) : + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"c"}, + new Object[]{""}, + new Object[]{null}, + new Object[]{null} + ) + ); + } } From cf0f6a2ad6d4db9c857807a84fe773f764bd3838 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Fri, 10 Mar 2023 17:42:33 -0800 Subject: [PATCH 2/7] Not pushing filters in now, will be done if needed later when we migrate the filter inside the data source --- .../UnnestColumnValueSelectorCursor.java | 34 ++----------- .../druid/segment/UnnestDimensionCursor.java | 35 ++----------- .../druid/segment/UnnestStorageAdapter.java | 11 ++-- .../UnnestColumnValueSelectorCursorTest.java | 51 +++++++------------ 4 files changed, 27 insertions(+), 104 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java index 3b0a04857d25..9179d55dc1f8 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java @@ -22,12 +22,9 @@ import org.apache.druid.java.util.common.UOE; import org.apache.druid.query.BaseQuery; import org.apache.druid.query.dimension.DimensionSpec; -import org.apache.druid.query.filter.Filter; -import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; -import org.apache.druid.segment.filter.BooleanValueMatcher; import org.joda.time.DateTime; import javax.annotation.Nullable; @@ -52,9 +49,6 @@ * unnestCursor.advance() -> 'e' *

*

- * The allowSet if available helps skip over elements which are not in the allowList by moving the cursor to - * the next available match. - *

* The index reference points to the index of each row that the unnest cursor is accessing through currentVal * The index ranges from 0 to the size of the list in each row which is held in the unnestListForCurrentRow *

@@ -71,16 +65,13 @@ public class UnnestColumnValueSelectorCursor implements Cursor private Object currentVal; private List unnestListForCurrentRow; private boolean needInitialization; - private ValueMatcher valueMatcher; - @Nullable - private final Filter allowFilter; + public UnnestColumnValueSelectorCursor( Cursor cursor, ColumnSelectorFactory baseColumnSelectorFactory, VirtualColumn unnestColumn, - String outputColumnName, - @Nullable Filter allowFilter + String outputColumnName ) { this.baseCursor = cursor; @@ -93,7 +84,6 @@ public UnnestColumnValueSelectorCursor( this.index = 0; this.outputName = outputColumnName; this.needInitialization = true; - this.allowFilter = allowFilter; } @Override @@ -253,13 +243,7 @@ public void advance() @Override public void advanceUninterruptibly() { - while (true) { - advanceAndUpdate(); - boolean match = valueMatcher.matches(); - if (match || baseCursor.isDone()) { - return; - } - } + advanceAndUpdate(); } @Override @@ -308,22 +292,10 @@ private void getNextRow() /** * This initializes the unnest cursor and creates data structures * to start iterating over the values to be unnested. - * This would also create a bitset for dictonary encoded columns to - * check for matching values specified in allowedList of UnnestDataSource. */ private void initialize() { getNextRow(); - if (allowFilter != null) { - this.valueMatcher = allowFilter.makeMatcher(getColumnSelectorFactory()); - } else { - this.valueMatcher = BooleanValueMatcher.of(true); - } - // If the first value the index is pointing to does not match the filter - // advance the index to the first value which will match - if (!valueMatcher.matches()) { - advance(); - } needInitialization = false; } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java index c99995fe261e..6c4c1a953439 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java @@ -23,13 +23,11 @@ import org.apache.druid.query.BaseQuery; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.DimensionSpec; -import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.ValueMatcher; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnCapabilitiesImpl; import org.apache.druid.segment.data.IndexedInts; -import org.apache.druid.segment.filter.BooleanValueMatcher; import org.joda.time.DateTime; import javax.annotation.Nullable; @@ -58,15 +56,6 @@ *

* Total 5 advance calls above *

- * The allowSet, if available, helps skip over elements that are not in the allowList by moving the cursor to - * the next available match. The hashSet is converted into a bitset (during initialization) for efficiency. - * If allowSet is ['c', 'd'] then the advance moves over to the next available match - *

- * advance() -> 2 -> 'c' - * advance() -> 3 -> 'd' (advances base cursor first) - * advance() -> 2 -> 'c' - *

- * Total 3 advance calls in this case *

* The index reference points to the index of each row that the unnest cursor is accessing * The indexedInts for each row are held in the indexedIntsForCurrentRow object @@ -79,9 +68,6 @@ public class UnnestDimensionCursor implements Cursor private final DimensionSelector dimSelector; private final VirtualColumn unnestColumn; private final String outputName; - @Nullable - private final Filter allowFilter; - private ValueMatcher valueMatcher; private final ColumnSelectorFactory baseColumnSelectorFactory; private int index; @Nullable @@ -93,8 +79,7 @@ public UnnestDimensionCursor( Cursor cursor, ColumnSelectorFactory baseColumnSelectorFactory, VirtualColumn unnestColumn, - String outputColumnName, - @Nullable Filter allowFilter + String outputColumnName ) { this.baseCursor = cursor; @@ -107,7 +92,6 @@ public UnnestDimensionCursor( this.index = 0; this.outputName = outputColumnName; this.needInitialization = true; - this.allowFilter = allowFilter; } @Override @@ -283,13 +267,7 @@ public void advance() @Override public void advanceUninterruptibly() { - while (true) { - advanceAndUpdate(); - boolean match = valueMatcher.matches(); - if (match || baseCursor.isDone()) { - return; - } - } + advanceAndUpdate(); } @Override @@ -328,19 +306,12 @@ public void reset() private void initialize() { index = 0; - if (allowFilter != null) { - this.valueMatcher = allowFilter.makeMatcher(this.getColumnSelectorFactory()); - } else { - this.valueMatcher = BooleanValueMatcher.of(true); - } this.indexIntsForRow = new SingleIndexInts(); if (dimSelector.getObject() != null) { this.indexedIntsForCurrentRow = dimSelector.getRow(); } - if (!valueMatcher.matches() && !baseCursor.isDone()) { - advance(); - } + needInitialization = false; } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 7c40db0792a5..ee4b6a625412 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -117,16 +117,14 @@ public Sequence makeCursors( retVal, retVal.getColumnSelectorFactory(), unnestColumn, - outputColumnName, - filterPair.rhs + outputColumnName ); } else { retVal = new UnnestColumnValueSelectorCursor( retVal, retVal.getColumnSelectorFactory(), unnestColumn, - outputColumnName, - filterPair.rhs + outputColumnName ); } } else { @@ -134,8 +132,7 @@ public Sequence makeCursors( retVal, retVal.getColumnSelectorFactory(), unnestColumn, - outputColumnName, - filterPair.rhs + outputColumnName ); } // This is needed at this moment for nested queries @@ -144,7 +141,7 @@ public Sequence makeCursors( return PostJoinCursor.wrap( retVal, virtualColumns, - null + filterPair.rhs ); } ); diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java index 07364fc1a8cd..bafb50f7089a 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java @@ -70,8 +70,7 @@ public void test_list_unnest_cursors() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -104,8 +103,7 @@ public void test_list_unnest_cursors_user_supplied_list() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -136,8 +134,7 @@ public void test_list_unnest_cursors_user_supplied_list_only_nulls() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -173,8 +170,7 @@ public void test_list_unnest_cursors_user_supplied_list_mixed_with_nulls() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -207,8 +203,7 @@ public void test_list_unnest_cursors_user_supplied_strings_and_no_lists() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -237,8 +232,7 @@ public void test_list_unnest_cursors_user_supplied_strings_mixed_with_list() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -272,8 +266,7 @@ public void test_list_unnest_cursors_user_supplied_lists_three_levels() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", null, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -306,15 +299,13 @@ public void test_list_unnest_of_unnest_cursors_user_supplied_list_three_levels() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", null, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); UnnestColumnValueSelectorCursor parentCursor = new UnnestColumnValueSelectorCursor( childCursor, childCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"" + OUTPUT_NAME + "\"", null, ExprMacroTable.nil()), - "tmp-out", - null + "tmp-out" ); ColumnValueSelector unnestColumnValueSelector = parentCursor.getColumnSelectorFactory() .makeColumnValueSelector("tmp-out"); @@ -348,8 +339,7 @@ public void test_list_unnest_cursors_user_supplied_list_with_nulls() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -386,8 +376,7 @@ public void test_list_unnest_cursors_user_supplied_list_with_dups() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -424,8 +413,7 @@ public void test_list_unnest_cursors_user_supplied_list_double() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -458,8 +446,7 @@ public void test_list_unnest_cursors_user_supplied_list_float() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -492,8 +479,7 @@ public void test_list_unnest_cursors_user_supplied_list_long() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -529,8 +515,7 @@ public void test_list_unnest_cursors_user_supplied_list_three_level_arrays_and_m listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", null, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -565,8 +550,7 @@ public void test_list_unnest_cursors_dimSelector() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); // should return a column value selector for this case BaseSingleValueDimensionSelector unnestDimSelector = (BaseSingleValueDimensionSelector) unnestCursor.getColumnSelectorFactory() @@ -608,8 +592,7 @@ public void test_list_unnest_cursors_user_supplied_list_of_integers() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - null + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); From a0309f637ca5efddd02f03d56d77276d69b06b4c Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Mon, 13 Mar 2023 11:38:10 -0700 Subject: [PATCH 3/7] Removing stale comments and updating docs --- docs/querying/datasource.md | 6 ++---- .../java/org/apache/druid/segment/UnnestStorageAdapter.java | 4 ---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/docs/querying/datasource.md b/docs/querying/datasource.md index 442024dd75f5..e3eacf032bf3 100644 --- a/docs/querying/datasource.md +++ b/docs/querying/datasource.md @@ -412,15 +412,13 @@ The `unnest` datasource uses the following syntax: "type": "expression", "expression": "\"column_reference\"" }, - "outputName": "unnested_target_column", - "allowList": [] - }, + "outputName": "unnested_target_column" + } ``` * `dataSource.type`: Set this to `unnest`. * `dataSource.base`: Defines the datasource you want to unnest. * `dataSource.base.type`: The type of datasource you want to unnest, such as a table. * `dataSource.virtualColumn`: [Virtual column](virtual-columns.md) that references the nested values. The output name of this column is reused as the name of the column that contains unnested values. You can replace the source column with the unnested column by specifying the source column's name or a new column by specifying a different name. Outputting it to a new column can help you verify that you get the results that you expect but isn't required. -* `dataSource.allowList`: Optional. The subset of values you want to unnest. To learn more about how to use the `unnest` datasource, see the [unnest tutorial](../tutorials/tutorial-unnest-datasource.md). diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index ee4b6a625412..94c471d1a6e7 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -135,9 +135,6 @@ public Sequence makeCursors( outputColumnName ); } - // This is needed at this moment for nested queries - // Future developer would want to move the virtual columns - // inside the UnnestCursor and wrap the columnSelectorFactory return PostJoinCursor.wrap( retVal, virtualColumns, @@ -297,7 +294,6 @@ void add(@Nullable final Filter filter) // any rows that do not match this filter at all. preFilters.add(newFilter); } - // This is needed as a filter on an MV String Dimension returns the entire row matching the filter // Add original filter post-unnest no matter what: we need to filter out any extraneous unnested values. postFilters.add(filter); } else { From 06f7cfbc2100e67f010d44dd7b1f21925b121bce Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Mon, 13 Mar 2023 21:32:16 -0700 Subject: [PATCH 4/7] Temp changes for selector filter --- .../rule/CorrelateFilterRTransposeRule.java | 2 +- ...lateProjectOnFIlterRightTransposeRule.java | 85 +++++++++++ .../druid/sql/calcite/rule/DruidRules.java | 1 + .../sql/calcite/CalciteArraysQueryTest.java | 144 ++++++++++++++++++ 4 files changed, 231 insertions(+), 1 deletion(-) create mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterRTransposeRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterRTransposeRule.java index 66731ca78ade..00f472c5acc8 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterRTransposeRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterRTransposeRule.java @@ -84,7 +84,7 @@ public void onMatch(final RelOptRuleCall call) /** * Whether an expression refers to correlation variables. */ - private static boolean usesCorrelationId(final CorrelationId correlationId, final RexNode rexNode) + public static boolean usesCorrelationId(final CorrelationId correlationId, final RexNode rexNode) { class CorrelationVisitor extends RexVisitorImpl { diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java new file mode 100644 index 000000000000..e9782bcd1a7b --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.rule; + +import com.google.common.collect.ImmutableList; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Correlate; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.Uncollect; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlKind; + +/** + * Rule that pulls a {@link Filter} from the right-hand side of a {@link Correlate} above the Correlate in presence of an unneeded Project + * Allows filters on unnested fields to be added to queries that use {@link org.apache.druid.query.UnnestDataSource}. + * + * @see CorrelateFilterRTransposeRule similar, but for without a Project atop Filter + */ +public class CorrelateProjectOnFIlterRightTransposeRule extends RelOptRule +{ + private static final CorrelateProjectOnFIlterRightTransposeRule INSTANCE = new CorrelateProjectOnFIlterRightTransposeRule(); + + public CorrelateProjectOnFIlterRightTransposeRule() + { + super( + operand( + Correlate.class, + operand(RelNode.class, any()), + operand(Project.class, operand(Filter.class, operand(Uncollect.class, any()))) + )); + } + + @Override + public boolean matches(RelOptRuleCall call) + { + final Correlate correlate = call.rel(0); + final Filter right = call.rel(3); + + // Can't pull up filters that explicitly refer to the correlation variable. + return !CorrelateFilterRTransposeRule.usesCorrelationId(correlate.getCorrelationId(), right.getCondition()); + } + + public static CorrelateProjectOnFIlterRightTransposeRule instance() + { + return INSTANCE; + } + + @Override + public void onMatch(RelOptRuleCall call) + { + final Correlate correlate = call.rel(0); + final RelNode left = call.rel(1); + final Project rightP = call.rel(2); + final Filter rightF = call.rel(3); + + if (rightP.getProjects().size() <=1 && rightP.getChildExps().get(0).getKind() == SqlKind.CAST) { + call.transformTo( + call.builder() + .push(correlate.copy(correlate.getTraitSet(), ImmutableList.of(left, rightF.getInput()))) + .filter(RexUtil.shift(rightF.getCondition(), left.getRowType().getFieldCount())) + .build() + ); + } + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java index 519e9365b4a9..3bfe21d3c72d 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java @@ -121,6 +121,7 @@ public static List rules(PlannerContext plannerContext) retVal.add(ProjectCorrelateTransposeRule.INSTANCE); retVal.add(CorrelateFilterLTransposeRule.instance()); retVal.add(CorrelateFilterRTransposeRule.instance()); + retVal.add(CorrelateProjectOnFIlterRightTransposeRule.instance()); } return retVal; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index f8976de7b053..6ffb76e76a6b 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -3737,4 +3737,148 @@ public void testUnnestWithNotFiltersOnUnnestedColumn() ) ); } + + @Test + public void testUnnestWithSelectorFiltersOnAllColumns() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT * FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(selector("j0.unnest", "b", null)) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of( + "__time", + "cnt", + "d1", + "d2", + "dim1", + "dim2", + "dim3", + "dim4", + "dim5", + "dim6", + "f1", + "f2", + "j0.unnest", + "l1", + "l2", + "m1", + "m2", + "unique_dim1" + )) + .build() + ), + ImmutableList.of( + new Object[]{ + 946684800000L, + "", + "a", + "[\"a\",\"b\"]", + "a", + "aa", + "1", + 1.0D, + 0.0D, + 1.0F, + 0.0F, + 7L, + 0L, + 1L, + 1.0F, + 1.0D, + "\"AQAAAEAAAA==\"", + "b" + }, + new Object[]{ + 946771200000L, + "10.1", + "", + "[\"b\",\"c\"]", + "a", + "ab", + "2", + 1.7D, + 1.7D, + 0.1F, + 0.1F, + 325323L, + 325323L, + 1L, + 2.0F, + 2.0D, + "\"AQAAAQAAAAHNBA==\"", + "b" + } + ) + ); + } + + @Test + public void testUnnestWithSelectorFiltersOnSelectedColumn() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(selector("j0.unnest", "b", null)) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } + + @Test + public void testUnnestWithOrFiltersOnSelectedColumn() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(selector("j0.unnest", "b", null)) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } } From 112fb54087719fff0d547b1b3927868528e9463e Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Mon, 13 Mar 2023 22:35:51 -0700 Subject: [PATCH 5/7] Handling rules for a case where selector filters adds an extra layer of project before the filter on top of uncollect --- ...lateProjectOnFIlterRightTransposeRule.java | 12 ++- .../sql/calcite/CalciteArraysQueryTest.java | 100 ++---------------- 2 files changed, 18 insertions(+), 94 deletions(-) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java index e9782bcd1a7b..37aa4ef2f146 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java @@ -72,8 +72,18 @@ public void onMatch(RelOptRuleCall call) final RelNode left = call.rel(1); final Project rightP = call.rel(2); final Filter rightF = call.rel(3); + final Uncollect uncollect = call.rel(4); - if (rightP.getProjects().size() <=1 && rightP.getChildExps().get(0).getKind() == SqlKind.CAST) { + // The project is top of Uncollect and can only refer to the unnested output column + // the project can be a cast if on a string column + // e.g. LogicalProject(subset=[rel#123:Subset#5.NONE.[]], d3=[CAST('b':VARCHAR):VARCHAR]) + // Or for numeric columns takes the shape + // e.g. LogicalProject(subset=[rel#126:Subset#5.NONE.[]], d3=[1.0E0:FLOAT]) + // The projection is always bound to be on a single column reference which will be output of the uncollect + // So we check if there is only a single projection and is it either a CAST or a LITERAL + + final SqlKind rightProjectKind = rightP.getChildExps().get(0).getKind(); + if (rightP.getProjects().size() == 1 && (rightProjectKind == SqlKind.CAST || rightProjectKind == SqlKind.LITERAL)) { call.transformTo( call.builder() .push(correlate.copy(correlate.getTraitSet(), ImmutableList.of(left, rightF.getInput()))) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 6ffb76e76a6b..593f91936f94 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -3738,99 +3738,13 @@ public void testUnnestWithNotFiltersOnUnnestedColumn() ); } - @Test - public void testUnnestWithSelectorFiltersOnAllColumns() - { - skipVectorize(); - cannotVectorize(); - testQuery( - "SELECT * FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' ", - QUERY_CONTEXT_UNNEST, - ImmutableList.of( - Druids.newScanQueryBuilder() - .dataSource(UnnestDataSource.create( - new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) - )) - .intervals(querySegmentSpec(Filtration.eternity())) - .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .filters(selector("j0.unnest", "b", null)) - .legacy(false) - .context(QUERY_CONTEXT_UNNEST) - .columns(ImmutableList.of( - "__time", - "cnt", - "d1", - "d2", - "dim1", - "dim2", - "dim3", - "dim4", - "dim5", - "dim6", - "f1", - "f2", - "j0.unnest", - "l1", - "l2", - "m1", - "m2", - "unique_dim1" - )) - .build() - ), - ImmutableList.of( - new Object[]{ - 946684800000L, - "", - "a", - "[\"a\",\"b\"]", - "a", - "aa", - "1", - 1.0D, - 0.0D, - 1.0F, - 0.0F, - 7L, - 0L, - 1L, - 1.0F, - 1.0D, - "\"AQAAAEAAAA==\"", - "b" - }, - new Object[]{ - 946771200000L, - "10.1", - "", - "[\"b\",\"c\"]", - "a", - "ab", - "2", - 1.7D, - 1.7D, - 0.1F, - 0.1F, - 325323L, - 325323L, - 1L, - 2.0F, - 2.0D, - "\"AQAAAQAAAAHNBA==\"", - "b" - } - ) - ); - } - @Test public void testUnnestWithSelectorFiltersOnSelectedColumn() { skipVectorize(); cannotVectorize(); testQuery( - "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' ", + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b'", QUERY_CONTEXT_UNNEST, ImmutableList.of( Druids.newScanQueryBuilder() @@ -3854,30 +3768,30 @@ public void testUnnestWithSelectorFiltersOnSelectedColumn() } @Test - public void testUnnestWithOrFiltersOnSelectedColumn() + public void testUnnestWithSelectorFiltersOnSelectedVirtualColumn() { skipVectorize(); cannotVectorize(); testQuery( - "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' ", + "SELECT d3 FROM druid.numfoo, UNNEST(ARRAY[m1,m2]) as unnested (d3) where d3=1 ", QUERY_CONTEXT_UNNEST, ImmutableList.of( Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + expressionVirtualColumn("j0.unnest", "array(\"m1\",\"m2\")", ColumnType.FLOAT_ARRAY) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .filters(selector("j0.unnest", "b", null)) + .filters(selector("j0.unnest", "1", null)) .legacy(false) .context(QUERY_CONTEXT_UNNEST) .columns(ImmutableList.of("j0.unnest")) .build() ), ImmutableList.of( - new Object[]{"b"}, - new Object[]{"b"} + new Object[]{1.0f}, + new Object[]{1.0f} ) ); } From 89190437df4c1c9237ef6fe8f212fc8fc1e17920 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Tue, 14 Mar 2023 13:22:32 -0700 Subject: [PATCH 6/7] Revert "Handling rules for a case where selector filters adds an extra layer of project before the filter on top of uncollect" This reverts commit 112fb54087719fff0d547b1b3927868528e9463e. --- ...lateProjectOnFIlterRightTransposeRule.java | 12 +-- .../sql/calcite/CalciteArraysQueryTest.java | 100 ++++++++++++++++-- 2 files changed, 94 insertions(+), 18 deletions(-) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java index 37aa4ef2f146..e9782bcd1a7b 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java @@ -72,18 +72,8 @@ public void onMatch(RelOptRuleCall call) final RelNode left = call.rel(1); final Project rightP = call.rel(2); final Filter rightF = call.rel(3); - final Uncollect uncollect = call.rel(4); - // The project is top of Uncollect and can only refer to the unnested output column - // the project can be a cast if on a string column - // e.g. LogicalProject(subset=[rel#123:Subset#5.NONE.[]], d3=[CAST('b':VARCHAR):VARCHAR]) - // Or for numeric columns takes the shape - // e.g. LogicalProject(subset=[rel#126:Subset#5.NONE.[]], d3=[1.0E0:FLOAT]) - // The projection is always bound to be on a single column reference which will be output of the uncollect - // So we check if there is only a single projection and is it either a CAST or a LITERAL - - final SqlKind rightProjectKind = rightP.getChildExps().get(0).getKind(); - if (rightP.getProjects().size() == 1 && (rightProjectKind == SqlKind.CAST || rightProjectKind == SqlKind.LITERAL)) { + if (rightP.getProjects().size() <=1 && rightP.getChildExps().get(0).getKind() == SqlKind.CAST) { call.transformTo( call.builder() .push(correlate.copy(correlate.getTraitSet(), ImmutableList.of(left, rightF.getInput()))) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 593f91936f94..6ffb76e76a6b 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -3738,13 +3738,99 @@ public void testUnnestWithNotFiltersOnUnnestedColumn() ); } + @Test + public void testUnnestWithSelectorFiltersOnAllColumns() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT * FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(selector("j0.unnest", "b", null)) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of( + "__time", + "cnt", + "d1", + "d2", + "dim1", + "dim2", + "dim3", + "dim4", + "dim5", + "dim6", + "f1", + "f2", + "j0.unnest", + "l1", + "l2", + "m1", + "m2", + "unique_dim1" + )) + .build() + ), + ImmutableList.of( + new Object[]{ + 946684800000L, + "", + "a", + "[\"a\",\"b\"]", + "a", + "aa", + "1", + 1.0D, + 0.0D, + 1.0F, + 0.0F, + 7L, + 0L, + 1L, + 1.0F, + 1.0D, + "\"AQAAAEAAAA==\"", + "b" + }, + new Object[]{ + 946771200000L, + "10.1", + "", + "[\"b\",\"c\"]", + "a", + "ab", + "2", + 1.7D, + 1.7D, + 0.1F, + 0.1F, + 325323L, + 325323L, + 1L, + 2.0F, + 2.0D, + "\"AQAAAQAAAAHNBA==\"", + "b" + } + ) + ); + } + @Test public void testUnnestWithSelectorFiltersOnSelectedColumn() { skipVectorize(); cannotVectorize(); testQuery( - "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b'", + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' ", QUERY_CONTEXT_UNNEST, ImmutableList.of( Druids.newScanQueryBuilder() @@ -3768,30 +3854,30 @@ public void testUnnestWithSelectorFiltersOnSelectedColumn() } @Test - public void testUnnestWithSelectorFiltersOnSelectedVirtualColumn() + public void testUnnestWithOrFiltersOnSelectedColumn() { skipVectorize(); cannotVectorize(); testQuery( - "SELECT d3 FROM druid.numfoo, UNNEST(ARRAY[m1,m2]) as unnested (d3) where d3=1 ", + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' ", QUERY_CONTEXT_UNNEST, ImmutableList.of( Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "array(\"m1\",\"m2\")", ColumnType.FLOAT_ARRAY) + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .filters(selector("j0.unnest", "1", null)) + .filters(selector("j0.unnest", "b", null)) .legacy(false) .context(QUERY_CONTEXT_UNNEST) .columns(ImmutableList.of("j0.unnest")) .build() ), ImmutableList.of( - new Object[]{1.0f}, - new Object[]{1.0f} + new Object[]{"b"}, + new Object[]{"b"} ) ); } From 10b9776c84ffd7a972d1b253b0384757f45ffb38 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Tue, 14 Mar 2023 13:23:02 -0700 Subject: [PATCH 7/7] Revert "Temp changes for selector filter" This reverts commit 06f7cfbc2100e67f010d44dd7b1f21925b121bce. --- .../rule/CorrelateFilterRTransposeRule.java | 2 +- ...lateProjectOnFIlterRightTransposeRule.java | 85 ----------- .../druid/sql/calcite/rule/DruidRules.java | 1 - .../sql/calcite/CalciteArraysQueryTest.java | 144 ------------------ 4 files changed, 1 insertion(+), 231 deletions(-) delete mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterRTransposeRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterRTransposeRule.java index 00f472c5acc8..66731ca78ade 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterRTransposeRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterRTransposeRule.java @@ -84,7 +84,7 @@ public void onMatch(final RelOptRuleCall call) /** * Whether an expression refers to correlation variables. */ - public static boolean usesCorrelationId(final CorrelationId correlationId, final RexNode rexNode) + private static boolean usesCorrelationId(final CorrelationId correlationId, final RexNode rexNode) { class CorrelationVisitor extends RexVisitorImpl { diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java deleted file mode 100644 index e9782bcd1a7b..000000000000 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateProjectOnFIlterRightTransposeRule.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.sql.calcite.rule; - -import com.google.common.collect.ImmutableList; -import org.apache.calcite.plan.RelOptRule; -import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.core.Correlate; -import org.apache.calcite.rel.core.Filter; -import org.apache.calcite.rel.core.Project; -import org.apache.calcite.rel.core.Uncollect; -import org.apache.calcite.rex.RexUtil; -import org.apache.calcite.sql.SqlKind; - -/** - * Rule that pulls a {@link Filter} from the right-hand side of a {@link Correlate} above the Correlate in presence of an unneeded Project - * Allows filters on unnested fields to be added to queries that use {@link org.apache.druid.query.UnnestDataSource}. - * - * @see CorrelateFilterRTransposeRule similar, but for without a Project atop Filter - */ -public class CorrelateProjectOnFIlterRightTransposeRule extends RelOptRule -{ - private static final CorrelateProjectOnFIlterRightTransposeRule INSTANCE = new CorrelateProjectOnFIlterRightTransposeRule(); - - public CorrelateProjectOnFIlterRightTransposeRule() - { - super( - operand( - Correlate.class, - operand(RelNode.class, any()), - operand(Project.class, operand(Filter.class, operand(Uncollect.class, any()))) - )); - } - - @Override - public boolean matches(RelOptRuleCall call) - { - final Correlate correlate = call.rel(0); - final Filter right = call.rel(3); - - // Can't pull up filters that explicitly refer to the correlation variable. - return !CorrelateFilterRTransposeRule.usesCorrelationId(correlate.getCorrelationId(), right.getCondition()); - } - - public static CorrelateProjectOnFIlterRightTransposeRule instance() - { - return INSTANCE; - } - - @Override - public void onMatch(RelOptRuleCall call) - { - final Correlate correlate = call.rel(0); - final RelNode left = call.rel(1); - final Project rightP = call.rel(2); - final Filter rightF = call.rel(3); - - if (rightP.getProjects().size() <=1 && rightP.getChildExps().get(0).getKind() == SqlKind.CAST) { - call.transformTo( - call.builder() - .push(correlate.copy(correlate.getTraitSet(), ImmutableList.of(left, rightF.getInput()))) - .filter(RexUtil.shift(rightF.getCondition(), left.getRowType().getFieldCount())) - .build() - ); - } - } -} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java index 3bfe21d3c72d..519e9365b4a9 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java @@ -121,7 +121,6 @@ public static List rules(PlannerContext plannerContext) retVal.add(ProjectCorrelateTransposeRule.INSTANCE); retVal.add(CorrelateFilterLTransposeRule.instance()); retVal.add(CorrelateFilterRTransposeRule.instance()); - retVal.add(CorrelateProjectOnFIlterRightTransposeRule.instance()); } return retVal; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 6ffb76e76a6b..f8976de7b053 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -3737,148 +3737,4 @@ public void testUnnestWithNotFiltersOnUnnestedColumn() ) ); } - - @Test - public void testUnnestWithSelectorFiltersOnAllColumns() - { - skipVectorize(); - cannotVectorize(); - testQuery( - "SELECT * FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' ", - QUERY_CONTEXT_UNNEST, - ImmutableList.of( - Druids.newScanQueryBuilder() - .dataSource(UnnestDataSource.create( - new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) - )) - .intervals(querySegmentSpec(Filtration.eternity())) - .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .filters(selector("j0.unnest", "b", null)) - .legacy(false) - .context(QUERY_CONTEXT_UNNEST) - .columns(ImmutableList.of( - "__time", - "cnt", - "d1", - "d2", - "dim1", - "dim2", - "dim3", - "dim4", - "dim5", - "dim6", - "f1", - "f2", - "j0.unnest", - "l1", - "l2", - "m1", - "m2", - "unique_dim1" - )) - .build() - ), - ImmutableList.of( - new Object[]{ - 946684800000L, - "", - "a", - "[\"a\",\"b\"]", - "a", - "aa", - "1", - 1.0D, - 0.0D, - 1.0F, - 0.0F, - 7L, - 0L, - 1L, - 1.0F, - 1.0D, - "\"AQAAAEAAAA==\"", - "b" - }, - new Object[]{ - 946771200000L, - "10.1", - "", - "[\"b\",\"c\"]", - "a", - "ab", - "2", - 1.7D, - 1.7D, - 0.1F, - 0.1F, - 325323L, - 325323L, - 1L, - 2.0F, - 2.0D, - "\"AQAAAQAAAAHNBA==\"", - "b" - } - ) - ); - } - - @Test - public void testUnnestWithSelectorFiltersOnSelectedColumn() - { - skipVectorize(); - cannotVectorize(); - testQuery( - "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' ", - QUERY_CONTEXT_UNNEST, - ImmutableList.of( - Druids.newScanQueryBuilder() - .dataSource(UnnestDataSource.create( - new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) - )) - .intervals(querySegmentSpec(Filtration.eternity())) - .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .filters(selector("j0.unnest", "b", null)) - .legacy(false) - .context(QUERY_CONTEXT_UNNEST) - .columns(ImmutableList.of("j0.unnest")) - .build() - ), - ImmutableList.of( - new Object[]{"b"}, - new Object[]{"b"} - ) - ); - } - - @Test - public void testUnnestWithOrFiltersOnSelectedColumn() - { - skipVectorize(); - cannotVectorize(); - testQuery( - "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3='b' ", - QUERY_CONTEXT_UNNEST, - ImmutableList.of( - Druids.newScanQueryBuilder() - .dataSource(UnnestDataSource.create( - new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) - )) - .intervals(querySegmentSpec(Filtration.eternity())) - .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .filters(selector("j0.unnest", "b", null)) - .legacy(false) - .context(QUERY_CONTEXT_UNNEST) - .columns(ImmutableList.of("j0.unnest")) - .build() - ), - ImmutableList.of( - new Object[]{"b"}, - new Object[]{"b"} - ) - ); - } }