diff --git a/docs/querying/datasource.md b/docs/querying/datasource.md index 442024dd75f5..e3eacf032bf3 100644 --- a/docs/querying/datasource.md +++ b/docs/querying/datasource.md @@ -412,15 +412,13 @@ The `unnest` datasource uses the following syntax: "type": "expression", "expression": "\"column_reference\"" }, - "outputName": "unnested_target_column", - "allowList": [] - }, + "outputName": "unnested_target_column" + } ``` * `dataSource.type`: Set this to `unnest`. * `dataSource.base`: Defines the datasource you want to unnest. * `dataSource.base.type`: The type of datasource you want to unnest, such as a table. * `dataSource.virtualColumn`: [Virtual column](virtual-columns.md) that references the nested values. The output name of this column is reused as the name of the column that contains unnested values. You can replace the source column with the unnested column by specifying the source column's name or a new column by specifying a different name. Outputting it to a new column can help you verify that you get the results that you expect but isn't required. -* `dataSource.allowList`: Optional. The subset of values you want to unnest. To learn more about how to use the `unnest` datasource, see the [unnest tutorial](../tutorials/tutorial-unnest-datasource.md). diff --git a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java index 2bb24784adfb..d46cd226a8a5 100644 --- a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java +++ b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java @@ -29,8 +29,6 @@ import org.apache.druid.segment.VirtualColumn; import org.apache.druid.utils.JvmUtils; -import javax.annotation.Nullable; -import java.util.LinkedHashSet; import java.util.List; import java.util.Objects; import java.util.Set; @@ -39,7 +37,6 @@ /** * The data source for representing an unnest operation. - * * An unnest data source has the following: * a base data source which is to be unnested * the column name of the MVD which will be unnested @@ -50,27 +47,23 @@ public class UnnestDataSource implements DataSource { private final DataSource base; private final VirtualColumn virtualColumn; - private final LinkedHashSet allowList; private UnnestDataSource( DataSource dataSource, - VirtualColumn virtualColumn, - LinkedHashSet allowList + VirtualColumn virtualColumn ) { this.base = dataSource; this.virtualColumn = virtualColumn; - this.allowList = allowList; } @JsonCreator public static UnnestDataSource create( @JsonProperty("base") DataSource base, - @JsonProperty("virtualColumn") VirtualColumn virtualColumn, - @Nullable @JsonProperty("allowList") LinkedHashSet allowList + @JsonProperty("virtualColumn") VirtualColumn virtualColumn ) { - return new UnnestDataSource(base, virtualColumn, allowList); + return new UnnestDataSource(base, virtualColumn); } @JsonProperty("base") @@ -85,12 +78,6 @@ public VirtualColumn getVirtualColumn() return virtualColumn; } - @JsonProperty("allowList") - public LinkedHashSet getAllowList() - { - return allowList; - } - @Override public Set getTableNames() { @@ -109,7 +96,7 @@ public DataSource withChildren(List children) if (children.size() != 1) { throw new IAE("Expected [1] child, got [%d]", children.size()); } - return new UnnestDataSource(children.get(0), virtualColumn, allowList); + return new UnnestDataSource(children.get(0), virtualColumn); } @Override @@ -146,17 +133,15 @@ public Function createSegmentMapFunction( baseSegment -> new UnnestSegmentReference( segmentMapFn.apply(baseSegment), - virtualColumn, - allowList + virtualColumn ) ); - } @Override public DataSource withUpdatedDataSource(DataSource newSource) { - return new UnnestDataSource(newSource, virtualColumn, allowList); + return new UnnestDataSource(newSource, virtualColumn); } @Override @@ -203,7 +188,6 @@ public String toString() return "UnnestDataSource{" + "base=" + base + ", column='" + virtualColumn + '\'' + - ", allowList=" + allowList + '}'; } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java index 1a3bbf4e0015..9179d55dc1f8 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java @@ -30,7 +30,6 @@ import javax.annotation.Nullable; import java.util.Arrays; import java.util.Collections; -import java.util.LinkedHashSet; import java.util.List; /** @@ -50,9 +49,6 @@ * unnestCursor.advance() -> 'e' *

*

- * The allowSet if available helps skip over elements which are not in the allowList by moving the cursor to - * the next available match. - *

* The index reference points to the index of each row that the unnest cursor is accessing through currentVal * The index ranges from 0 to the size of the list in each row which is held in the unnestListForCurrentRow *

@@ -65,18 +61,17 @@ public class UnnestColumnValueSelectorCursor implements Cursor private final ColumnValueSelector columnValueSelector; private final VirtualColumn unnestColumn; private final String outputName; - private final LinkedHashSet allowSet; private int index; private Object currentVal; private List unnestListForCurrentRow; private boolean needInitialization; + public UnnestColumnValueSelectorCursor( Cursor cursor, ColumnSelectorFactory baseColumnSelectorFactory, VirtualColumn unnestColumn, - String outputColumnName, - LinkedHashSet allowSet + String outputColumnName ) { this.baseCursor = cursor; @@ -89,7 +84,6 @@ public UnnestColumnValueSelectorCursor( this.index = 0; this.outputName = outputColumnName; this.needInitialization = true; - this.allowSet = allowSet; } @Override @@ -194,11 +188,7 @@ public boolean isNull() public Object getObject() { if (!unnestListForCurrentRow.isEmpty()) { - if (allowSet == null || allowSet.isEmpty()) { - return unnestListForCurrentRow.get(index); - } else if (allowSet.contains((String) unnestListForCurrentRow.get(index))) { - return unnestListForCurrentRow.get(index); - } + return unnestListForCurrentRow.get(index); } return null; } @@ -253,9 +243,7 @@ public void advance() @Override public void advanceUninterruptibly() { - do { - advanceAndUpdate(); - } while (matchAndProceed()); + advanceAndUpdate(); } @Override @@ -304,19 +292,10 @@ private void getNextRow() /** * This initializes the unnest cursor and creates data structures * to start iterating over the values to be unnested. - * This would also create a bitset for dictonary encoded columns to - * check for matching values specified in allowedList of UnnestDataSource. */ private void initialize() { getNextRow(); - if (allowSet != null) { - if (!allowSet.isEmpty()) { - if (!allowSet.contains((String) unnestListForCurrentRow.get(index))) { - advance(); - } - } - } needInitialization = false; } @@ -338,22 +317,4 @@ private void advanceAndUpdate() index++; } } - - /** - * This advances the unnest cursor in cases where an allowList is specified - * and the current value at the unnest cursor is not in the allowList. - * The cursor in such cases is moved till the next match is found. - * - * @return a boolean to indicate whether to stay or move cursor - */ - private boolean matchAndProceed() - { - boolean matchStatus; - if (allowSet == null || allowSet.isEmpty()) { - matchStatus = true; - } else { - matchStatus = allowSet.contains((String) unnestListForCurrentRow.get(index)); - } - return !baseCursor.isDone() && !matchStatus; - } } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java index ba91f27815d8..6c4c1a953439 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java @@ -31,8 +31,6 @@ import org.joda.time.DateTime; import javax.annotation.Nullable; -import java.util.BitSet; -import java.util.LinkedHashSet; /** * The cursor to help unnest MVDs with dictionary encoding. @@ -58,15 +56,6 @@ *

* Total 5 advance calls above *

- * The allowSet, if available, helps skip over elements that are not in the allowList by moving the cursor to - * the next available match. The hashSet is converted into a bitset (during initialization) for efficiency. - * If allowSet is ['c', 'd'] then the advance moves over to the next available match - *

- * advance() -> 2 -> 'c' - * advance() -> 3 -> 'd' (advances base cursor first) - * advance() -> 2 -> 'c' - *

- * Total 3 advance calls in this case *

* The index reference points to the index of each row that the unnest cursor is accessing * The indexedInts for each row are held in the indexedIntsForCurrentRow object @@ -79,8 +68,6 @@ public class UnnestDimensionCursor implements Cursor private final DimensionSelector dimSelector; private final VirtualColumn unnestColumn; private final String outputName; - private final LinkedHashSet allowSet; - private final BitSet allowedBitSet; private final ColumnSelectorFactory baseColumnSelectorFactory; private int index; @Nullable @@ -92,8 +79,7 @@ public UnnestDimensionCursor( Cursor cursor, ColumnSelectorFactory baseColumnSelectorFactory, VirtualColumn unnestColumn, - String outputColumnName, - LinkedHashSet allowSet + String outputColumnName ) { this.baseCursor = cursor; @@ -106,8 +92,6 @@ public UnnestDimensionCursor( this.index = 0; this.outputName = outputColumnName; this.needInitialization = true; - this.allowSet = allowSet; - this.allowedBitSet = new BitSet(); } @Override @@ -158,6 +142,9 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) @Override public boolean matches() { + if (indexedIntsForCurrentRow.size() <= 0) { + return false; + } return idForLookup == indexedIntsForCurrentRow.get(index); } @@ -188,14 +175,7 @@ public Object getObject() if (indexedIntsForCurrentRow == null || indexedIntsForCurrentRow.size() == 0) { return null; } - if (allowedBitSet.isEmpty()) { - if (allowSet == null || allowSet.isEmpty()) { - return lookupName(indexedIntsForCurrentRow.get(index)); - } - } else if (allowedBitSet.get(indexedIntsForCurrentRow.get(index))) { - return lookupName(indexedIntsForCurrentRow.get(index)); - } - return null; + return lookupName(indexedIntsForCurrentRow.get(index)); } @Override @@ -207,9 +187,6 @@ public Class classOfObject() @Override public int getValueCardinality() { - if (!allowedBitSet.isEmpty()) { - return allowedBitSet.cardinality(); - } return dimSelector.getValueCardinality(); } @@ -290,9 +267,7 @@ public void advance() @Override public void advanceUninterruptibly() { - do { - advanceAndUpdate(); - } while (matchAndProceed()); + advanceAndUpdate(); } @Override @@ -330,23 +305,13 @@ public void reset() @Nullable private void initialize() { - IdLookup idLookup = dimSelector.idLookup(); + index = 0; this.indexIntsForRow = new SingleIndexInts(); - if (allowSet != null && !allowSet.isEmpty() && idLookup != null) { - for (String s : allowSet) { - if (idLookup.lookupId(s) >= 0) { - allowedBitSet.set(idLookup.lookupId(s)); - } - } - } + if (dimSelector.getObject() != null) { this.indexedIntsForCurrentRow = dimSelector.getRow(); } - if (!allowedBitSet.isEmpty()) { - if (!allowedBitSet.get(indexedIntsForCurrentRow.get(index))) { - advance(); - } - } + needInitialization = false; } @@ -362,6 +327,9 @@ private void advanceAndUpdate() index = 0; if (!baseCursor.isDone()) { baseCursor.advanceUninterruptibly(); + if (!baseCursor.isDone()) { + indexedIntsForCurrentRow = dimSelector.getRow(); + } } } else { if (index >= indexedIntsForCurrentRow.size() - 1) { @@ -378,23 +346,6 @@ private void advanceAndUpdate() } } - /** - * This advances the unnest cursor in cases where an allowList is specified - * and the current value at the unnest cursor is not in the allowList. - * The cursor in such cases is moved till the next match is found. - * - * @return a boolean to indicate whether to stay or move cursor - */ - private boolean matchAndProceed() - { - boolean matchStatus; - if ((allowSet == null || allowSet.isEmpty()) && allowedBitSet.isEmpty()) { - matchStatus = true; - } else { - matchStatus = allowedBitSet.get(indexedIntsForCurrentRow.get(index)); - } - return !baseCursor.isDone() && !matchStatus; - } // Helper class to help in returning // getRow from the dimensionSelector diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java b/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java index a5db64d2b2d3..8f3871390045 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java @@ -28,7 +28,6 @@ import javax.annotation.Nullable; import java.io.Closeable; import java.io.IOException; -import java.util.LinkedHashSet; import java.util.Optional; /** @@ -41,17 +40,15 @@ public class UnnestSegmentReference implements SegmentReference private final SegmentReference baseSegment; private final VirtualColumn unnestColumn; - private final LinkedHashSet allowSet; + public UnnestSegmentReference( SegmentReference baseSegment, - VirtualColumn unnestColumn, - LinkedHashSet allowList + VirtualColumn unnestColumn ) { this.baseSegment = baseSegment; this.unnestColumn = unnestColumn; - this.allowSet = allowList; } @Override @@ -103,8 +100,7 @@ public StorageAdapter asStorageAdapter() { return new UnnestStorageAdapter( baseSegment.asStorageAdapter(), - unnestColumn, - allowSet + unnestColumn ); } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index 939333c75402..94c471d1a6e7 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -62,18 +62,15 @@ public class UnnestStorageAdapter implements StorageAdapter private final StorageAdapter baseAdapter; private final VirtualColumn unnestColumn; private final String outputColumnName; - private final LinkedHashSet allowSet; public UnnestStorageAdapter( final StorageAdapter baseAdapter, - final VirtualColumn unnestColumn, - final LinkedHashSet allowSet + final VirtualColumn unnestColumn ) { this.baseAdapter = baseAdapter; this.unnestColumn = unnestColumn; this.outputColumnName = unnestColumn.getOutputName(); - this.allowSet = allowSet; } @Override @@ -87,7 +84,7 @@ public Sequence makeCursors( ) { final String inputColumn = getUnnestInputIfDirectAccess(); - final Pair filterPair = computeBaseAndPostCorrelateFilters( + final Pair filterPair = computeBaseAndPostUnnestFilters( filter, virtualColumns, inputColumn, @@ -120,16 +117,14 @@ public Sequence makeCursors( retVal, retVal.getColumnSelectorFactory(), unnestColumn, - outputColumnName, - allowSet + outputColumnName ); } else { retVal = new UnnestColumnValueSelectorCursor( retVal, retVal.getColumnSelectorFactory(), unnestColumn, - outputColumnName, - allowSet + outputColumnName ); } } else { @@ -137,8 +132,7 @@ public Sequence makeCursors( retVal, retVal.getColumnSelectorFactory(), unnestColumn, - outputColumnName, - allowSet + outputColumnName ); } return PostJoinCursor.wrap( @@ -260,9 +254,9 @@ public VirtualColumn getUnnestColumn() * @param inputColumn input column to unnest if it's a direct access; otherwise null * @param inputColumnCapabilites input column capabilities if known; otherwise null * - * @return pair of pre- and post-correlate filters + * @return pair of pre- and post-unnest filters */ - private Pair computeBaseAndPostCorrelateFilters( + private Pair computeBaseAndPostUnnestFilters( @Nullable final Filter queryFilter, final VirtualColumns queryVirtualColumns, @Nullable final String inputColumn, @@ -282,7 +276,7 @@ void add(@Nullable final Filter filter) final Set requiredColumns = filter.getRequiredColumns(); - // Run filter post-correlate if it refers to any virtual columns. + // Run filter post-unnest if it refers to any virtual columns. if (queryVirtualColumns.getVirtualColumns().length > 0) { for (String column : requiredColumns) { if (queryVirtualColumns.exists(column)) { @@ -293,13 +287,15 @@ void add(@Nullable final Filter filter) } if (requiredColumns.contains(outputColumnName)) { - // Try to move filter pre-correlate if possible. + // Rewrite filter post-unnest if possible. final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(filter, inputColumn, inputColumnCapabilites); if (newFilter != null) { + // Add the rewritten filter pre-unnest, so we get the benefit of any indexes, and so we avoid unnesting + // any rows that do not match this filter at all. preFilters.add(newFilter); - } else { - postFilters.add(filter); } + // Add original filter post-unnest no matter what: we need to filter out any extraneous unnested values. + postFilters.add(filter); } else { preFilters.add(filter); } @@ -308,11 +304,6 @@ void add(@Nullable final Filter filter) final FilterSplitter filterSplitter = new FilterSplitter(); - if (allowSet != null && !allowSet.isEmpty()) { - // Filter on input column if possible (it may be faster); otherwise use output column. - filterSplitter.add(new InDimFilter(inputColumn != null ? inputColumn : outputColumnName, allowSet)); - } - if (queryFilter instanceof AndFilter) { for (Filter filter : ((AndFilter) queryFilter).getFilters()) { filterSplitter.add(filter); diff --git a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java index aec318aa7a9e..2f4119af8f83 100644 --- a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java +++ b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java @@ -111,8 +111,7 @@ public class QueryRunnerTestHelper "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", null, ExprMacroTable.nil() - ), - null + ) ); public static final Granularity DAY_GRAN = Granularities.DAY; diff --git a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java index 826d612f678c..a09d2fc256e9 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java @@ -239,8 +239,7 @@ public void testGroupBy() "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", null, ExprMacroTable.nil() - ), - null + ) )) .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) .setDimensions(new DefaultDimensionSpec("quality", "alias")) @@ -453,8 +452,7 @@ public void testGroupByOnMissingColumn() "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", null, ExprMacroTable.nil() - ), - null + ) )) .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) .setDimensions( @@ -566,8 +564,7 @@ public void testGroupByOnUnnestedVirtualColumn() "mv_to_array(placementish)", ColumnType.STRING_ARRAY, TestExprMacroTable.INSTANCE - ), - null + ) ); GroupByQuery query = makeQueryBuilder() @@ -655,8 +652,7 @@ public void testGroupByOnUnnestedVirtualMultiColumn() "array(\"market\",\"quality\")", ColumnType.STRING, TestExprMacroTable.INSTANCE - ), - null + ) ); GroupByQuery query = makeQueryBuilder() diff --git a/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java index 160f06140c40..a0fa2b929e09 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java @@ -22,7 +22,6 @@ import com.google.common.collect.Lists; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.DefaultGenericQueryMetricsFactory; import org.apache.druid.query.Druids; import org.apache.druid.query.QueryPlus; @@ -46,9 +45,7 @@ import org.junit.runners.Parameterized; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -96,27 +93,6 @@ private Druids.ScanQueryBuilder newTestUnnestQuery() .legacy(legacy); } - private Druids.ScanQueryBuilder newTestUnnestQueryWithAllowSet() - { - List allowList = Arrays.asList("a", "b", "c"); - LinkedHashSet allowSet = new LinkedHashSet(allowList); - return Druids.newScanQueryBuilder() - .dataSource(UnnestDataSource.create( - new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), - new ExpressionVirtualColumn( - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, - "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", - null, - ExprMacroTable.nil() - ), - allowSet - )) - .columns(Collections.emptyList()) - .eternityInterval() - .limit(3) - .legacy(legacy); - } - @Test public void testScanOnUnnest() { @@ -188,8 +164,7 @@ public void testUnnestRunnerVirtualColumnsUsingSingleColumn() "mv_to_array(placementish)", ColumnType.STRING, TestExprMacroTable.INSTANCE - ), - null + ) )) .columns(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) .eternityInterval() @@ -258,8 +233,7 @@ public void testUnnestRunnerVirtualColumnsUsingMultipleColumn() "array(\"market\",\"quality\")", ColumnType.STRING, TestExprMacroTable.INSTANCE - ), - null + ) )) .columns(QueryRunnerTestHelper.MARKET_DIMENSION, QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) .eternityInterval() @@ -461,65 +435,6 @@ public void testUnnestRunnerWithOrdering() ScanQueryRunnerTest.verify(ascendingExpectedResults, results); } - @Test - public void testUnnestRunnerNonNullAllowSet() - { - ScanQuery query = newTestUnnestQueryWithAllowSet() - .intervals(I_0112_0114) - .columns(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) - .limit(3) - .build(); - - final QueryRunner queryRunner = QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn( - FACTORY, - new IncrementalIndexSegment( - index, - QueryRunnerTestHelper.SEGMENT_ID - ), - query, - "rtIndexvc" - ); - - Iterable results = queryRunner.run(QueryPlus.wrap(query)).toList(); - - String[] columnNames; - if (legacy) { - columnNames = new String[]{ - getTimestampName() + ":TIME", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST - }; - } else { - columnNames = new String[]{ - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST - }; - } - String[] values; - if (legacy) { - values = new String[]{ - "2011-01-12T00:00:00.000Z\ta", - "2011-01-12T00:00:00.000Z\tb", - "2011-01-13T00:00:00.000Z\ta" - }; - } else { - values = new String[]{ - "a", - "b", - "a" - }; - } - - final List>> events = ScanQueryRunnerTest.toEvents(columnNames, legacy, values); - List expectedResults = toExpected( - events, - legacy - ? Lists.newArrayList(getTimestampName(), QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) - : Collections.singletonList(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST), - 0, - 3 - ); - ScanQueryRunnerTest.verify(expectedResults, results); - } - private String getTimestampName() { diff --git a/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java index e822913489dd..c30022216bd4 100644 --- a/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java @@ -258,8 +258,7 @@ public void testTopNStringVirtualColumnUnnest() "mv_to_array(\"placementish\")", ColumnType.STRING_ARRAY, TestExprMacroTable.INSTANCE - ), - null + ) )) .granularity(QueryRunnerTestHelper.ALL_GRAN) .dimension(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) @@ -341,8 +340,7 @@ public void testTopNStringVirtualMultiColumnUnnest() "array(\"market\",\"quality\")", ColumnType.STRING, TestExprMacroTable.INSTANCE - ), - null + ) )) .granularity(QueryRunnerTestHelper.ALL_GRAN) .dimension(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java index 11b6b7e1d6a0..bafb50f7089a 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java @@ -35,14 +35,11 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.LinkedHashSet; import java.util.List; public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandlingTest { private static String OUTPUT_NAME = "unnested-column"; - private static LinkedHashSet IGNORE_SET = null; - private static LinkedHashSet IGNORE_SET1 = new LinkedHashSet<>(Arrays.asList("b", "f")); @BeforeClass public static void setUpClass() @@ -73,8 +70,7 @@ public void test_list_unnest_cursors() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - IGNORE_SET + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -107,8 +103,7 @@ public void test_list_unnest_cursors_user_supplied_list() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - IGNORE_SET + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -139,8 +134,7 @@ public void test_list_unnest_cursors_user_supplied_list_only_nulls() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - IGNORE_SET + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -176,8 +170,7 @@ public void test_list_unnest_cursors_user_supplied_list_mixed_with_nulls() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - IGNORE_SET + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -210,8 +203,7 @@ public void test_list_unnest_cursors_user_supplied_strings_and_no_lists() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - IGNORE_SET + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -240,8 +232,7 @@ public void test_list_unnest_cursors_user_supplied_strings_mixed_with_list() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - IGNORE_SET + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -275,8 +266,7 @@ public void test_list_unnest_cursors_user_supplied_lists_three_levels() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", null, ExprMacroTable.nil()), - OUTPUT_NAME, - IGNORE_SET + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -309,15 +299,13 @@ public void test_list_unnest_of_unnest_cursors_user_supplied_list_three_levels() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", null, ExprMacroTable.nil()), - OUTPUT_NAME, - IGNORE_SET + OUTPUT_NAME ); UnnestColumnValueSelectorCursor parentCursor = new UnnestColumnValueSelectorCursor( childCursor, childCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"" + OUTPUT_NAME + "\"", null, ExprMacroTable.nil()), - "tmp-out", - IGNORE_SET + "tmp-out" ); ColumnValueSelector unnestColumnValueSelector = parentCursor.getColumnSelectorFactory() .makeColumnValueSelector("tmp-out"); @@ -351,8 +339,7 @@ public void test_list_unnest_cursors_user_supplied_list_with_nulls() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - IGNORE_SET + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -389,8 +376,7 @@ public void test_list_unnest_cursors_user_supplied_list_with_dups() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - IGNORE_SET + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -408,44 +394,6 @@ public void test_list_unnest_cursors_user_supplied_list_with_dups() Assert.assertEquals(k, 10); } - @Test - public void test_list_unnest_cursors_user_supplied_list_with_ignore_set() - { - List inputList = Arrays.asList( - Arrays.asList("a", "b", "c"), - Arrays.asList("e", "f", "g", "h", "i"), - Collections.singletonList("j") - ); - - List expectedResults = Arrays.asList("b", "f"); - - //Create base cursor - ListCursor listCursor = new ListCursor(inputList); - - //Create unnest cursor - UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor( - listCursor, - listCursor.getColumnSelectorFactory(), - new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - IGNORE_SET1 - ); - ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() - .makeColumnValueSelector(OUTPUT_NAME); - int k = 0; - while (!unnestCursor.isDone()) { - Object valueSelectorVal = unnestColumnValueSelector.getObject(); - if (valueSelectorVal == null) { - Assert.assertEquals(null, expectedResults.get(k)); - } else { - Assert.assertEquals(expectedResults.get(k), valueSelectorVal.toString()); - } - k++; - unnestCursor.advance(); - } - Assert.assertEquals(k, 2); - } - @Test public void test_list_unnest_cursors_user_supplied_list_double() { @@ -465,8 +413,7 @@ public void test_list_unnest_cursors_user_supplied_list_double() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - IGNORE_SET + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -499,8 +446,7 @@ public void test_list_unnest_cursors_user_supplied_list_float() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - IGNORE_SET + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -533,8 +479,7 @@ public void test_list_unnest_cursors_user_supplied_list_long() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - IGNORE_SET + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -570,8 +515,7 @@ public void test_list_unnest_cursors_user_supplied_list_three_level_arrays_and_m listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", null, ExprMacroTable.nil()), - OUTPUT_NAME, - IGNORE_SET + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); @@ -606,8 +550,7 @@ public void test_list_unnest_cursors_dimSelector() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - IGNORE_SET + OUTPUT_NAME ); // should return a column value selector for this case BaseSingleValueDimensionSelector unnestDimSelector = (BaseSingleValueDimensionSelector) unnestCursor.getColumnSelectorFactory() @@ -649,8 +592,7 @@ public void test_list_unnest_cursors_user_supplied_list_of_integers() listCursor, listCursor.getColumnSelectorFactory(), new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), - OUTPUT_NAME, - IGNORE_SET + OUTPUT_NAME ); ColumnValueSelector unnestColumnValueSelector = unnestCursor.getColumnSelectorFactory() .makeColumnValueSelector(OUTPUT_NAME); diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index e064587a05ea..d852547e4ec4 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -46,7 +46,6 @@ import org.junit.Test; import java.util.Arrays; -import java.util.LinkedHashSet; import java.util.List; public class UnnestStorageAdapterTest extends InitializedNullHandlingTest @@ -56,13 +55,10 @@ public class UnnestStorageAdapterTest extends InitializedNullHandlingTest private static IncrementalIndexStorageAdapter INCREMENTAL_INDEX_STORAGE_ADAPTER; private static UnnestStorageAdapter UNNEST_STORAGE_ADAPTER; private static UnnestStorageAdapter UNNEST_STORAGE_ADAPTER1; - private static UnnestStorageAdapter UNNEST_STORAGE_ADAPTER2; - private static UnnestStorageAdapter UNNEST_STORAGE_ADAPTER3; private static List ADAPTERS; private static String COLUMNNAME = "multi-string1"; private static String OUTPUT_COLUMN_NAME = "unnested-multi-string1"; private static String OUTPUT_COLUMN_NAME1 = "unnested-multi-string1-again"; - private static LinkedHashSet IGNORE_SET = new LinkedHashSet<>(Arrays.asList("1", "3", "5")); @BeforeClass public static void setup() @@ -86,29 +82,17 @@ public static void setup() INCREMENTAL_INDEX_STORAGE_ADAPTER = new IncrementalIndexStorageAdapter(INCREMENTAL_INDEX); UNNEST_STORAGE_ADAPTER = new UnnestStorageAdapter( INCREMENTAL_INDEX_STORAGE_ADAPTER, - new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), - null + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()) ); + UNNEST_STORAGE_ADAPTER1 = new UnnestStorageAdapter( - INCREMENTAL_INDEX_STORAGE_ADAPTER, - new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), - IGNORE_SET - ); - UNNEST_STORAGE_ADAPTER2 = new UnnestStorageAdapter( UNNEST_STORAGE_ADAPTER, - new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME1, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), - null - ); - UNNEST_STORAGE_ADAPTER3 = new UnnestStorageAdapter( - UNNEST_STORAGE_ADAPTER1, - new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME1, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), - IGNORE_SET + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME1, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()) ); + ADAPTERS = ImmutableList.of( UNNEST_STORAGE_ADAPTER, - UNNEST_STORAGE_ADAPTER1, - UNNEST_STORAGE_ADAPTER2, - UNNEST_STORAGE_ADAPTER3 + UNNEST_STORAGE_ADAPTER1 ); } @@ -217,9 +201,9 @@ public void test_unnest_adapters_basic() @Test public void test_two_levels_of_unnest_adapters() { - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER2.makeCursors( + Sequence cursorSequence = UNNEST_STORAGE_ADAPTER1.makeCursors( null, - UNNEST_STORAGE_ADAPTER2.getInterval(), + UNNEST_STORAGE_ADAPTER1.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, @@ -256,148 +240,6 @@ public void test_two_levels_of_unnest_adapters() }); } - @Test - public void test_unnest_adapters_with_allowList() - { - final String columnName = "multi-string1"; - - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER1.makeCursors( - null, - UNNEST_STORAGE_ADAPTER1.getInterval(), - VirtualColumns.EMPTY, - Granularities.ALL, - false, - null - ); - - cursorSequence.accumulate(null, (accumulated, cursor) -> { - ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); - - DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME)); - ColumnValueSelector valueSelector = factory.makeColumnValueSelector(OUTPUT_COLUMN_NAME); - - int count = 0; - while (!cursor.isDone()) { - Object dimSelectorVal = dimSelector.getObject(); - Object valueSelectorVal = valueSelector.getObject(); - if (dimSelectorVal == null) { - Assert.assertNull(dimSelectorVal); - } else if (valueSelectorVal == null) { - Assert.assertNull(valueSelectorVal); - } - cursor.advance(); - count++; - } - /* - each row has 8 distinct entries. - allowlist has 3 entries also the value cardinality - unnest will have 3 distinct entries - */ - Assert.assertEquals(count, 3); - Assert.assertEquals(dimSelector.getValueCardinality(), 3); - return null; - }); - } - - @Test - public void test_two_levels_of_unnest_adapters_with_allowList() - { - final String columnName = "multi-string1"; - - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER3.makeCursors( - null, - UNNEST_STORAGE_ADAPTER3.getInterval(), - VirtualColumns.EMPTY, - Granularities.ALL, - false, - null - ); - UnnestStorageAdapter adapter = UNNEST_STORAGE_ADAPTER3; - assertColumnReadsIdentifier(adapter.getUnnestColumn(), columnName); - Assert.assertEquals( - adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).isDictionaryEncoded(), - ColumnCapabilities.Capable.TRUE - ); - Assert.assertNull(adapter.getMaxValue(OUTPUT_COLUMN_NAME)); - Assert.assertNull(adapter.getMinValue(OUTPUT_COLUMN_NAME)); - - cursorSequence.accumulate(null, (accumulated, cursor) -> { - ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); - - DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME1)); - ColumnValueSelector valueSelector = factory.makeColumnValueSelector(OUTPUT_COLUMN_NAME1); - - int count = 0; - while (!cursor.isDone()) { - Object dimSelectorVal = dimSelector.getObject(); - Object valueSelectorVal = valueSelector.getObject(); - if (dimSelectorVal == null) { - Assert.assertNull(dimSelectorVal); - } else if (valueSelectorVal == null) { - Assert.assertNull(valueSelectorVal); - } - cursor.advance(); - count++; - } - /* - each row has 8 distinct entries. - allowlist has 3 entries also the value cardinality - unnest will have 3 distinct entries - unnest of that unnest will have 3*3 = 9 entries - */ - Assert.assertEquals(count, 9); - Assert.assertEquals(dimSelector.getValueCardinality(), 3); - return null; - }); - } - - @Test - public void test_unnest_adapters_methods_with_allowList() - { - final String columnName = "multi-string1"; - - Sequence cursorSequence = UNNEST_STORAGE_ADAPTER1.makeCursors( - null, - UNNEST_STORAGE_ADAPTER1.getInterval(), - VirtualColumns.EMPTY, - Granularities.ALL, - false, - null - ); - UnnestStorageAdapter adapter = UNNEST_STORAGE_ADAPTER1; - assertColumnReadsIdentifier(adapter.getUnnestColumn(), columnName); - Assert.assertEquals( - adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).isDictionaryEncoded(), - ColumnCapabilities.Capable.TRUE - ); - Assert.assertNull(adapter.getMaxValue(OUTPUT_COLUMN_NAME)); - Assert.assertNull(adapter.getMinValue(OUTPUT_COLUMN_NAME)); - - cursorSequence.accumulate(null, (accumulated, cursor) -> { - ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); - - DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(OUTPUT_COLUMN_NAME)); - IdLookup idlookUp = dimSelector.idLookup(); - Assert.assertFalse(dimSelector.isNull()); - int[] indices = new int[]{1, 3, 5}; - int count = 0; - while (!cursor.isDone()) { - Object dimSelectorVal = dimSelector.getObject(); - Assert.assertEquals(idlookUp.lookupId((String) dimSelectorVal), indices[count]); - // after unnest first entry in get row should equal the object - // and the row size will always be 1 - Assert.assertEquals(dimSelector.getRow().get(0), indices[count]); - Assert.assertEquals(dimSelector.getRow().size(), 1); - Assert.assertNotNull(dimSelector.makeValueMatcher(OUTPUT_COLUMN_NAME)); - cursor.advance(); - count++; - } - Assert.assertEquals(dimSelector.getValueCardinality(), 3); - Assert.assertEquals(count, 3); - return null; - }); - } - private static void assertColumnReadsIdentifier(final VirtualColumn column, final String identifier) { MatcherAssert.assertThat(column, CoreMatchers.instanceOf(ExpressionVirtualColumn.class)); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java index 10a12fbec3b3..5c5bfd8aae4e 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java @@ -172,8 +172,7 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) correlateRowSignature.getColumnName(correlateRowSignature.size() - 1), Calcites.getColumnTypeForRelDataType(rexNodeToUnnest.getType()), getPlannerContext().getExprMacroTable() - ), - null + ) ), correlateRowSignature, getPlannerContext(), diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 67d1971a16ef..f8976de7b053 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -2697,6 +2697,37 @@ public void testUnnestInline() ); } + @Test + public void testUnnestInlineWithCount() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT COUNT(*) FROM (select c from UNNEST(ARRAY[1,2,3]) as unnested(c))", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource( + InlineDataSource.fromIterable( + ImmutableList.of( + new Object[]{1L}, + new Object[]{2L}, + new Object[]{3L} + ), + RowSignature.builder().add("EXPR$0", ColumnType.LONG).build() + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .context(QUERY_CONTEXT_UNNEST) + .aggregators(aggregators(new CountAggregatorFactory("a0"))) + .build() + ), + ImmutableList.of( + new Object[]{3L} + ) + ); + } + @Test public void testUnnest() { @@ -2713,8 +2744,7 @@ public void testUnnest() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -2768,15 +2798,13 @@ public void testUnnestTwice() "j0.unnest", "string_to_array(\"dim1\",'\\u005C.')", ColumnType.STRING_ARRAY - ), - null + ) ), expressionVirtualColumn( "_j0.unnest", "\"dim3\"", ColumnType.STRING - ), - null + ) ) ) .intervals(querySegmentSpec(Filtration.eternity())) @@ -2835,15 +2863,13 @@ public void testUnnestTwiceWithFiltersAndExpressions() "j0.unnest", "string_to_array(\"dim1\",'\\u005C.')", ColumnType.STRING_ARRAY - ), - null + ) ), expressionVirtualColumn( "_j0.unnest", "\"dim3\"", ColumnType.STRING - ), - null + ) ) ) .intervals(querySegmentSpec(Filtration.eternity())) @@ -2898,8 +2924,7 @@ public void testUnnestWithGroupBy() GroupByQuery.builder() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .setInterval(querySegmentSpec(Filtration.eternity())) .setContext(QUERY_CONTEXT_UNNEST) @@ -2943,8 +2968,7 @@ public void testUnnestWithGroupByOrderBy() GroupByQuery.builder() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .setInterval(querySegmentSpec(Filtration.eternity())) .setContext(QUERY_CONTEXT_UNNEST) @@ -2999,8 +3023,7 @@ public void testUnnestWithGroupByOrderByWithLimit() new TopNQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .dimension(new DefaultDimensionSpec("j0.unnest", "_d0", ColumnType.STRING)) @@ -3038,8 +3061,7 @@ public void testUnnestWithGroupByHaving() GroupByQuery.builder() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .setInterval(querySegmentSpec(Filtration.eternity())) .setContext(QUERY_CONTEXT_UNNEST) @@ -3081,8 +3103,7 @@ public void testUnnestWithLimit() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3116,8 +3137,7 @@ public void testUnnestFirstQueryOnSelect() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3178,8 +3198,7 @@ public void testUnnestWithFilters() .context(QUERY_CONTEXT_UNNEST) .build() ), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3228,8 +3247,7 @@ public void testUnnestWithFiltersInsideAndOutside() .context(QUERY_CONTEXT_UNNEST) .build() ), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3262,8 +3280,7 @@ public void testUnnestWithFiltersOutside() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3305,8 +3322,7 @@ public void testUnnestWithInFilters() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .filters(new InDimFilter("dim2", ImmutableList.of("a", "b", "ab", "abc"), null)) @@ -3342,8 +3358,7 @@ public void testUnnestVirtualWithColumns() Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY), - null + expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3386,8 +3401,7 @@ public void testUnnestWithGroupByOrderByOnVirtualColumn() "j0.unnest", "array(\"dim2\",\"dim4\")", ColumnType.STRING_ARRAY - ), - null + ) ) ) .setInterval(querySegmentSpec(Filtration.eternity())) @@ -3455,8 +3469,7 @@ public void testUnnestWithJoinOnTheLeft() "(\"dim2\" == \"j0.dim2\")", JoinType.INNER ), - expressionVirtualColumn("_j0.unnest", "\"dim3\"", ColumnType.STRING), - null + expressionVirtualColumn("_j0.unnest", "\"dim3\"", ColumnType.STRING) )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) @@ -3545,4 +3558,183 @@ public void testUnnestWithConstant() ) ); } + + @Test + public void testUnnestWithSQLFunctionOnUnnestedColumn() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT strlen(d3) FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .virtualColumns(expressionVirtualColumn("v0", "strlen(\"j0.unnest\")", ColumnType.LONG)) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("v0")) + .build() + ), + useDefault ? + ImmutableList.of( + new Object[]{1}, + new Object[]{1}, + new Object[]{1}, + new Object[]{1}, + new Object[]{1}, + new Object[]{0}, + new Object[]{0}, + new Object[]{0} + ) : + ImmutableList.of( + new Object[]{1}, + new Object[]{1}, + new Object[]{1}, + new Object[]{1}, + new Object[]{1}, + new Object[]{0}, + new Object[]{null}, + new Object[]{null} + ) + ); + } + + @Test + public void testUnnestWithINFiltersWithLeftRewrite() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('a','b')", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(new InDimFilter("j0.unnest", ImmutableSet.of("a", "b"), null)) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } + + @Test + public void testUnnestWithINFiltersWithNoLeftRewrite() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d45 FROM druid.numfoo, UNNEST(ARRAY[dim4,dim5]) as unnested (d45) where d45 IN ('a','b')", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(new InDimFilter("j0.unnest", ImmutableSet.of("a", "b"), null)) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"a"}, + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"b"} + ) + ); + } + + @Test + public void testUnnestWithInvalidINFiltersOnUnnestedColumn() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3 IN ('foo','bar')", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(new InDimFilter("j0.unnest", ImmutableSet.of("foo", "bar"), null)) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of() + ); + } + + @Test + public void testUnnestWithNotFiltersOnUnnestedColumn() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) where d3!='d' ", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING) + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(not(selector("j0.unnest", "d", null))) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + useDefault ? + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"c"}, + new Object[]{""}, + new Object[]{""}, + new Object[]{""} + ) : + ImmutableList.of( + new Object[]{"a"}, + new Object[]{"b"}, + new Object[]{"b"}, + new Object[]{"c"}, + new Object[]{""}, + new Object[]{null}, + new Object[]{null} + ) + ); + } }