From dfad1ae0eec35dc478413e57350783b0e9885c1b Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Tue, 7 Mar 2023 04:23:06 -0800 Subject: [PATCH 1/9] Various changes and fixes to UNNEST. Native changes: 1) UnnestDataSource: Replace "column" and "outputName" with "virtualColumn". This enables pushing expressions into the datasource. This in turn allows us to do the next thing... 2) UnnestStorageAdapter: Logically apply query-level filters and virtual columns after the unnest operation. (Physically, filters are pulled up, when possible.) This is beneficial because it allows filters and virtual columns to reference the unnested column, and because it is consistent with how the join datasource works. 3) Various documentation updates, including declaring "unnest" as an experimental feature for now. SQL changes: 1) Rename DruidUnnestDatasourceRel (& Rule) to DruidUnnestRel (& Rule). The rel is simplified: it only handles the UNNEST part of a correlated join. Constant UNNESTs are handled with regular inline rels. 2) Rework DruidCorrelateUnnestRule to focus on pulling Projects from the left side up above the Correlate. New test testUnnestTwice verifies that this works even when two UNNESTs are stacked on the same table. 3) Include ProjectCorrelateTransposeRule from Calcite to encourage pushing mappings down below the left-hand side of the Correlate. 4) Add a new CorrelateFilterLTransposeRule and CorrelateFilterRTransposeRule to handle pulling Filters up above the Correlate. New tests testUnnestWithFiltersOutside and testUnnestTwiceWithFilters verify this behavior. 5) Require a context feature flag for SQL UNNEST, since it's undocumented. As part of this, also cleaned up how we handle feature flags in SQL. They're now hooked into EngineFeatures, which is useful because not all engines support all features. 
--- docs/misc/math-expr.md | 2 +- docs/querying/datasource.md | 14 +- docs/querying/multi-value-dimensions.md | 11 +- docs/querying/sql-data-types.md | 4 +- docs/tutorials/tutorial-unnest-datasource.md | 59 ++- .../druid/msq/sql/MSQTaskSqlEngine.java | 5 +- .../apache/druid/query/UnnestDataSource.java | 62 +-- .../query/scan/ScanQueryQueryToolChest.java | 4 +- .../UnnestColumnValueSelectorCursor.java | 63 ++- .../druid/segment/UnnestDimensionCursor.java | 22 +- .../druid/segment/UnnestSegmentReference.java | 15 +- .../druid/segment/UnnestStorageAdapter.java | 151 ++++-- .../join/HashJoinSegmentStorageAdapter.java | 16 +- .../virtual/ExpressionVirtualColumn.java | 57 ++- .../druid/query/QueryRunnerTestHelper.java | 10 +- .../groupby/UnnestGroupByQueryRunnerTest.java | 201 +++----- .../query/scan/UnnestScanQueryRunnerTest.java | 41 +- .../query/topn/UnnestTopNQueryRunnerTest.java | 28 +- .../org/apache/druid/segment/ListCursor.java | 18 +- .../UnnestColumnValueSelectorCursorTest.java | 61 ++- .../segment/UnnestStorageAdapterTest.java | 39 +- .../builtin/ArraySqlAggregator.java | 2 +- .../sql/calcite/expression/Expressions.java | 44 +- .../builtin/CastOperatorConversion.java | 2 + ...iValueStringToArrayOperatorConversion.java | 2 +- .../external/ExternalTableScanRule.java | 4 +- .../calcite/planner/CalciteRulesManager.java | 4 +- .../sql/calcite/planner/IngestHandler.java | 4 +- .../sql/calcite/planner/PlannerContext.java | 33 +- .../sql/calcite/planner/QueryHandler.java | 4 +- .../calcite/rel/DruidCorrelateUnnestRel.java | 241 +++++---- .../sql/calcite/rel/DruidOuterQueryRel.java | 7 +- .../druid/sql/calcite/rel/DruidQuery.java | 60 +-- .../druid/sql/calcite/rel/DruidQueryRel.java | 20 +- .../druid/sql/calcite/rel/DruidRel.java | 14 + .../druid/sql/calcite/rel/DruidRels.java | 40 +- .../druid/sql/calcite/rel/DruidUnionRel.java | 2 - .../calcite/rel/DruidUnnestDatasourceRel.java | 180 ------- .../druid/sql/calcite/rel/DruidUnnestRel.java | 198 ++++++++ 
.../sql/calcite/rel/PartialDruidQuery.java | 85 +--- .../rule/CorrelateFilterLTransposeRule.java | 68 +++ .../rule/CorrelateFilterRTransposeRule.java | 112 +++++ .../rule/DruidCorrelateUnnestRule.java | 219 ++++---- .../calcite/rule/DruidLogicalValuesRule.java | 13 +- .../druid/sql/calcite/rule/DruidRules.java | 18 +- .../rule/DruidUnnestDatasourceRule.java | 107 ---- .../sql/calcite/rule/DruidUnnestRule.java | 177 +++++++ .../druid/sql/calcite/run/EngineFeature.java | 19 +- .../sql/calcite/run/NativeSqlEngine.java | 5 +- .../druid/sql/calcite/run/SqlEngine.java | 2 +- .../druid/sql/calcite/table/InlineTable.java | 8 +- .../druid/sql/calcite/view/ViewSqlEngine.java | 5 +- .../sql/calcite/BaseCalciteQueryTest.java | 79 +-- .../sql/calcite/CalciteArraysQueryTest.java | 468 ++++++++++++------ .../sql/calcite/CalciteScanSignatureTest.java | 4 +- .../sql/calcite/CalciteWindowQueryTest.java | 3 +- .../sql/calcite/IngestionTestSqlEngine.java | 2 +- 57 files changed, 1897 insertions(+), 1241 deletions(-) delete mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java create mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestRel.java create mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterLTransposeRule.java create mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterRTransposeRule.java delete mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestDatasourceRule.java create mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestRule.java diff --git a/docs/misc/math-expr.md b/docs/misc/math-expr.md index 810bdd1df33e..2f50d4102cfd 100644 --- a/docs/misc/math-expr.md +++ b/docs/misc/math-expr.md @@ -54,7 +54,7 @@ For logical operators, a number is true if and only if it is positive (0 or nega [Multi-value string dimensions](../querying/multi-value-dimensions.md) are supported and may be treated as either scalar or array 
typed values, as follows: * When treated as a scalar type, the expression is automatically transformed so that the scalar operation is applied across all values of the multi-valued type, mimicking Druid's native behavior. -* Druid coerces values that result in arrays back into the native Druid string type for grouping and aggregation. Grouping on multi-value string dimensions in Druid groups by the individual values, not the 'array'. This behavior produces results similar to the `UNNEST` operator available in many SQL dialects. Alternatively, you can use the `array_to_string` function to perform the aggregation on a _stringified_ version of the complete array and therefore preserve the complete row. To transform the stringified dimension back into the true native array type, use `string_to_array` in an expression post-aggregator. +* Druid coerces values that result in arrays back into the native Druid string type for grouping and aggregation. Grouping on multi-value string dimensions in Druid groups by the individual values, not the 'array'. This behavior produces results similar to an implicit SQL `UNNEST` operation. Alternatively, you can use the `array_to_string` function to perform the aggregation on a _stringified_ version of the complete array and therefore preserve the complete row. To transform the stringified dimension back into the true native array type, use `string_to_array` in an expression post-aggregator. The following built-in functions are available. diff --git a/docs/querying/datasource.md b/docs/querying/datasource.md index 0a16e1d16b17..0c9c83d0c8cf 100644 --- a/docs/querying/datasource.md +++ b/docs/querying/datasource.md @@ -373,9 +373,10 @@ always be correct. ### `unnest` -> The unnest datasource is currently only available as part of a native query. +> The unnest datasource is [experimental](../development/experimental.md). Its API and behavior are subject +> to change in future releases. 
It is not recommended to use this feature in production at this time. -Use the `unnest` datasource to unnest a column with multiple values in an array. +Use the `unnest` datasource to unnest a column with multiple values in an array. For example, you have a source column that looks like this: | Nested | @@ -409,7 +410,10 @@ The `unnest` datasource uses the following syntax: "type": "table", "name": "nested_data" }, - "column": "nested_source_column", + "virtualColumn": { + "type": "expression", + "expression": "\"column_reference\"" + }, "outputName": "unnested_target_column", "allowList": [] }, @@ -418,9 +422,7 @@ The `unnest` datasource uses the following syntax: * `dataSource.type`: Set this to `unnest`. * `dataSource.base`: Defines the datasource you want to unnest. * `dataSource.base.type`: The type of datasource you want to unnest, such as a table. - * `dataSource.base.name`: The name of the datasource you want to unnest. -* `dataSource.column`: The name of the source column that contains the nested values. -* `dataSource.outputName`: The name you want to assign to the column that will contain the unnested values. You can replace the source column with the unnested column by specifying the source column's name or a new column by specifying a different name. Outputting it to a new column can help you verify that you get the results that you expect but isn't required. +* `dataSource.virtualColumn`: [Virtual column](virtual-columns.md) that references the nested values. The output name of this column is reused as the name of the column that contains unnested values. You can replace the source column with the unnested column by specifying the source column's name or a new column by specifying a different name. Outputting it to a new column can help you verify that you get the results that you expect but isn't required. * `dataSource.allowList`: Optional. The subset of values you want to unnest. 
To learn more about how to use the `unnest` datasource, see the [unnest tutorial](../tutorials/tutorial-unnest-datasource.md). diff --git a/docs/querying/multi-value-dimensions.md b/docs/querying/multi-value-dimensions.md index 36529a4541f8..215c7e422117 100644 --- a/docs/querying/multi-value-dimensions.md +++ b/docs/querying/multi-value-dimensions.md @@ -140,12 +140,11 @@ This "selector" filter would match row4 of the dataset above: ### Grouping topN and groupBy queries can group on multi-value dimensions. When grouping on a multi-value dimension, _all_ values -from matching rows will be used to generate one group per value. This can be thought of as the equivalent to the -`UNNEST` operator used on an `ARRAY` type that many SQL dialects support. This means it's possible for a query to return -more groups than there are rows. For example, a topN on the dimension `tags` with filter `"t1" AND "t3"` would match -only row1, and generate a result with three groups: `t1`, `t2`, and `t3`. If you only need to include values that match -your filter, you can use a [filtered dimensionSpec](dimensionspecs.md#filtered-dimensionspecs). This can also -improve performance. +from matching rows will be used to generate one group per value. This behaves similarly to an implicit SQL `UNNEST` +operation. This means it's possible for a query to return more groups than there are rows. For example, a topN on the +dimension `tags` with filter `"t1" AND "t3"` would match only row1, and generate a result with three groups: +`t1`, `t2`, and `t3`. If you only need to include values that match your filter, you can use a +[filtered dimensionSpec](dimensionspecs.md#filtered-dimensionspecs). This can also improve performance. 
## Example: GroupBy query with no filtering diff --git a/docs/querying/sql-data-types.md b/docs/querying/sql-data-types.md index bb1766d27e57..9e2b6739c642 100644 --- a/docs/querying/sql-data-types.md +++ b/docs/querying/sql-data-types.md @@ -78,8 +78,8 @@ applied to all values for each row individually. Multi-value string dimensions c [multi-value string functions](sql-multivalue-string-functions.md), which can perform powerful array-aware operations. Grouping by a multi-value expression observes the native Druid multi-value aggregation behavior, which is similar to -the `UNNEST` functionality available in some other SQL dialects. Refer to the documentation on -[multi-value string dimensions](multi-value-dimensions.md) for additional details. +an implicit SQL `UNNEST`. Refer to the documentation on [multi-value string dimensions](multi-value-dimensions.md) +for additional details. > Because multi-value dimensions are treated by the SQL planner as `VARCHAR`, there are some inconsistencies between how > they are handled in Druid SQL and in native queries. For example, expressions involving multi-value dimensions may be diff --git a/docs/tutorials/tutorial-unnest-datasource.md b/docs/tutorials/tutorial-unnest-datasource.md index a35fc713d6eb..95769a4f690e 100644 --- a/docs/tutorials/tutorial-unnest-datasource.md +++ b/docs/tutorials/tutorial-unnest-datasource.md @@ -25,7 +25,8 @@ title: "Tutorial: Unnest data in a column" > If you're looking for information about how to unnest `COMPLEX` columns, see [Nested columns](../querying/nested-columns.md). -> The unnest datasource is currently only available as part of a native query. +> The unnest datasource is [experimental](../development/experimental.md). Its API and behavior are subject +> to change in future releases. It is not recommended to use this feature in production at this time. This tutorial demonstrates how to use the unnest datasource to unnest a column that has data stored in arrays. 
For example, if you have a column named `dim3` with values like `[a,b]` or `[c,d,f]`, the unnest datasource can output the data to a new column with individual rows that contain single values like `a` and `b`. When doing this, be mindful of the following: @@ -161,9 +162,11 @@ The following native Scan query returns the rows of the datasource and unnests t "type": "table", "name": "nested_data" }, - "column": "dim3", - "outputName": "unnest-dim3", - "allowList": [] + "virtualColumn": { + "type": "expression", + "name": "unnest-dim3", + "expression": "\"dim3\"" + } }, "intervals": { "type": "intervals", @@ -222,9 +225,11 @@ The following query returns an unnested version of the column `dim3` as the colu "dataSource": { "type": "unnest", "base": "nested_data", - "column": "dim3", - "outputName": "unnest-dim3", - "allowList": [] + "virtualColumn": { + "type": "expression", + "name": "unnest-dim3", + "expression": "\"dim3\"" + } }, "intervals": ["-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"], "granularity": "all", @@ -264,8 +269,11 @@ The example topN query unnests `dim3` into the column `unnest-dim3`. 
The query u "type": "table", "name": "nested_data" }, - "column": "dim3", - "outputName": "unnest-dim3", + "virtualColumn": { + "type": "expression", + "name": "unnest-dim3", + "expression": "\"dim3\"" + }, "allowList": null }, "dimension": { @@ -369,9 +377,11 @@ This query joins the `nested_data` table with itself and outputs the unnested da "condition": "(\"m1\" == \"j0.v0\")", "joinType": "INNER" }, - "column": "dim3", - "outputName": "unnest-dim3", - "allowList": [] + "virtualColumn": { + "type": "expression", + "name": "unnest-dim3", + "expression": "\"dim3\"" + } }, "intervals": { "type": "intervals", @@ -520,13 +530,15 @@ When you run the query, pay special attention to how the total number of rows ha "type": "table", "name": "nested_data2" }, - "column": "dim3", - "outputName": "unnest-dim3", + "virtualColumn": { + "type": "expression", + "name": "unnest-dim3", + "expression": "\"dim3\"" + }, "allowList": [] }, "column": "dim2", - "outputName": "unnest-dim2", - "allowList": [] + "outputName": "unnest-dim2" }, "intervals": { "type": "intervals", @@ -583,9 +595,11 @@ You can also use the `unnest` datasource to unnest an inline datasource. 
The fol ] ] }, - "column": "inline_data", - "outputName": "output", - "allowList": [] + "virtualColumn": { + "type": "expression", + "name": "output", + "expression": "\"inline_data\"" + } }, "intervals": { "type": "intervals", @@ -625,8 +639,11 @@ The following Scan query uses the `nested_data2` table you created in [Load data "type": "table", "name": "nested_data2" }, - "column": "v0", - "outputName": "unnest-v0" + "virtualColumn": { + "type": "expression", + "name": "unnest-v0", + "expression": "\"v0\"" + } } "intervals": { "type": "intervals", diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java index e62e5d5d100c..154396620568 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/sql/MSQTaskSqlEngine.java @@ -104,13 +104,16 @@ public RelDataType resultTypeForInsert(RelDataTypeFactory typeFactory, RelDataTy } @Override - public boolean feature(EngineFeature feature, PlannerContext plannerContext) + public boolean featureAvailable(EngineFeature feature, PlannerContext plannerContext) { switch (feature) { case ALLOW_BINDABLE_PLAN: case TIMESERIES_QUERY: case TOPN_QUERY: case TIME_BOUNDARY_QUERY: + case GROUPING_SETS: + case WINDOW_FUNCTIONS: + case UNNEST: return false; case CAN_SELECT: case CAN_INSERT: diff --git a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java index 407aea5c39b9..2bb24784adfb 100644 --- a/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java +++ b/processing/src/main/java/org/apache/druid/query/UnnestDataSource.java @@ -26,6 +26,7 @@ import org.apache.druid.query.planning.DataSourceAnalysis; import org.apache.druid.segment.SegmentReference; import 
org.apache.druid.segment.UnnestSegmentReference; +import org.apache.druid.segment.VirtualColumn; import org.apache.druid.utils.JvmUtils; import javax.annotation.Nullable; @@ -48,32 +49,28 @@ public class UnnestDataSource implements DataSource { private final DataSource base; - private final String column; - private final String outputName; + private final VirtualColumn virtualColumn; private final LinkedHashSet allowList; private UnnestDataSource( DataSource dataSource, - String columnName, - String outputName, + VirtualColumn virtualColumn, LinkedHashSet allowList ) { this.base = dataSource; - this.column = columnName; - this.outputName = outputName; + this.virtualColumn = virtualColumn; this.allowList = allowList; } @JsonCreator public static UnnestDataSource create( @JsonProperty("base") DataSource base, - @JsonProperty("column") String columnName, - @JsonProperty("outputName") String outputName, + @JsonProperty("virtualColumn") VirtualColumn virtualColumn, @Nullable @JsonProperty("allowList") LinkedHashSet allowList ) { - return new UnnestDataSource(base, columnName, outputName, allowList); + return new UnnestDataSource(base, virtualColumn, allowList); } @JsonProperty("base") @@ -82,16 +79,10 @@ public DataSource getBase() return base; } - @JsonProperty("column") - public String getColumn() + @JsonProperty("virtualColumn") + public VirtualColumn getVirtualColumn() { - return column; - } - - @JsonProperty("outputName") - public String getOutputName() - { - return outputName; + return virtualColumn; } @JsonProperty("allowList") @@ -118,7 +109,7 @@ public DataSource withChildren(List children) if (children.size() != 1) { throw new IAE("Expected [1] child, got [%d]", children.size()); } - return new UnnestDataSource(children.get(0), column, outputName, allowList); + return new UnnestDataSource(children.get(0), virtualColumn, allowList); } @Override @@ -151,22 +142,13 @@ public Function createSegmentMapFunction( ); return JvmUtils.safeAccumulateThreadCpuTime( 
cpuTimeAccumulator, - () -> { - if (column == null) { - return segmentMapFn; - } else if (column.isEmpty()) { - return segmentMapFn; - } else { - return - baseSegment -> - new UnnestSegmentReference( - segmentMapFn.apply(baseSegment), - column, - outputName, - allowList - ); - } - } + () -> + baseSegment -> + new UnnestSegmentReference( + segmentMapFn.apply(baseSegment), + virtualColumn, + allowList + ) ); } @@ -174,7 +156,7 @@ public Function createSegmentMapFunction( @Override public DataSource withUpdatedDataSource(DataSource newSource) { - return new UnnestDataSource(newSource, column, outputName, allowList); + return new UnnestDataSource(newSource, virtualColumn, allowList); } @Override @@ -205,15 +187,14 @@ public boolean equals(Object o) return false; } UnnestDataSource that = (UnnestDataSource) o; - return column.equals(that.column) - && outputName.equals(that.outputName) + return virtualColumn.equals(that.virtualColumn) && base.equals(that.base); } @Override public int hashCode() { - return Objects.hash(base, column, outputName); + return Objects.hash(base, virtualColumn); } @Override @@ -221,8 +202,7 @@ public String toString() { return "UnnestDataSource{" + "base=" + base + - ", column='" + column + '\'' + - ", outputName='" + outputName + '\'' + + ", column='" + virtualColumn + '\'' + ", allowList=" + allowList + '}'; } diff --git a/processing/src/main/java/org/apache/druid/query/scan/ScanQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/scan/ScanQueryQueryToolChest.java index 70bba3a77714..4a48563689a7 100644 --- a/processing/src/main/java/org/apache/druid/query/scan/ScanQueryQueryToolChest.java +++ b/processing/src/main/java/org/apache/druid/query/scan/ScanQueryQueryToolChest.java @@ -36,6 +36,7 @@ import org.apache.druid.query.QueryToolChest; import org.apache.druid.query.aggregation.MetricManipulationFn; import org.apache.druid.segment.VirtualColumn; +import org.apache.druid.segment.column.ColumnCapabilities; import 
org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.utils.CloseableUtils; @@ -172,7 +173,8 @@ public RowSignature resultArraySignature(final ScanQuery query) final VirtualColumn virtualColumn = query.getVirtualColumns().getVirtualColumn(columnName); if (virtualColumn != null) { - columnType = virtualColumn.capabilities(columnName).toColumnType(); + final ColumnCapabilities capabilities = virtualColumn.capabilities(c -> null, columnName); + columnType = capabilities != null ? capabilities.toColumnType() : null; } else { // Unknown type. In the future, it would be nice to have a way to fill these in. columnType = null; diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java index 5d4340329897..1a3bbf4e0015 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestColumnValueSelectorCursor.java @@ -28,8 +28,8 @@ import org.joda.time.DateTime; import javax.annotation.Nullable; -import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.LinkedHashSet; import java.util.List; @@ -63,7 +63,7 @@ public class UnnestColumnValueSelectorCursor implements Cursor private final Cursor baseCursor; private final ColumnSelectorFactory baseColumnSelectorFactory; private final ColumnValueSelector columnValueSelector; - private final String columnName; + private final VirtualColumn unnestColumn; private final String outputName; private final LinkedHashSet allowSet; private int index; @@ -73,16 +73,19 @@ public class UnnestColumnValueSelectorCursor implements Cursor public UnnestColumnValueSelectorCursor( Cursor cursor, - ColumnSelectorFactory baseColumSelectorFactory, - String columnName, + ColumnSelectorFactory baseColumnSelectorFactory, + 
VirtualColumn unnestColumn, String outputColumnName, LinkedHashSet allowSet ) { this.baseCursor = cursor; - this.baseColumnSelectorFactory = baseColumSelectorFactory; - this.columnValueSelector = this.baseColumnSelectorFactory.makeColumnValueSelector(columnName); - this.columnName = columnName; + this.baseColumnSelectorFactory = baseColumnSelectorFactory; + this.columnValueSelector = unnestColumn.makeColumnValueSelector( + unnestColumn.getOutputName(), + this.baseColumnSelectorFactory + ); + this.unnestColumn = unnestColumn; this.index = 0; this.outputName = outputColumnName; this.needInitialization = true; @@ -215,14 +218,21 @@ public ColumnCapabilities getColumnCapabilities(String column) if (!outputName.equals(column)) { return baseColumnSelectorFactory.getColumnCapabilities(column); } - final ColumnCapabilities capabilities = baseColumnSelectorFactory.getColumnCapabilities(columnName); - if (capabilities.isArray()) { + + final ColumnCapabilities capabilities = unnestColumn.capabilities( + baseColumnSelectorFactory, + unnestColumn.getOutputName() + ); + + if (capabilities == null) { + return null; + } else if (capabilities.isArray()) { return ColumnCapabilitiesImpl.copyOf(capabilities).setType(capabilities.getElementType()); - } - if (capabilities.hasMultipleValues().isTrue()) { + } else if (capabilities.hasMultipleValues().isTrue()) { return ColumnCapabilitiesImpl.copyOf(capabilities).setHasMultipleValues(false); + } else { + return capabilities; } - return baseColumnSelectorFactory.getColumnCapabilities(columnName); } }; } @@ -276,28 +286,18 @@ public void reset() /** * This method populates the objects when the base cursor moves to the next row - * - * @param firstRun flag to populate one time object references to hold values for unnest cursor */ - private void getNextRow(boolean firstRun) + private void getNextRow() { currentVal = this.columnValueSelector.getObject(); if (currentVal == null) { - if (!firstRun) { - unnestListForCurrentRow = new ArrayList<>(); 
- } - unnestListForCurrentRow.add(null); + unnestListForCurrentRow = Collections.singletonList(null); + } else if (currentVal instanceof List) { + unnestListForCurrentRow = (List) currentVal; + } else if (currentVal instanceof Object[]) { + unnestListForCurrentRow = Arrays.asList((Object[]) currentVal); } else { - if (currentVal instanceof List) { - unnestListForCurrentRow = (List) currentVal; - } else if (currentVal instanceof Object[]) { - unnestListForCurrentRow = Arrays.asList((Object[]) currentVal); - } else if (currentVal.getClass().equals(String.class)) { - if (!firstRun) { - unnestListForCurrentRow = new ArrayList<>(); - } - unnestListForCurrentRow.add(currentVal); - } + unnestListForCurrentRow = Collections.singletonList(currentVal); } } @@ -309,8 +309,7 @@ private void getNextRow(boolean firstRun) */ private void initialize() { - this.unnestListForCurrentRow = new ArrayList<>(); - getNextRow(needInitialization); + getNextRow(); if (allowSet != null) { if (!allowSet.isEmpty()) { if (!allowSet.contains((String) unnestListForCurrentRow.get(index))) { @@ -333,7 +332,7 @@ private void advanceAndUpdate() index = 0; baseCursor.advance(); if (!baseCursor.isDone()) { - getNextRow(needInitialization); + getNextRow(); } } else { index++; diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java index 93a56767bbfb..ba91f27815d8 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestDimensionCursor.java @@ -77,28 +77,32 @@ public class UnnestDimensionCursor implements Cursor { private final Cursor baseCursor; private final DimensionSelector dimSelector; - private final String columnName; + private final VirtualColumn unnestColumn; private final String outputName; private final LinkedHashSet allowSet; private final BitSet allowedBitSet; private final 
ColumnSelectorFactory baseColumnSelectorFactory; private int index; - @Nullable private IndexedInts indexedIntsForCurrentRow; + @Nullable + private IndexedInts indexedIntsForCurrentRow; private boolean needInitialization; private SingleIndexInts indexIntsForRow; public UnnestDimensionCursor( Cursor cursor, ColumnSelectorFactory baseColumnSelectorFactory, - String columnName, + VirtualColumn unnestColumn, String outputColumnName, LinkedHashSet allowSet ) { this.baseCursor = cursor; this.baseColumnSelectorFactory = baseColumnSelectorFactory; - this.dimSelector = this.baseColumnSelectorFactory.makeDimensionSelector(DefaultDimensionSpec.of(columnName)); - this.columnName = columnName; + this.dimSelector = unnestColumn.makeDimensionSelector( + DefaultDimensionSpec.of(unnestColumn.getOutputName()), + this.baseColumnSelectorFactory + ); + this.unnestColumn = unnestColumn; this.index = 0; this.outputName = outputColumnName; this.needInitialization = true; @@ -254,14 +258,18 @@ public ColumnCapabilities getColumnCapabilities(String column) // This is fine for STRING types // But going forward if the dimension to be unnested is of type ARRAY, // this should strip down to the base type of the array - final ColumnCapabilities capabilities = baseColumnSelectorFactory.getColumnCapabilities(columnName); + final ColumnCapabilities capabilities = unnestColumn.capabilities( + baseColumnSelectorFactory, + unnestColumn.getOutputName() + ); + if (capabilities.isArray()) { return ColumnCapabilitiesImpl.copyOf(capabilities).setType(capabilities.getElementType()); } if (capabilities.hasMultipleValues().isTrue()) { return ColumnCapabilitiesImpl.copyOf(capabilities).setHasMultipleValues(false); } - return baseColumnSelectorFactory.getColumnCapabilities(columnName); + return capabilities; } }; } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java b/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java index 
9da6b8132cbb..a5db64d2b2d3 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestSegmentReference.java @@ -40,15 +40,17 @@ public class UnnestSegmentReference implements SegmentReference private static final Logger log = new Logger(UnnestSegmentReference.class); private final SegmentReference baseSegment; - private final String dimension; - private final String renamedOutputDimension; + private final VirtualColumn unnestColumn; private final LinkedHashSet allowSet; - public UnnestSegmentReference(SegmentReference baseSegment, String dimension, String outputName, LinkedHashSet allowList) + public UnnestSegmentReference( + SegmentReference baseSegment, + VirtualColumn unnestColumn, + LinkedHashSet allowList + ) { this.baseSegment = baseSegment; - this.dimension = dimension; - this.renamedOutputDimension = outputName; + this.unnestColumn = unnestColumn; this.allowSet = allowList; } @@ -101,8 +103,7 @@ public StorageAdapter asStorageAdapter() { return new UnnestStorageAdapter( baseSegment.asStorageAdapter(), - dimension, - renamedOutputDimension, + unnestColumn, allowSet ); } diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index f76ab89270af..a8ee22d570de 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -20,6 +20,7 @@ package org.apache.druid.segment; import com.google.common.collect.Lists; +import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.guava.Sequences; @@ -30,13 +31,19 @@ import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.ListIndexed; 
import org.apache.druid.segment.filter.AndFilter; +import org.apache.druid.segment.filter.Filters; +import org.apache.druid.segment.join.PostJoinCursor; +import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.joda.time.DateTime; import org.joda.time.Interval; import javax.annotation.Nullable; -import java.util.Arrays; +import java.util.ArrayList; +import java.util.Collections; import java.util.LinkedHashSet; +import java.util.List; import java.util.Objects; +import java.util.Set; /** * This class serves as the Storage Adapter for the Unnest Segment and is responsible for creating the cursors @@ -46,20 +53,19 @@ public class UnnestStorageAdapter implements StorageAdapter { private final StorageAdapter baseAdapter; - private final String dimensionToUnnest; + private final VirtualColumn unnestColumn; private final String outputColumnName; private final LinkedHashSet allowSet; public UnnestStorageAdapter( final StorageAdapter baseAdapter, - final String dimension, - final String outputColumnName, + final VirtualColumn unnestColumn, final LinkedHashSet allowSet ) { this.baseAdapter = baseAdapter; - this.dimensionToUnnest = dimension; - this.outputColumnName = outputColumnName; + this.unnestColumn = unnestColumn; + this.outputColumnName = unnestColumn.getOutputName(); this.allowSet = allowSet; } @@ -73,22 +79,12 @@ public Sequence makeCursors( @Nullable QueryMetrics queryMetrics ) { - Filter updatedFilter; - if (allowSet != null && !allowSet.isEmpty()) { - final InDimFilter allowListFilters; - allowListFilters = new InDimFilter(dimensionToUnnest, allowSet); - if (filter != null) { - updatedFilter = new AndFilter(Arrays.asList(filter, allowListFilters)); - } else { - updatedFilter = allowListFilters; - } - } else { - updatedFilter = filter; - } + final Pair filterPair = computeBaseAndPostJoinFilters(filter, virtualColumns); + final Sequence baseCursorSequence = baseAdapter.makeCursors( - updatedFilter, + filterPair.lhs, interval, - virtualColumns, + 
VirtualColumns.create(Collections.singletonList(unnestColumn)), gran, descending, queryMetrics @@ -99,13 +95,16 @@ public Sequence makeCursors( cursor -> { Objects.requireNonNull(cursor); Cursor retVal = cursor; - ColumnCapabilities capabilities = cursor.getColumnSelectorFactory().getColumnCapabilities(dimensionToUnnest); + ColumnCapabilities capabilities = unnestColumn.capabilities( + cursor.getColumnSelectorFactory(), + unnestColumn.getOutputName() + ); if (capabilities != null) { if (capabilities.isDictionaryEncoded().and(capabilities.areDictionaryValuesUnique()).isTrue()) { retVal = new UnnestDimensionCursor( retVal, retVal.getColumnSelectorFactory(), - dimensionToUnnest, + unnestColumn, outputColumnName, allowSet ); @@ -113,7 +112,7 @@ public Sequence makeCursors( retVal = new UnnestColumnValueSelectorCursor( retVal, retVal.getColumnSelectorFactory(), - dimensionToUnnest, + unnestColumn, outputColumnName, allowSet ); @@ -122,12 +121,16 @@ public Sequence makeCursors( retVal = new UnnestColumnValueSelectorCursor( retVal, retVal.getColumnSelectorFactory(), - dimensionToUnnest, + unnestColumn, outputColumnName, allowSet ); } - return retVal; + return PostJoinCursor.wrap( + retVal, + virtualColumns, + filterPair.rhs + ); } ); } @@ -162,7 +165,7 @@ public int getDimensionCardinality(String column) if (!outputColumnName.equals(column)) { return baseAdapter.getDimensionCardinality(column); } - return baseAdapter.getDimensionCardinality(dimensionToUnnest); + return DimensionDictionarySelector.CARDINALITY_UNKNOWN; } @Override @@ -181,30 +184,33 @@ public DateTime getMaxTime() @Override public Comparable getMinValue(String column) { - if (!outputColumnName.equals(column)) { - return baseAdapter.getMinValue(column); + if (outputColumnName.equals(column)) { + return null; } - return baseAdapter.getMinValue(dimensionToUnnest); + + return baseAdapter.getMinValue(column); } @Nullable @Override public Comparable getMaxValue(String column) { - if 
(!outputColumnName.equals(column)) { - return baseAdapter.getMaxValue(column); + if (outputColumnName.equals(column)) { + return null; } - return baseAdapter.getMaxValue(dimensionToUnnest); + + return baseAdapter.getMaxValue(column); } @Nullable @Override public ColumnCapabilities getColumnCapabilities(String column) { - if (!outputColumnName.equals(column)) { - return baseAdapter.getColumnCapabilities(column); + if (outputColumnName.equals(column)) { + return unnestColumn.capabilities(baseAdapter, column); } - return baseAdapter.getColumnCapabilities(dimensionToUnnest); + + return baseAdapter.getColumnCapabilities(column); } @Override @@ -226,9 +232,80 @@ public Metadata getMetadata() return baseAdapter.getMetadata(); } - public String getDimensionToUnnest() + public VirtualColumn getUnnestColumn() + { + return unnestColumn; + } + + private Pair computeBaseAndPostJoinFilters( + @Nullable final Filter queryFilter, + final VirtualColumns queryVirtualColumns + ) + { + class FilterSplitter + { + final List preFilters = new ArrayList<>(); + final List postFilters = new ArrayList<>(); + + void add(@Nullable final Filter filter) + { + if (filter == null) { + return; + } + + final Set requiredColumns = filter.getRequiredColumns(); + + if (requiredColumns.contains(outputColumnName)) { + postFilters.add(filter); + } else { + if (queryVirtualColumns.getVirtualColumns().length > 0) { + for (String column : requiredColumns) { + if (queryVirtualColumns.exists(column)) { + postFilters.add(filter); + return; + } + } + } + + preFilters.add(filter); + } + } + } + + final FilterSplitter filterSplitter = new FilterSplitter(); + + if (allowSet != null && !allowSet.isEmpty()) { + final String inputColumn = getUnnestInputIfDirectAccess(); + + // Filter on input column if possible (it may be faster); otherwise use output column. + filterSplitter.add(new InDimFilter(inputColumn != null ? 
inputColumn : outputColumnName, allowSet)); + } + + if (queryFilter instanceof AndFilter) { + for (Filter filter : ((AndFilter) queryFilter).getFilters()) { + filterSplitter.add(filter); + } + } else { + filterSplitter.add(queryFilter); + } + + return Pair.of( + Filters.maybeAnd(filterSplitter.preFilters).orElse(null), + Filters.maybeAnd(filterSplitter.postFilters).orElse(null) + ); + } + + /** + * Returns the input of {@link #unnestColumn}, if it's a direct access; otherwise returns null. + */ + @Nullable + private String getUnnestInputIfDirectAccess() { - return dimensionToUnnest; + if (unnestColumn instanceof ExpressionVirtualColumn) { + return ((ExpressionVirtualColumn) unnestColumn).getParsedExpression().get().getBindingIfIdentifier(); + } else { + return null; + } } } diff --git a/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java index 9eb5f3dbde25..c71264481121 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java @@ -292,10 +292,18 @@ public Sequence makeCursors( ); final JoinFilterPreAnalysisKey keyCached = joinFilterPreAnalysis.getKey(); + final JoinFilterSplit joinFilterSplit; - if (!keyIn.equals(keyCached)) { - // It is a bug if this happens. The implied key and the cached key should always match. - throw new ISE("Pre-analysis mismatch, cannot execute query"); + if (keyIn.equals(keyCached)) { + // Common case: key used during filter pre-analysis (keyCached) matches key implied by makeCursors call (keyIn). + joinFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis, baseFilter); + } else { + // Less common case: key differs. Re-analyze the filter. This case can happen when an unnest datasource is + // layered on top of a join datasource. 
+ joinFilterSplit = JoinFilterAnalyzer.splitFilter( + JoinFilterAnalyzer.computeJoinFilterPreAnalysis(keyIn), + baseFilter + ); } final List preJoinVirtualColumns = new ArrayList<>(); @@ -309,10 +317,8 @@ public Sequence makeCursors( // We merge the filter on base table specified by the user and filter on the base table that is pushed from // the join - JoinFilterSplit joinFilterSplit = JoinFilterAnalyzer.splitFilter(joinFilterPreAnalysis, baseFilter); preJoinVirtualColumns.addAll(joinFilterSplit.getPushDownVirtualColumns()); - final Sequence baseCursorSequence = baseAdapter.makeCursors( joinFilterSplit.getBaseTableFilter().isPresent() ? joinFilterSplit.getBaseTableFilter().get() : null, interval, diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java index 286626baf80a..80d3d4d48fb4 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ExpressionVirtualColumn.java @@ -84,7 +84,7 @@ public ExpressionVirtualColumn( public ExpressionVirtualColumn( String name, Expr parsedExpression, - ColumnType outputType + @Nullable ColumnType outputType ) { this.name = Preconditions.checkNotNull(name, "name"); @@ -129,6 +129,12 @@ public DimensionSelector makeDimensionSelector( final ColumnSelectorFactory columnSelectorFactory ) { + if (isDirectAccess(columnSelectorFactory)) { + return columnSelectorFactory.makeDimensionSelector( + dimensionSpec.withDimension(parsedExpression.get().getBindingIfIdentifier()) + ); + } + return dimensionSpec.decorate( ExpressionSelectors.makeDimensionSelector( columnSelectorFactory, @@ -141,6 +147,10 @@ public DimensionSelector makeDimensionSelector( @Override public ColumnValueSelector makeColumnValueSelector(String columnName, ColumnSelectorFactory factory) { + if (isDirectAccess(factory)) { + return 
factory.makeColumnValueSelector(parsedExpression.get().getBindingIfIdentifier()); + } + final ColumnCapabilities capabilities = capabilities(factory, name); // we make a special column value selector for values that are expected to be STRING to conform to behavior of // other single and multi-value STRING selectors, whose getObject is expected to produce a single STRING value @@ -154,6 +164,11 @@ public ColumnValueSelector makeColumnValueSelector(String columnName, ColumnS @Override public boolean canVectorize(ColumnInspector inspector) { + if (isDirectAccess(inspector)) { + // Can vectorize if the underlying adapter can vectorize. + return true; + } + final ExpressionPlan plan = ExpressionPlanner.plan(inspector, parsedExpression.get()); return plan.is(ExpressionPlan.Trait.VECTORIZABLE); } @@ -164,18 +179,32 @@ public SingleValueDimensionVectorSelector makeSingleValueVectorDimensionSelector VectorColumnSelectorFactory factory ) { + if (isDirectAccess(factory)) { + return factory.makeSingleValueDimensionSelector( + dimensionSpec.withDimension(parsedExpression.get().getBindingIfIdentifier()) + ); + } + return ExpressionVectorSelectors.makeSingleValueDimensionVectorSelector(factory, parsedExpression.get()); } @Override public VectorValueSelector makeVectorValueSelector(String columnName, VectorColumnSelectorFactory factory) { + if (isDirectAccess(factory)) { + return factory.makeValueSelector(parsedExpression.get().getBindingIfIdentifier()); + } + return ExpressionVectorSelectors.makeVectorValueSelector(factory, parsedExpression.get()); } @Override public VectorObjectSelector makeVectorObjectSelector(String columnName, VectorColumnSelectorFactory factory) { + if (isDirectAccess(factory)) { + return factory.makeObjectSelector(parsedExpression.get().getBindingIfIdentifier()); + } + return ExpressionVectorSelectors.makeVectorObjectSelector(factory, parsedExpression.get()); } @@ -198,6 +227,10 @@ public ColumnCapabilities capabilities(String columnName) @Override public 
ColumnCapabilities capabilities(ColumnInspector inspector, String columnName) { + if (isDirectAccess(inspector)) { + return inspector.getColumnCapabilities(parsedExpression.get().getBindingIfIdentifier()); + } + final ExpressionPlan plan = ExpressionPlanner.plan(inspector, parsedExpression.get()); final ColumnCapabilities inferred = plan.inferColumnCapabilities(outputType); // if we can infer the column capabilities from the expression plan, then use that @@ -278,6 +311,28 @@ public String toString() '}'; } + /** + * Whether this expression is an identifier that directly accesses an underlying column. In this case we skip + * the expression system entirely, and directly return backing columns. + */ + private boolean isDirectAccess(final ColumnInspector inspector) + { + if (parsedExpression.get().isIdentifier()) { + final ColumnCapabilities baseCapabilities = + inspector.getColumnCapabilities(parsedExpression.get().getBindingIfIdentifier()); + + if (outputType == null) { + // No desired output type. Anything from the source is fine. + return true; + } else if (baseCapabilities != null && outputType.equals(baseCapabilities.toColumnType())) { + // Desired output type matches the type from the source. 
+ return true; + } + } + + return false; + } + private Supplier makeCacheKeySupplier() { return Suppliers.memoize(() -> { diff --git a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java index 666af0d2e615..aec318aa7a9e 100644 --- a/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java +++ b/processing/src/test/java/org/apache/druid/query/QueryRunnerTestHelper.java @@ -32,6 +32,7 @@ import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.guava.Sequences; import org.apache.druid.js.JavaScriptConfig; +import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory; @@ -66,6 +67,7 @@ import org.apache.druid.segment.SegmentReference; import org.apache.druid.segment.TestIndex; import org.apache.druid.segment.incremental.IncrementalIndex; +import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.timeline.SegmentId; import org.apache.druid.timeline.TimelineObjectHolder; import org.apache.druid.timeline.VersionedIntervalTimeline; @@ -104,8 +106,12 @@ public class QueryRunnerTestHelper public static final DataSource UNNEST_DATA_SOURCE = UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION, - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + new ExpressionVirtualColumn( + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", + null, + ExprMacroTable.nil() + ), null ); diff --git a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java index 
cc2a722d4606..826d612f678c 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/UnnestGroupByQueryRunnerTest.java @@ -26,7 +26,9 @@ import com.google.common.collect.ImmutableMap; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.ISE; +import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.DataSource; +import org.apache.druid.query.DirectQueryProcessingPool; import org.apache.druid.query.DruidProcessingConfig; import org.apache.druid.query.QueryContexts; import org.apache.druid.query.QueryRunner; @@ -49,7 +51,6 @@ import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.testing.InitializedNullHandlingTest; -import org.joda.time.DateTime; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Rule; @@ -227,28 +228,18 @@ private static ResultRow makeRow(final GroupByQuery query, final String timestam return GroupByQueryRunnerTestHelper.createExpectedRow(query, timestamp, vals); } - private static ResultRow makeRow(final GroupByQuery query, final DateTime timestamp, final Object... vals) - { - return GroupByQueryRunnerTestHelper.createExpectedRow(query, timestamp, vals); - } - - private static List makeRows( - final GroupByQuery query, - final String[] columnNames, - final Object[]... 
values - ) - { - return GroupByQueryRunnerTestHelper.createExpectedRows(query, columnNames, values); - } - @Test public void testGroupBy() { GroupByQuery query = makeQueryBuilder() .setDataSource(UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION, - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + new ExpressionVirtualColumn( + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", + null, + ExprMacroTable.nil() + ), null )) .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) @@ -443,17 +434,8 @@ public void testGroupBy() 252L ) ); - final IncrementalIndex rtIndex = TestIndex.getIncrementalTestIndex(); - final QueryRunner queryRunner = QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn( - factory, - new IncrementalIndexSegment( - rtIndex, - QueryRunnerTestHelper.SEGMENT_ID - ), - query, - "rtIndexvc" - ); - Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, queryRunner, query); + + Iterable results = runQuery(query, TestIndex.getIncrementalTestIndex()); TestHelper.assertExpectedObjects(expectedResults, results, "groupBy"); } @@ -466,8 +448,12 @@ public void testGroupByOnMissingColumn() GroupByQuery query = makeQueryBuilder() .setDataSource(UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION, - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + new ExpressionVirtualColumn( + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", + null, + ExprMacroTable.nil() + ), null )) .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) @@ -487,17 +473,8 @@ public void testGroupByOnMissingColumn() "rows", 52L ) ); - final IncrementalIndex rtIndex = TestIndex.getIncrementalTestIndex(); - final QueryRunner queryRunner = 
QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn( - factory, - new IncrementalIndexSegment( - rtIndex, - QueryRunnerTestHelper.SEGMENT_ID - ), - query, - "rtIndexvc" - ); - Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, queryRunner, query); + + Iterable results = runQuery(query, TestIndex.getIncrementalTestIndex()); TestHelper.assertExpectedObjects(expectedResults, results, "missing-column"); } @@ -509,9 +486,8 @@ public void testGroupByOnUnnestedColumn() GroupByQuery query = makeQueryBuilder() .setDataSource(QueryRunnerTestHelper.UNNEST_DATA_SOURCE) .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) - .setDimensions( - new DefaultDimensionSpec(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, "alias0") - ).setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT) + .setDimensions(new DefaultDimensionSpec(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, "alias0")) + .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT) .setGranularity(QueryRunnerTestHelper.ALL_GRAN) .build(); @@ -524,12 +500,6 @@ public void testGroupByOnUnnestedColumn() "alias0", "a", "rows", 2L ), - makeRow( - query, - "2011-04-01", - "alias0", "preferred", - "rows", 26L - ), makeRow( query, "2011-04-01", @@ -566,6 +536,12 @@ public void testGroupByOnUnnestedColumn() "alias0", "p", "rows", 6L ), + makeRow( + query, + "2011-04-01", + "alias0", "preferred", + "rows", 26L + ), makeRow( query, "2011-04-01", @@ -573,17 +549,8 @@ public void testGroupByOnUnnestedColumn() "rows", 4L ) ); - final IncrementalIndex rtIndex = TestIndex.getIncrementalTestIndex(); - final QueryRunner queryRunner = QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn( - factory, - new IncrementalIndexSegment( - rtIndex, - QueryRunnerTestHelper.SEGMENT_ID - ), - query, - "rtIndexvc" - ); - Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, queryRunner, query); + + Iterable results = runQuery(query, TestIndex.getIncrementalTestIndex()); 
TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-column"); } @@ -594,8 +561,12 @@ public void testGroupByOnUnnestedVirtualColumn() final DataSource unnestDataSource = UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), - "vc", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + new ExpressionVirtualColumn( + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + "mv_to_array(placementish)", + ColumnType.STRING_ARRAY, + TestExprMacroTable.INSTANCE + ), null ); @@ -606,14 +577,6 @@ public void testGroupByOnUnnestedVirtualColumn() new DefaultDimensionSpec(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, "alias0") ).setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT) .setGranularity(QueryRunnerTestHelper.ALL_GRAN) - .setVirtualColumns( - new ExpressionVirtualColumn( - "vc", - "mv_to_array(placementish)", - ColumnType.STRING_ARRAY, - TestExprMacroTable.INSTANCE - ) - ) .addOrderByColumn("alias0", OrderByColumnSpec.Direction.ASCENDING) .build(); @@ -623,13 +586,7 @@ public void testGroupByOnUnnestedVirtualColumn() makeRow( query, "2011-04-01", - "alias0", "preferred", - "rows", 26L - ), - makeRow( - query, - "2011-04-01", - "alias0", "e", + "alias0", "a", "rows", 2L ), makeRow( @@ -641,13 +598,13 @@ public void testGroupByOnUnnestedVirtualColumn() makeRow( query, "2011-04-01", - "alias0", "h", + "alias0", "e", "rows", 2L ), makeRow( query, "2011-04-01", - "alias0", "a", + "alias0", "h", "rows", 2L ), makeRow( @@ -668,6 +625,12 @@ public void testGroupByOnUnnestedVirtualColumn() "alias0", "p", "rows", 6L ), + makeRow( + query, + "2011-04-01", + "alias0", "preferred", + "rows", 26L + ), makeRow( query, "2011-04-01", @@ -676,18 +639,7 @@ public void testGroupByOnUnnestedVirtualColumn() ) ); - final IncrementalIndex rtIndex = TestIndex.getIncrementalTestIndex(); - final QueryRunner queryRunner = QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn( - factory, - new IncrementalIndexSegment( - rtIndex, - 
QueryRunnerTestHelper.SEGMENT_ID - ), - query, - "rtIndexvc" - ); - Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, queryRunner, query); - + Iterable results = runQuery(query, TestIndex.getIncrementalTestIndex()); TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-virtual-column"); } @@ -698,63 +650,47 @@ public void testGroupByOnUnnestedVirtualMultiColumn() final DataSource unnestDataSource = UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), - "vc", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + new ExpressionVirtualColumn( + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + "array(\"market\",\"quality\")", + ColumnType.STRING, + TestExprMacroTable.INSTANCE + ), null ); GroupByQuery query = makeQueryBuilder() .setDataSource(unnestDataSource) .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD) - .setDimensions( - new DefaultDimensionSpec(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, "alias0") - ).setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT) + .setDimensions(new DefaultDimensionSpec(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, "alias0")) + .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT) .setGranularity(QueryRunnerTestHelper.ALL_GRAN) - .setVirtualColumns( - new ExpressionVirtualColumn( - "vc", - "array(\"market\",\"quality\")", - ColumnType.STRING, - TestExprMacroTable.INSTANCE - ) - ) .setLimit(3) .build(); - // Total rows should add up to 26 * 2 = 52 - // 26 rows and each has 2 entries in the column to be unnested + // Each count should be 2, since we are unnesting "market" and "quality", which are singly-valued fields. 
List expectedResults = Arrays.asList( makeRow( query, "2011-04-01", - "alias0", "business", + "alias0", "automotive", "rows", 2L ), makeRow( query, "2011-04-01", - "alias0", "health", + "alias0", "business", "rows", 2L ), makeRow( query, "2011-04-01", - "alias0", "travel", + "alias0", "entertainment", "rows", 2L ) ); - final IncrementalIndex rtIndex = TestIndex.getIncrementalTestIndex(); - final QueryRunner queryRunner = QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn( - factory, - new IncrementalIndexSegment( - rtIndex, - QueryRunnerTestHelper.SEGMENT_ID - ), - query, - "rtIndexvc" - ); - Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, queryRunner, query); + Iterable results = runQuery(query, TestIndex.getIncrementalTestIndex()); TestHelper.assertExpectedObjects(expectedResults, results, "groupBy-on-unnested-virtual-columns"); } @@ -767,13 +703,24 @@ private GroupByQuery.Builder makeQueryBuilder() return GroupByQuery.builder().overrideContext(makeContext()); } - /** - * Use this method instead of makeQueryBuilder() to make sure the context is set properly. Also, avoid - * setContext in tests. Only use overrideContext. 
- */ - private GroupByQuery.Builder makeQueryBuilder(final GroupByQuery query) + private Iterable runQuery(final GroupByQuery query, final IncrementalIndex index) { - return new GroupByQuery.Builder(query).overrideContext(makeContext()); + final QueryRunner queryRunner = factory.mergeRunners( + DirectQueryProcessingPool.INSTANCE, + Collections.singletonList( + QueryRunnerTestHelper.makeQueryRunnerWithSegmentMapFn( + factory, + new IncrementalIndexSegment( + index, + QueryRunnerTestHelper.SEGMENT_ID + ), + query, + "rtIndexvc" + ) + ) + ); + + return GroupByQueryRunnerTestHelper.runQuery(factory, queryRunner, query); } private Map makeContext() diff --git a/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java index 4de22cb00610..160f06140c40 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/UnnestScanQueryRunnerTest.java @@ -22,6 +22,7 @@ import com.google.common.collect.Lists; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.DateTimes; +import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.DefaultGenericQueryMetricsFactory; import org.apache.druid.query.Druids; import org.apache.druid.query.QueryPlus; @@ -102,8 +103,12 @@ private Druids.ScanQueryBuilder newTestUnnestQueryWithAllowSet() return Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION, - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + new ExpressionVirtualColumn( + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + "\"" + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION + "\"", + null, + ExprMacroTable.nil() + ), allowSet )) .columns(Collections.emptyList()) @@ -178,21 +183,17 @@ public void 
testUnnestRunnerVirtualColumnsUsingSingleColumn() .intervals(I_0112_0114) .dataSource(UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), - "vc", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + new ExpressionVirtualColumn( + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + "mv_to_array(placementish)", + ColumnType.STRING, + TestExprMacroTable.INSTANCE + ), null )) .columns(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) .eternityInterval() .legacy(legacy) - .virtualColumns( - new ExpressionVirtualColumn( - "vc", - "mv_to_array(placementish)", - ColumnType.STRING, - TestExprMacroTable.INSTANCE - ) - ) .limit(3) .build(); @@ -252,21 +253,17 @@ public void testUnnestRunnerVirtualColumnsUsingMultipleColumn() .intervals(I_0112_0114) .dataSource(UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), - "vc", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + new ExpressionVirtualColumn( + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, + "array(\"market\",\"quality\")", + ColumnType.STRING, + TestExprMacroTable.INSTANCE + ), null )) .columns(QueryRunnerTestHelper.MARKET_DIMENSION, QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) .eternityInterval() .legacy(legacy) - .virtualColumns( - new ExpressionVirtualColumn( - "vc", - "array(\"market\",\"quality\")", - ColumnType.STRING, - TestExprMacroTable.INSTANCE - ) - ) .limit(4) .build(); diff --git a/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java index cfb50d06823e..e822913489dd 100644 --- a/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/topn/UnnestTopNQueryRunnerTest.java @@ -253,19 +253,15 @@ public void testTopNStringVirtualColumnUnnest() TopNQuery query = new TopNQueryBuilder() .dataSource(UnnestDataSource.create( new 
TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), - "vc", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, - null - )) - .granularity(QueryRunnerTestHelper.ALL_GRAN) - .virtualColumns( new ExpressionVirtualColumn( - "vc", + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, "mv_to_array(\"placementish\")", ColumnType.STRING_ARRAY, TestExprMacroTable.INSTANCE - ) - ) + ), + null + )) + .granularity(QueryRunnerTestHelper.ALL_GRAN) .dimension(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) .metric("rows") .threshold(4) @@ -340,19 +336,15 @@ public void testTopNStringVirtualMultiColumnUnnest() TopNQuery query = new TopNQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE), - "vc", - QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, - null - )) - .granularity(QueryRunnerTestHelper.ALL_GRAN) - .virtualColumns( new ExpressionVirtualColumn( - "vc", + QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST, "array(\"market\",\"quality\")", ColumnType.STRING, TestExprMacroTable.INSTANCE - ) - ) + ), + null + )) + .granularity(QueryRunnerTestHelper.ALL_GRAN) .dimension(QueryRunnerTestHelper.PLACEMENTISH_DIMENSION_UNNEST) .metric("rows") .threshold(2) diff --git a/processing/src/test/java/org/apache/druid/segment/ListCursor.java b/processing/src/test/java/org/apache/druid/segment/ListCursor.java index 666bc21be5bb..2ff5d78dab22 100644 --- a/processing/src/test/java/org/apache/druid/segment/ListCursor.java +++ b/processing/src/test/java/org/apache/druid/segment/ListCursor.java @@ -41,7 +41,7 @@ */ public class ListCursor implements Cursor { - List baseList; + private final List baseList; private int index; public ListCursor(List inputList) @@ -62,19 +62,19 @@ public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec) @Override public IndexedInts getRow() { - return null; + throw new UnsupportedOperationException(); } @Override public ValueMatcher makeValueMatcher(@Nullable String value) { - 
return null; + throw new UnsupportedOperationException(); } @Override public ValueMatcher makeValueMatcher(Predicate predicate) { - return null; + throw new UnsupportedOperationException(); } @Override @@ -96,20 +96,20 @@ public Object getObject() @Override public Class classOfObject() { - return null; + return Object.class; } @Override public int getValueCardinality() { - return 0; + return DimensionDictionarySelector.CARDINALITY_UNKNOWN; } @Nullable @Override public String lookupName(int id) { - return null; + throw new UnsupportedOperationException(); } @Override @@ -175,7 +175,7 @@ public Object getObject() @Override public Class classOfObject() { - return null; + return Object.class; } }; } @@ -192,7 +192,7 @@ public ColumnCapabilities getColumnCapabilities(String column) @Override public DateTime getTime() { - return null; + throw new UnsupportedOperationException(); } @Override diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java index cf4a98c88035..11b6b7e1d6a0 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestColumnValueSelectorCursorTest.java @@ -19,9 +19,17 @@ package org.apache.druid.segment; +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.math.expr.ExprMacroTable; +import org.apache.druid.math.expr.ExpressionProcessing; import org.apache.druid.query.dimension.DefaultDimensionSpec; +import org.apache.druid.query.monomorphicprocessing.StringRuntimeShape; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.testing.InitializedNullHandlingTest; +import org.junit.AfterClass; import org.junit.Assert; +import org.junit.BeforeClass; import org.junit.Test; import java.util.ArrayList; @@ 
-36,6 +44,18 @@ public class UnnestColumnValueSelectorCursorTest extends InitializedNullHandling private static LinkedHashSet IGNORE_SET = null; private static LinkedHashSet IGNORE_SET1 = new LinkedHashSet<>(Arrays.asList("b", "f")); + @BeforeClass + public static void setUpClass() + { + NullHandling.initializeForTests(); + ExpressionProcessing.initializeForTests(true); // Allow nested arrays + } + + @AfterClass + public static void tearDownClass() + { + ExpressionProcessing.initializeForTests(null); // Clear special expression-processing config. + } @Test public void test_list_unnest_cursors() @@ -52,7 +72,7 @@ public void test_list_unnest_cursors() UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor( listCursor, listCursor.getColumnSelectorFactory(), - "dummy", + new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, IGNORE_SET ); @@ -86,7 +106,7 @@ public void test_list_unnest_cursors_user_supplied_list() UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor( listCursor, listCursor.getColumnSelectorFactory(), - "dummy", + new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, IGNORE_SET ); @@ -118,7 +138,7 @@ public void test_list_unnest_cursors_user_supplied_list_only_nulls() UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor( listCursor, listCursor.getColumnSelectorFactory(), - "dummy", + new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, IGNORE_SET ); @@ -155,7 +175,7 @@ public void test_list_unnest_cursors_user_supplied_list_mixed_with_nulls() UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor( listCursor, listCursor.getColumnSelectorFactory(), - "dummy", + new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, 
IGNORE_SET ); @@ -189,7 +209,7 @@ public void test_list_unnest_cursors_user_supplied_strings_and_no_lists() UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor( listCursor, listCursor.getColumnSelectorFactory(), - "dummy", + new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, IGNORE_SET ); @@ -219,7 +239,7 @@ public void test_list_unnest_cursors_user_supplied_strings_mixed_with_list() UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor( listCursor, listCursor.getColumnSelectorFactory(), - "dummy", + new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, IGNORE_SET ); @@ -246,14 +266,15 @@ public void test_list_unnest_cursors_user_supplied_lists_three_levels() List expectedResults = Arrays.asList("a", "b", "c", "e", "f", "g", "h", "i", "j", Arrays.asList("a", "b")); - //Create base cursor + // Create base cursor. Need to set type to STRING; otherwise auto-detected type is STRING_ARRAY and the "j" will + // be wrapped in an array (which we don't want). 
ListCursor listCursor = new ListCursor(inputList); //Create unnest cursor UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor( listCursor, listCursor.getColumnSelectorFactory(), - "dummy", + new ExpressionVirtualColumn("__unnest__", "\"dummy\"", null, ExprMacroTable.nil()), OUTPUT_NAME, IGNORE_SET ); @@ -287,14 +308,14 @@ public void test_list_unnest_of_unnest_cursors_user_supplied_list_three_levels() UnnestColumnValueSelectorCursor childCursor = new UnnestColumnValueSelectorCursor( listCursor, listCursor.getColumnSelectorFactory(), - "dummy", + new ExpressionVirtualColumn("__unnest__", "\"dummy\"", null, ExprMacroTable.nil()), OUTPUT_NAME, IGNORE_SET ); UnnestColumnValueSelectorCursor parentCursor = new UnnestColumnValueSelectorCursor( childCursor, childCursor.getColumnSelectorFactory(), - OUTPUT_NAME, + new ExpressionVirtualColumn("__unnest__", "\"" + OUTPUT_NAME + "\"", null, ExprMacroTable.nil()), "tmp-out", IGNORE_SET ); @@ -329,7 +350,7 @@ public void test_list_unnest_cursors_user_supplied_list_with_nulls() UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor( listCursor, listCursor.getColumnSelectorFactory(), - "dummy", + new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, IGNORE_SET ); @@ -367,7 +388,7 @@ public void test_list_unnest_cursors_user_supplied_list_with_dups() UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor( listCursor, listCursor.getColumnSelectorFactory(), - "dummy", + new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, IGNORE_SET ); @@ -405,7 +426,7 @@ public void test_list_unnest_cursors_user_supplied_list_with_ignore_set() UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor( listCursor, listCursor.getColumnSelectorFactory(), - "dummy", + new ExpressionVirtualColumn("__unnest__", "\"dummy\"", 
ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, IGNORE_SET1 ); @@ -443,7 +464,7 @@ public void test_list_unnest_cursors_user_supplied_list_double() UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor( listCursor, listCursor.getColumnSelectorFactory(), - "dummy", + new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, IGNORE_SET ); @@ -477,7 +498,7 @@ public void test_list_unnest_cursors_user_supplied_list_float() UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor( listCursor, listCursor.getColumnSelectorFactory(), - "dummy", + new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, IGNORE_SET ); @@ -511,7 +532,7 @@ public void test_list_unnest_cursors_user_supplied_list_long() UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor( listCursor, listCursor.getColumnSelectorFactory(), - "dummy", + new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, IGNORE_SET ); @@ -548,7 +569,7 @@ public void test_list_unnest_cursors_user_supplied_list_three_level_arrays_and_m UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor( listCursor, listCursor.getColumnSelectorFactory(), - "dummy", + new ExpressionVirtualColumn("__unnest__", "\"dummy\"", null, ExprMacroTable.nil()), OUTPUT_NAME, IGNORE_SET ); @@ -584,7 +605,7 @@ public void test_list_unnest_cursors_dimSelector() UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor( listCursor, listCursor.getColumnSelectorFactory(), - "dummy", + new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, IGNORE_SET ); @@ -593,7 +614,7 @@ public void test_list_unnest_cursors_dimSelector() .makeDimensionSelector( DefaultDimensionSpec.of( OUTPUT_NAME)); - 
unnestDimSelector.inspectRuntimeShape(null); + StringRuntimeShape.of(unnestDimSelector); // Ensure no errors, infinite-loops, etc. int k = 0; while (!unnestCursor.isDone()) { if (k < 8) { @@ -627,7 +648,7 @@ public void test_list_unnest_cursors_user_supplied_list_of_integers() UnnestColumnValueSelectorCursor unnestCursor = new UnnestColumnValueSelectorCursor( listCursor, listCursor.getColumnSelectorFactory(), - "dummy", + new ExpressionVirtualColumn("__unnest__", "\"dummy\"", ColumnType.STRING, ExprMacroTable.nil()), OUTPUT_NAME, IGNORE_SET ); diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index 35d42b82d4b3..cac51de17f25 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -24,6 +24,7 @@ import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.io.Closer; +import org.apache.druid.math.expr.ExprMacroTable; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ValueType; @@ -32,10 +33,13 @@ import org.apache.druid.segment.generator.SegmentGenerator; import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter; +import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.testing.InitializedNullHandlingTest; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.LinearShardSpec; import org.apache.druid.utils.CloseableUtils; +import org.hamcrest.CoreMatchers; +import org.hamcrest.MatcherAssert; import org.junit.AfterClass; import org.junit.Assert; import 
org.junit.BeforeClass; @@ -82,26 +86,22 @@ public static void setup() INCREMENTAL_INDEX_STORAGE_ADAPTER = new IncrementalIndexStorageAdapter(INCREMENTAL_INDEX); UNNEST_STORAGE_ADAPTER = new UnnestStorageAdapter( INCREMENTAL_INDEX_STORAGE_ADAPTER, - COLUMNNAME, - OUTPUT_COLUMN_NAME, + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), null ); UNNEST_STORAGE_ADAPTER1 = new UnnestStorageAdapter( INCREMENTAL_INDEX_STORAGE_ADAPTER, - COLUMNNAME, - OUTPUT_COLUMN_NAME, + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), IGNORE_SET ); UNNEST_STORAGE_ADAPTER2 = new UnnestStorageAdapter( UNNEST_STORAGE_ADAPTER, - COLUMNNAME, - OUTPUT_COLUMN_NAME1, + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME1, "\"" + COLUMNNAME +"\"", null, ExprMacroTable.nil()), null ); UNNEST_STORAGE_ADAPTER3 = new UnnestStorageAdapter( UNNEST_STORAGE_ADAPTER1, - COLUMNNAME, - OUTPUT_COLUMN_NAME1, + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME1, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), IGNORE_SET ); ADAPTERS = ImmutableList.of( @@ -143,7 +143,8 @@ public void test_group_of_unnest_adapters_methods() adapter.getColumnCapabilities(colName).toColumnType(), INCREMENTAL_INDEX_STORAGE_ADAPTER.getColumnCapabilities(colName).toColumnType() ); - Assert.assertEquals(((UnnestStorageAdapter) adapter).getDimensionToUnnest(), colName); + + assertColumnReadsIdentifier(((UnnestStorageAdapter) adapter).getUnnestColumn(), colName); } } @@ -173,7 +174,7 @@ public void test_group_of_unnest_adapters_column_capabilities() ColumnCapabilities capabilities = adapter.getColumnCapabilities(columnsInTable.get(i)); Assert.assertEquals(capabilities.getType(), valueTypes.get(i)); } - Assert.assertEquals(adapter.getDimensionToUnnest(), colName); + assertColumnReadsIdentifier(adapter.getUnnestColumn(), colName); } @@ -312,13 +313,13 @@ public void test_two_levels_of_unnest_adapters_with_allowList() null ); 
UnnestStorageAdapter adapter = UNNEST_STORAGE_ADAPTER3; - Assert.assertEquals(adapter.getDimensionToUnnest(), columnName); + assertColumnReadsIdentifier(adapter.getUnnestColumn(), columnName); Assert.assertEquals( adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).isDictionaryEncoded(), ColumnCapabilities.Capable.TRUE ); - Assert.assertEquals(adapter.getMaxValue(columnName), adapter.getMaxValue(OUTPUT_COLUMN_NAME)); - Assert.assertEquals(adapter.getMinValue(columnName), adapter.getMinValue(OUTPUT_COLUMN_NAME)); + Assert.assertNull(adapter.getMaxValue(OUTPUT_COLUMN_NAME)); + Assert.assertNull(adapter.getMinValue(OUTPUT_COLUMN_NAME)); cursorSequence.accumulate(null, (accumulated, cursor) -> { ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); @@ -364,13 +365,13 @@ public void test_unnest_adapters_methods_with_allowList() null ); UnnestStorageAdapter adapter = UNNEST_STORAGE_ADAPTER1; - Assert.assertEquals(adapter.getDimensionToUnnest(), columnName); + assertColumnReadsIdentifier(adapter.getUnnestColumn(), columnName); Assert.assertEquals( adapter.getColumnCapabilities(OUTPUT_COLUMN_NAME).isDictionaryEncoded(), ColumnCapabilities.Capable.TRUE ); - Assert.assertEquals(adapter.getMaxValue(columnName), adapter.getMaxValue(OUTPUT_COLUMN_NAME)); - Assert.assertEquals(adapter.getMinValue(columnName), adapter.getMinValue(OUTPUT_COLUMN_NAME)); + Assert.assertNull(adapter.getMaxValue(OUTPUT_COLUMN_NAME)); + Assert.assertNull(adapter.getMinValue(OUTPUT_COLUMN_NAME)); cursorSequence.accumulate(null, (accumulated, cursor) -> { ColumnSelectorFactory factory = cursor.getColumnSelectorFactory(); @@ -396,4 +397,10 @@ public void test_unnest_adapters_methods_with_allowList() return null; }); } + + private static void assertColumnReadsIdentifier(final VirtualColumn column, final String identifier) + { + MatcherAssert.assertThat(column, CoreMatchers.instanceOf(ExpressionVirtualColumn.class)); + Assert.assertEquals("\"" + identifier + "\"", ((ExpressionVirtualColumn) 
column).getExpression()); + } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/ArraySqlAggregator.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/ArraySqlAggregator.java index c3bf402df9e6..a3598e17a57a 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/ArraySqlAggregator.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/ArraySqlAggregator.java @@ -187,7 +187,7 @@ private static class ArrayAggFunction extends SqlAggFunction OperandTypes.or( OperandTypes.ANY, OperandTypes.and( - OperandTypes.sequence(StringUtils.format("'%s'(expr, maxSizeBytes)", NAME), OperandTypes.ANY, OperandTypes.POSITIVE_INTEGER_LITERAL), + OperandTypes.sequence(StringUtils.format("%s(expr, maxSizeBytes)", NAME), OperandTypes.ANY, OperandTypes.POSITIVE_INTEGER_LITERAL), OperandTypes.family(SqlTypeFamily.ANY, SqlTypeFamily.NUMERIC) ) ), diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java index 0ce7a7cf0f14..cee9aa69cdf2 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java @@ -24,7 +24,6 @@ import org.apache.calcite.jdbc.JavaTypeFactoryImpl; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rex.RexCall; -import org.apache.calcite.rex.RexFieldAccess; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; @@ -35,7 +34,6 @@ import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.ISE; -import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.math.expr.Expr; import 
org.apache.druid.math.expr.ExprMacroTable; @@ -60,7 +58,6 @@ import org.apache.druid.sql.calcite.filtration.Filtration; import org.apache.druid.sql.calcite.planner.Calcites; import org.apache.druid.sql.calcite.planner.PlannerContext; -import org.apache.druid.sql.calcite.rel.CannotBuildQueryException; import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry; import org.apache.druid.sql.calcite.table.RowSignatures; import org.joda.time.Interval; @@ -104,7 +101,7 @@ public static RexNode fromFieldAccess( // once and then reuse it. return RexInputRef.of(fieldNumber, RowSignatures.toRelDataType(rowSignature, new JavaTypeFactoryImpl())); } else { - return project.getChildExps().get(fieldNumber); + return project.getProjects().get(fieldNumber); } } @@ -217,51 +214,12 @@ public static DruidExpression toDruidExpressionWithPostAggOperands( return rexCallToDruidExpression(plannerContext, rowSignature, rexNode, postAggregatorVisitor); } else if (kind == SqlKind.LITERAL) { return literalToDruidExpression(plannerContext, rexNode); - } else if (kind == SqlKind.FIELD_ACCESS) { - return fieldAccessToDruidExpression(rowSignature, rexNode); } else { // Can't translate. return null; } } - private static DruidExpression fieldAccessToDruidExpression( - final RowSignature rowSignature, - final RexNode rexNode - ) - { - // Translate field references. 
- final RexFieldAccess ref = (RexFieldAccess) rexNode; - if (ref.getField().getIndex() > rowSignature.size()) { - // This case arises in the case of a correlation where the rexNode points to a table from the left subtree - // while the underlying datasource is the scan stub created from LogicalValuesRule - // In such a case we throw a CannotBuildQueryException so that Calcite does not go ahead with this path - // This exception is caught while returning false from isValidDruidQuery() method - throw new CannotBuildQueryException(StringUtils.format( - "Cannot build query as column name [%s] does not exist in row [%s]", ref.getField().getName(), rowSignature) - ); - } - - final String columnName = ref.getField().getName(); - final int index = rowSignature.indexOf(columnName); - - // This case arises when the rexNode has a name which is not in the underlying stub created using DruidUnnestDataSourceRule - // The column name has name ZERO with rowtype as LONG - // causes the index to be -1. In such a case we cannot build the query - // and throw an exception while returning false from isValidDruidQuery() method - if (index < 0) { - throw new CannotBuildQueryException(StringUtils.format( - "Expression referred to nonexistent index[%d] in row[%s]", - index, - rowSignature - )); - } - - final Optional columnType = rowSignature.getColumnType(index); - - return DruidExpression.ofColumn(columnType.get(), columnName); - } - private static DruidExpression inputRefToDruidExpression( final RowSignature rowSignature, final RexNode rexNode diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/CastOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/CastOperatorConversion.java index 9062a32d0baf..9ab91bdc7b88 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/CastOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/CastOperatorConversion.java @@ -76,6 
+76,8 @@ public class CastOperatorConversion implements SqlOperatorConversion builder.put(type, ExprType.LONG); } + builder.put(SqlTypeName.ARRAY, ExprType.ARRAY); + EXPRESSION_TYPES = builder.build(); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringToArrayOperatorConversion.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringToArrayOperatorConversion.java index f8245ddeeda4..b1135b884215 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringToArrayOperatorConversion.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/MultiValueStringToArrayOperatorConversion.java @@ -34,7 +34,7 @@ public class MultiValueStringToArrayOperatorConversion extends DirectOperatorConversion { - private static final SqlFunction SQL_FUNCTION = OperatorConversions + public static final SqlFunction SQL_FUNCTION = OperatorConversions .operatorBuilder("MV_TO_ARRAY") .operandTypeChecker(OperandTypes.family(SqlTypeFamily.STRING)) .functionCategory(SqlFunctionCategory.STRING) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/external/ExternalTableScanRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/external/ExternalTableScanRule.java index fc3d8098ed7b..1fe085eb3e35 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/external/ExternalTableScanRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/external/ExternalTableScanRule.java @@ -43,7 +43,7 @@ public ExternalTableScanRule(final PlannerContext plannerContext) @Override public boolean matches(RelOptRuleCall call) { - if (plannerContext.engineHasFeature(EngineFeature.READ_EXTERNAL_DATA)) { + if (plannerContext.featureAvailable(EngineFeature.READ_EXTERNAL_DATA)) { return super.matches(call); } else { plannerContext.setPlanningError( @@ -59,7 +59,7 @@ public boolean matches(RelOptRuleCall call) @Override public void onMatch(final RelOptRuleCall call) { - if 
(!plannerContext.engineHasFeature(EngineFeature.READ_EXTERNAL_DATA)) { + if (!plannerContext.featureAvailable(EngineFeature.READ_EXTERNAL_DATA)) { // Not called because "matches" returns false. throw new UnsupportedOperationException(); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java index a50b63596b1a..d538c30ab3d7 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java @@ -83,6 +83,7 @@ import org.apache.druid.sql.calcite.rule.FilterJoinExcludePushToChildRule; import org.apache.druid.sql.calcite.rule.ProjectAggregatePruneUnusedCallRule; import org.apache.druid.sql.calcite.rule.SortCollapseRule; +import org.apache.druid.sql.calcite.run.EngineFeature; import java.util.List; import java.util.Set; @@ -284,7 +285,8 @@ public List baseRuleSet(final PlannerContext plannerContext) rules.addAll(ABSTRACT_RELATIONAL_RULES); if (!plannerConfig.isUseApproximateCountDistinct()) { - if (plannerConfig.isUseGroupingSetForExactDistinct()) { + if (plannerConfig.isUseGroupingSetForExactDistinct() + && plannerContext.featureAvailable(EngineFeature.GROUPING_SETS)) { rules.add(AggregateExpandDistinctAggregatesRule.INSTANCE); } else { rules.add(AggregateExpandDistinctAggregatesRule.JOIN); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java index f2d98fb70cde..429da671d714 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/IngestHandler.java @@ -271,7 +271,7 @@ protected DruidSqlIngest ingestNode() @Override public void validate() throws ValidationException { - if 
(!handlerContext.plannerContext().engineHasFeature(EngineFeature.CAN_INSERT)) { + if (!handlerContext.plannerContext().featureAvailable(EngineFeature.CAN_INSERT)) { throw new ValidationException(StringUtils.format( "Cannot execute INSERT with SQL engine '%s'.", handlerContext.engine().name()) @@ -319,7 +319,7 @@ protected DruidSqlIngest ingestNode() @Override public void validate() throws ValidationException { - if (!handlerContext.plannerContext().engineHasFeature(EngineFeature.CAN_REPLACE)) { + if (!handlerContext.plannerContext().featureAvailable(EngineFeature.CAN_REPLACE)) { throw new ValidationException(StringUtils.format( "Cannot execute REPLACE with SQL engine '%s'.", handlerContext.engine().name()) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java index e2346540dbd0..c8890235dcf4 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java @@ -73,6 +73,16 @@ public class PlannerContext */ public static final String CTX_SQL_OUTER_LIMIT = "sqlOuterLimit"; + /** + * Undocumented context key, used to enable window functions. + */ + public static final String CTX_ENABLE_WINDOW_FNS = "windowsAreForClosers"; + + /** + * Undocumented context key, used to enable {@link org.apache.calcite.sql.fun.SqlStdOperatorTable#UNNEST}. + */ + public static final String CTX_ENABLE_UNNEST = "enableUnnest"; + // DataContext keys public static final String DATA_CTX_AUTHENTICATION_RESULT = "authenticationResult"; @@ -411,9 +421,28 @@ public SqlEngine getEngine() return engine; } - public boolean engineHasFeature(final EngineFeature feature) + /** + * Checks if the current {@link SqlEngine} supports a particular feature. 
+ * + * When executing a specific query, use this method instead of + * {@link SqlEngine#featureAvailable(EngineFeature, PlannerContext)}, because it also verifies feature flags such as + * {@link #CTX_ENABLE_WINDOW_FNS}. + */ + public boolean featureAvailable(final EngineFeature feature) { - return engine.feature(feature, this); + if (feature == EngineFeature.WINDOW_FUNCTIONS && + !QueryContexts.getAsBoolean(CTX_ENABLE_WINDOW_FNS, queryContext.get(CTX_ENABLE_WINDOW_FNS), false)) { + // Short-circuit: feature requires context flag. + return false; + } + + if (feature == EngineFeature.UNNEST && + !QueryContexts.getAsBoolean(CTX_ENABLE_UNNEST, queryContext.get(CTX_ENABLE_UNNEST), false)) { + // Short-circuit: feature requires context flag. + return false; + } + + return engine.featureAvailable(feature, this); } public QueryMaker getQueryMaker() diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/QueryHandler.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/QueryHandler.java index 13daa80da129..146e0d8660f9 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/QueryHandler.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/QueryHandler.java @@ -194,7 +194,7 @@ public PlannerResult plan() throws ValidationException if (!bindableTables.isEmpty()) { // Consider BINDABLE convention when necessary. Used for metadata tables. 
- if (!handlerContext.plannerContext().engineHasFeature(EngineFeature.ALLOW_BINDABLE_PLAN)) { + if (!handlerContext.plannerContext().featureAvailable(EngineFeature.ALLOW_BINDABLE_PLAN)) { throw new ValidationException( StringUtils.format( "Cannot query table%s %s with SQL engine '%s'.", @@ -620,7 +620,7 @@ public SqlNode sqlNode() @Override public void validate() throws ValidationException { - if (!handlerContext.plannerContext().engineHasFeature(EngineFeature.CAN_SELECT)) { + if (!handlerContext.plannerContext().featureAvailable(EngineFeature.CAN_SELECT)) { throw new ValidationException(StringUtils.format( "Cannot execute SELECT with SQL engine '%s'.", handlerContext.engine().name()) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java index 1751201acc1f..662560064ce4 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java @@ -30,12 +30,17 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelWriter; import org.apache.calcite.rel.core.Correlate; -import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.CorrelationId; import org.apache.calcite.rel.core.JoinRelType; -import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; -import org.apache.druid.java.util.common.StringUtils; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexCorrelVariable; +import org.apache.calcite.rex.RexFieldAccess; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.sql.SqlKind; import org.apache.druid.query.DataSource; import org.apache.druid.query.QueryDataSource; import 
org.apache.druid.query.TableDataSource; @@ -43,6 +48,8 @@ import org.apache.druid.segment.column.RowSignature; import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.expression.Expressions; +import org.apache.druid.sql.calcite.expression.builtin.MultiValueStringToArrayOperatorConversion; +import org.apache.druid.sql.calcite.planner.Calcites; import org.apache.druid.sql.calcite.planner.PlannerConfig; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.table.RowSignatures; @@ -57,28 +64,27 @@ * This is the DruidRel to handle correlated rel nodes to be used for unnest. * Each correlate can be perceived as a join with the join type being inner * the left of a correlate as seen in the rule {@link org.apache.druid.sql.calcite.rule.DruidCorrelateUnnestRule} - * is the {@link DruidQueryRel} while the right will always be an {@link DruidUnnestDatasourceRel}. + * is the {@link DruidQueryRel} while the right will always be an {@link DruidUnnestRel}. * * Since this is a subclass of DruidRel it is automatically considered by other rules that involves DruidRels. 
* Some example being SELECT_PROJECT and SORT_PROJECT rules in {@link org.apache.druid.sql.calcite.rule.DruidRules.DruidQueryRule} */ public class DruidCorrelateUnnestRel extends DruidRel { - private static final TableDataSource DUMMY_DATA_SOURCE = new TableDataSource("unnest"); + private static final TableDataSource DUMMY_DATA_SOURCE = new TableDataSource("__correlate_unnest__"); + private static final String BASE_UNNEST_OUTPUT_COLUMN = "unnest"; - private final Filter leftFilter; private final PartialDruidQuery partialQuery; private final PlannerConfig plannerConfig; private final Correlate correlateRel; - private RelNode left; - private RelNode right; + private final RelNode left; + private final RelNode right; private DruidCorrelateUnnestRel( RelOptCluster cluster, RelTraitSet traitSet, Correlate correlateRel, PartialDruidQuery partialQuery, - Filter baseFilter, PlannerContext plannerContext ) { @@ -87,16 +93,14 @@ private DruidCorrelateUnnestRel( this.partialQuery = partialQuery; this.left = correlateRel.getLeft(); this.right = correlateRel.getRight(); - this.leftFilter = baseFilter; this.plannerConfig = plannerContext.getPlannerConfig(); } /** - * Create an instance from a Correlate that is based on a {@link DruidRel} and a {@link DruidUnnestDatasourceRel} inputs. + * Create an instance from a Correlate that is based on a {@link DruidRel} and a {@link DruidUnnestRel} inputs. 
*/ public static DruidCorrelateUnnestRel create( final Correlate correlateRel, - final Filter leftFilter, final PlannerContext plannerContext ) { @@ -105,7 +109,6 @@ public static DruidCorrelateUnnestRel create( correlateRel.getTraitSet(), correlateRel, PartialDruidQuery.create(correlateRel), - leftFilter, plannerContext ); } @@ -125,7 +128,6 @@ public DruidCorrelateUnnestRel withPartialQuery(PartialDruidQuery newQueryBuilde getTraitSet().plusAll(newQueryBuilder.getRelTraits()), correlateRel, newQueryBuilder, - leftFilter, getPlannerContext() ); } @@ -133,92 +135,62 @@ public DruidCorrelateUnnestRel withPartialQuery(PartialDruidQuery newQueryBuilde @Override public DruidQuery toDruidQuery(boolean finalizeAggregations) { - final DruidRel druidQueryRel = (DruidRel) left; - final DruidQuery leftQuery = Preconditions.checkNotNull((druidQueryRel).toDruidQuery(false), "leftQuery"); + final DruidRel leftDruidRel = (DruidRel) left; + final DruidQuery leftQuery = Preconditions.checkNotNull(leftDruidRel.toDruidQuery(false), "leftQuery"); + final DruidUnnestRel unnestDatasourceRel = (DruidUnnestRel) right; final DataSource leftDataSource; + final RowSignature leftDataSourceSignature; - if (DruidJoinQueryRel.computeLeftRequiresSubquery(druidQueryRel)) { + if (right.getRowType().getFieldNames().size() != 1) { + throw new CannotBuildQueryException("Cannot perform correlated join + UNNEST with more than one column"); + } + + if (computeLeftRequiresSubquery(leftDruidRel)) { + // Left side is doing more than projection: generate a subquery. 
leftDataSource = new QueryDataSource(leftQuery.getQuery()); + leftDataSourceSignature = leftQuery.getOutputRowSignature(); } else { leftDataSource = leftQuery.getDataSource(); + leftDataSourceSignature = DruidRels.dataSourceSignature(leftDruidRel); } - final DruidUnnestDatasourceRel unnestDatasourceRel = (DruidUnnestDatasourceRel) right; - - - final RowSignature rowSignature = RowSignatures.fromRelDataType( - correlateRel.getRowType().getFieldNames(), - correlateRel.getRowType() - ); - - final DruidExpression expression = Expressions.toDruidExpression( + // Compute the expression to unnest. + final RexNode rexNodeToUnnest = getRexNodeToUnnest(correlateRel, unnestDatasourceRel); + final DruidExpression expressionToUnnest = Expressions.toDruidExpression( getPlannerContext(), - rowSignature, - unnestDatasourceRel.getUnnestProject().getProjects().get(0) - ); - - LogicalProject unnestProject = LogicalProject.create( - this, - ImmutableList.of(unnestDatasourceRel.getUnnestProject() - .getProjects() - .get(0)), - unnestDatasourceRel.getUnnestProject().getRowType() - ); - - // placeholder for dimension or expression to be unnested - final String dimOrExpToUnnest; - final VirtualColumnRegistry virtualColumnRegistry = VirtualColumnRegistry.create( - rowSignature, - getPlannerContext().getExprMacroTable(), - getPlannerContext().getPlannerConfig().isForceExpressionVirtualColumns() + leftDataSourceSignature, + rexNodeToUnnest ); - // the unnest project is needed in case of a virtual column - // unnest(mv_to_array(dim_1)) is reconciled as unnesting a MVD dim_1 not requiring a virtual column - // while unnest(array(dim_2,dim_3)) is understood as unnesting a virtual column which is an array over dim_2 and dim_3 elements - boolean unnestProjectNeeded = false; - getPlannerContext().setJoinExpressionVirtualColumnRegistry(virtualColumnRegistry); - - // handling for case when mv_to_array is used - // No need to use virtual column in such a case - if 
(StringUtils.toLowerCase(expression.getExpression()).startsWith("mv_to_array")) { - dimOrExpToUnnest = expression.getArguments().get(0).getSimpleExtraction().getColumn(); - } else { - if (expression.isDirectColumnAccess()) { - dimOrExpToUnnest = expression.getDirectColumn(); - } else { - // buckle up time to create virtual columns on expressions - unnestProjectNeeded = true; - dimOrExpToUnnest = virtualColumnRegistry.getOrCreateVirtualColumnForExpression( - expression, - expression.getDruidType() - ); - } + if (expressionToUnnest == null) { + throw new CannotBuildQueryException(unnestDatasourceRel.getUnnestProject(), rexNodeToUnnest); } - // add the unnest project to the partial query if required - // This is necessary to handle the virtual columns on the unnestProject - // Also create the unnest datasource to be used by the partial query - PartialDruidQuery partialDruidQuery = unnestProjectNeeded ? partialQuery.withUnnest(unnestProject) : partialQuery; - return partialDruidQuery.build( + // Final output row signature. 
+ final RowSignature correlateRowSignature = getCorrelateRowSignature(correlateRel, leftQuery); + + return partialQuery.build( UnnestDataSource.create( leftDataSource, - dimOrExpToUnnest, - unnestDatasourceRel.getUnnestProject().getRowType().getFieldNames().get(0), + expressionToUnnest.toVirtualColumn( + correlateRowSignature.getColumnName(correlateRowSignature.size() - 1), + Calcites.getColumnTypeForRelDataType(rexNodeToUnnest.getType()), + getPlannerContext().getExprMacroTable() + ), null ), - rowSignature, + correlateRowSignature, getPlannerContext(), getCluster().getRexBuilder(), finalizeAggregations, - virtualColumnRegistry + null ); } @Override protected DruidCorrelateUnnestRel clone() { - return DruidCorrelateUnnestRel.create(correlateRel, leftFilter, getPlannerContext()); + return DruidCorrelateUnnestRel.create(correlateRel, getPlannerContext()); } @Override @@ -242,10 +214,6 @@ public DruidQuery toDruidQueryForExplaining() ); } - // This is required to be overwritten as Calcite uses this method - // to maintain a map of equivalent DruidCorrelateUnnestRel or in general any Rel nodes. - // Without this method overwritten multiple RelNodes will produce the same key - // which makes the planner plan incorrectly. 
@Override public RelWriter explainTerms(RelWriter pw) { @@ -259,8 +227,9 @@ public RelWriter explainTerms(RelWriter pw) throw new RuntimeException(e); } - return pw.item("query", queryString) - .item("signature", druidQuery.getOutputRowSignature()); + return correlateRel.explainTerms(pw) + .item("query", queryString) + .item("signature", druidQuery.getOutputRowSignature()); } // This is called from the DruidRelToDruidRule which converts from the NONE convention to the DRUID convention @@ -278,7 +247,6 @@ public DruidCorrelateUnnestRel asDruidConvention() .collect(Collectors.toList()) ), partialQuery, - leftFilter, getPlannerContext() ); } @@ -297,7 +265,6 @@ public RelNode copy(final RelTraitSet traitSet, final List inputs) traitSet, correlateRel.copy(correlateRel.getTraitSet(), inputs), getPartialDruidQuery(), - leftFilter, getPlannerContext() ); } @@ -307,7 +274,7 @@ public RelOptCost computeSelfCost(final RelOptPlanner planner, final RelMetadata { double cost; - if (DruidJoinQueryRel.computeLeftRequiresSubquery(DruidJoinQueryRel.getSomeDruidChild(left))) { + if (computeLeftRequiresSubquery(DruidJoinQueryRel.getSomeDruidChild(left))) { cost = CostEstimates.COST_SUBQUERY; } else { cost = partialQuery.estimateCost(); @@ -327,4 +294,110 @@ public Set getDataSourceNames() retVal.addAll(((DruidRel) right).getDataSourceNames()); return retVal; } + + /** + * Computes whether a particular left-side rel requires a subquery, or if we can operate on its underlying + * datasource directly. + * + * Stricter than {@link DruidJoinQueryRel#computeLeftRequiresSubquery}: this method only allows scans (not mappings). + * This is OK because any mapping or other simple projection would have been pulled above the {@link Correlate} by + * {@link org.apache.druid.sql.calcite.rule.DruidCorrelateUnnestRule}. 
+ */ + public static boolean computeLeftRequiresSubquery(final DruidRel left) + { + return left == null || left.getPartialDruidQuery().stage() != PartialDruidQuery.Stage.SCAN; + } + + /** + * Whether an expr is MV_TO_ARRAY of an input reference. + */ + private static boolean isMvToArrayOfInputRef(final RexNode expr) + { + return expr.isA(SqlKind.OTHER_FUNCTION) + && ((RexCall) expr).op.equals(MultiValueStringToArrayOperatorConversion.SQL_FUNCTION) + && ((RexCall) expr).getOperands().get(0).isA(SqlKind.INPUT_REF); + } + + /** + * Unwrap MV_TO_ARRAY at the outer layer of an expr, if it refers to an input ref. + */ + private static RexNode unwrapMvToArray(final RexNode expr) + { + if (isMvToArrayOfInputRef(expr)) { + return ((RexCall) expr).getOperands().get(0); + } else { + return expr; + } + } + + /** + * Compute the row signature of this rel, given a particular left-hand {@link DruidQuery}. + * The right-hand side is assumed to have a single column with the name {@link #BASE_UNNEST_OUTPUT_COLUMN}. + */ + private static RowSignature getCorrelateRowSignature( + final Correlate correlate, + final DruidQuery leftQuery + ) + { + // Compute signature of the correlation operation. It's like a join: the left and right sides are concatenated. + // On the native query side, this is what is ultimately emitted by the UnnestStorageAdapter. + // + // Ignore prefix (lhs) from computeJoinRowSignature; we don't need this since we will declare the name of the + // single output column directly. (And we know it's the last column in the signature.) + final RelDataType unnestedType = + correlate.getRowType().getFieldList().get(correlate.getRowType().getFieldCount() - 1).getType(); + + return DruidJoinQueryRel.computeJoinRowSignature( + leftQuery.getOutputRowSignature(), + RowSignature.builder().add( + BASE_UNNEST_OUTPUT_COLUMN, + Calcites.getColumnTypeForRelDataType(unnestedType) + ).build() + ).rhs; + } + + /** + * Return the expression to unnest from the left-hand side. 
Correlation variable references are rewritten to + * regular field accesses, i.e., {@link RexInputRef}. + */ + private static RexNode getRexNodeToUnnest( + final Correlate correlate, + final DruidUnnestRel unnestDatasourceRel + ) + { + // Update unnestDatasourceRel.getUnnestProject() so it refers to the left-hand side rather than the correlation + // variable. This is the expression to unnest. + final RexNode rexNodeToUnnest = + new CorrelatedFieldAccessToInputRef(correlate.getCorrelationId()) + .apply(unnestDatasourceRel.getUnnestProject().getProjects().get(0)); + + // Unwrap MV_TO_ARRAY if present. + return unwrapMvToArray(rexNodeToUnnest); + } + + /** + * Shuttle that replaces correlating variables with regular field accesses to the left-hand side. + */ + private static class CorrelatedFieldAccessToInputRef extends RexShuttle + { + private final CorrelationId correlationId; + + public CorrelatedFieldAccessToInputRef(final CorrelationId correlationId) + { + this.correlationId = correlationId; + } + + @Override + public RexNode visitFieldAccess(final RexFieldAccess fieldAccess) + { + if (fieldAccess.getReferenceExpr() instanceof RexCorrelVariable) { + final RexCorrelVariable encounteredCorrelationId = (RexCorrelVariable) fieldAccess.getReferenceExpr(); + if (encounteredCorrelationId.id.equals(correlationId)) { + return new RexInputRef(fieldAccess.getField().getIndex(), fieldAccess.getType()); + } + } + + return super.visitFieldAccess(fieldAccess); + } + } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidOuterQueryRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidOuterQueryRel.java index d9bd16343ef1..7233b54f5e1b 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidOuterQueryRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidOuterQueryRel.java @@ -186,10 +186,9 @@ public RelWriter explainTerms(RelWriter pw) throw new RuntimeException(e); } - return super.explainTerms(pw) - 
.input("innerQuery", sourceRel) - .item("query", queryString) - .item("signature", druidQuery.getOutputRowSignature()); + return pw.input("innerQuery", sourceRel) + .item("query", queryString) + .item("signature", druidQuery.getOutputRowSignature()); } @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java index dee970655871..9f781c808070 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java @@ -49,7 +49,6 @@ import org.apache.druid.query.DataSource; import org.apache.druid.query.JoinDataSource; import org.apache.druid.query.Query; -import org.apache.druid.query.QueryContext; import org.apache.druid.query.QueryDataSource; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.LongMaxAggregatorFactory; @@ -128,7 +127,6 @@ public class DruidQuery * Used by {@link #canUseQueryGranularity}. 
*/ private static final int MAX_TIME_GRAINS_NON_DRUID_TABLE = 100000; - public static final String CTX_ENABLE_WINDOW_FNS = "windowsAreForClosers"; private final DataSource dataSource; private final PlannerContext plannerContext; @@ -139,9 +137,6 @@ public class DruidQuery @Nullable private final Projection selectProjection; - @Nullable - private final Projection unnestProjection; - @Nullable private final Grouping grouping; @@ -162,7 +157,6 @@ private DruidQuery( final PlannerContext plannerContext, @Nullable final DimFilter filter, @Nullable final Projection selectProjection, - @Nullable final Projection unnestProjection, @Nullable final Grouping grouping, @Nullable final Sorting sorting, @Nullable final Windowing windowing, @@ -175,7 +169,6 @@ private DruidQuery( this.plannerContext = Preconditions.checkNotNull(plannerContext, "plannerContext"); this.filter = filter; this.selectProjection = selectProjection; - this.unnestProjection = unnestProjection; this.grouping = grouping; this.sorting = sorting; this.windowing = windowing; @@ -278,8 +271,7 @@ public static DruidQuery fromPartialQuery( } if (partialQuery.getWindow() != null) { - final QueryContext queryContext = plannerContext.queryContext(); - if (queryContext.getBoolean(CTX_ENABLE_WINDOW_FNS, false)) { + if (plannerContext.featureAvailable(EngineFeature.WINDOW_FUNCTIONS)) { windowing = Preconditions.checkNotNull( Windowing.fromCalciteStuff( partialQuery, @@ -289,32 +281,18 @@ public static DruidQuery fromPartialQuery( ) ); } else { - plannerContext.setPlanningError("Windowing Not Currently Supported"); - throw new CannotBuildQueryException("Windowing Not Currently Supported"); + plannerContext.setPlanningError("Windowing not supported"); + throw new CannotBuildQueryException("Windowing not supported"); } } else { windowing = null; } - if (partialQuery.getUnnestProject() != null) { - unnestProjection = Preconditions.checkNotNull( - computeUnnestProjection( - partialQuery, - plannerContext, - 
computeOutputRowSignature(sourceRowSignature, null, null, null, null), - virtualColumnRegistry - ) - ); - } else { - unnestProjection = null; - } - return new DruidQuery( dataSource, plannerContext, filter, selectProjection, - unnestProjection, grouping, sorting, windowing, @@ -391,18 +369,6 @@ private static Projection computeSelectProjection( } } - @Nonnull - private static Projection computeUnnestProjection( - final PartialDruidQuery partialQuery, - final PlannerContext plannerContext, - final RowSignature rowSignature, - final VirtualColumnRegistry virtualColumnRegistry - ) - { - final Project project = Preconditions.checkNotNull(partialQuery.getUnnestProject(), "unnestProject"); - return Projection.preAggregation(project, plannerContext, rowSignature, virtualColumnRegistry); - } - @Nonnull private static Grouping computeGrouping( final PartialDruidQuery partialQuery, @@ -794,16 +760,6 @@ private VirtualColumns getVirtualColumns(final boolean includeDimensions) } } - - if (unnestProjection != null) { - for (String columnName : unnestProjection.getVirtualColumns()) { - if (virtualColumnRegistry.isVirtualColumnDefined(columnName)) { - virtualColumns.add(virtualColumnRegistry.getVirtualColumn(columnName)); - } - } - } - - for (String columnName : specialized) { if (virtualColumnRegistry.isVirtualColumnDefined(columnName)) { virtualColumns.add(virtualColumnRegistry.getVirtualColumn(columnName)); @@ -1001,7 +957,7 @@ private Query computeQuery() @Nullable private TimeBoundaryQuery toTimeBoundaryQuery() { - if (!plannerContext.engineHasFeature(EngineFeature.TIME_BOUNDARY_QUERY) + if (!plannerContext.featureAvailable(EngineFeature.TIME_BOUNDARY_QUERY) || grouping == null || grouping.getSubtotals().hasEffect(grouping.getDimensionSpecs()) || grouping.getHavingFilter() != null @@ -1066,7 +1022,7 @@ private TimeBoundaryQuery toTimeBoundaryQuery() @Nullable private TimeseriesQuery toTimeseriesQuery() { - if (!plannerContext.engineHasFeature(EngineFeature.TIMESERIES_QUERY) 
+ if (!plannerContext.featureAvailable(EngineFeature.TIMESERIES_QUERY) || grouping == null || grouping.getSubtotals().hasEffect(grouping.getDimensionSpecs()) || grouping.getHavingFilter() != null) { @@ -1184,7 +1140,7 @@ private TimeseriesQuery toTimeseriesQuery() private TopNQuery toTopNQuery() { // Must be allowed by the QueryMaker. - if (!plannerContext.engineHasFeature(EngineFeature.TOPN_QUERY)) { + if (!plannerContext.featureAvailable(EngineFeature.TOPN_QUERY)) { return null; } @@ -1474,7 +1430,7 @@ private ScanQuery toScanQuery() orderByColumns = Collections.emptyList(); } - if (!plannerContext.engineHasFeature(EngineFeature.SCAN_ORDER_BY_NON_TIME) && !orderByColumns.isEmpty()) { + if (!plannerContext.featureAvailable(EngineFeature.SCAN_ORDER_BY_NON_TIME) && !orderByColumns.isEmpty()) { if (orderByColumns.size() > 1 || !ColumnHolder.TIME_COLUMN_NAME.equals(orderByColumns.get(0).getColumnName())) { // Cannot handle this ordering. // Scan cannot ORDER BY non-time columns. @@ -1533,7 +1489,7 @@ private Map withScanSignatureIfNeeded( final Map queryContext ) { - if (!plannerContext.engineHasFeature(EngineFeature.SCAN_NEEDS_SIGNATURE)) { + if (!plannerContext.featureAvailable(EngineFeature.SCAN_NEEDS_SIGNATURE)) { return queryContext; } // Compute the signature of the columns that we are selecting. 
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQueryRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQueryRel.java index 3e5c72a71fdd..dca85b127907 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQueryRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQueryRel.java @@ -22,22 +22,24 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.google.common.base.Preconditions; import org.apache.calcite.plan.Convention; +import org.apache.calcite.plan.ConventionTraitDef; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptCost; import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelWriter; import org.apache.calcite.rel.logical.LogicalTableScan; -import org.apache.calcite.rel.logical.LogicalValues; import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; +import org.apache.druid.query.InlineDataSource; import org.apache.druid.sql.calcite.external.ExternalTableScan; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.table.DruidTable; +import org.apache.druid.sql.calcite.table.InlineTable; import javax.annotation.Nullable; - import java.util.Set; /** @@ -106,19 +108,19 @@ public static DruidQueryRel scanExternal( /** * Create a DruidQueryRel representing a full scan of inline, literal values. 
*/ - public static DruidQueryRel scanValues( - final LogicalValues valuesRel, - final DruidTable druidTable, + public static DruidQueryRel scanConstantRel( + final RelNode rel, + final InlineDataSource dataSource, final PlannerContext plannerContext ) { return new DruidQueryRel( - valuesRel.getCluster(), - valuesRel.getTraitSet(), // the traitSet of valuesRel should be kept + rel.getCluster(), + rel.getTraitSet().replace(Convention.NONE), // keep traitSet of input rel, except for convention null, - druidTable, + new InlineTable(dataSource), plannerContext, - PartialDruidQuery.create(valuesRel) + PartialDruidQuery.create(rel) ); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRel.java index 7bf305d42b98..b264efe51b57 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRel.java @@ -22,6 +22,7 @@ import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.AbstractRelNode; +import org.apache.calcite.rel.RelWriter; import org.apache.druid.server.QueryResponse; import org.apache.druid.sql.calcite.planner.PlannerContext; @@ -95,6 +96,19 @@ public PlannerContext getPlannerContext() return plannerContext; } + /** + * Overridden to ensure that subclasses provide a proper implementation. The default implementation from + * {@link AbstractRelNode} does nothing and is not appropriate. + */ + @Override + public RelWriter explainTerms(RelWriter pw) + { + throw new UnsupportedOperationException(); + } + + /** + * Returns a copy of this rel with the {@link DruidConvention} trait. 
+ */ public abstract T asDruidConvention(); /** diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRels.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRels.java index 6a1369bc24c6..3e828491a399 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRels.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRels.java @@ -19,6 +19,10 @@ package org.apache.druid.sql.calcite.rel; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.calcite.rel.RelWriter; +import org.apache.druid.segment.column.RowSignature; import org.apache.druid.sql.calcite.table.DruidTable; import java.util.Optional; @@ -63,7 +67,7 @@ public static boolean isScanOrMapping(final DruidRel druidRel, final boolean * @param canBeJoinOrUnion consider a {@link DruidJoinQueryRel} or {@link DruidUnionDataSourceRel} as possible * scans-and-mappings too. */ - private static boolean isScanOrProject(final DruidRel druidRel, final boolean canBeJoinOrUnion) + public static boolean isScanOrProject(final DruidRel druidRel, final boolean canBeJoinOrUnion) { if (druidRel instanceof DruidQueryRel || (canBeJoinOrUnion && (druidRel instanceof DruidJoinQueryRel || druidRel instanceof DruidUnionDataSourceRel))) { @@ -75,4 +79,38 @@ private static boolean isScanOrProject(final DruidRel druidRel, final boolean return false; } } + + /** + * Returns the signature of the datasource of a {@link DruidRel}. + * + * This is not the signature of the {@link DruidRel} itself: in particular, it ignores any operations that are layered + * on top of the datasource. + */ + public static RowSignature dataSourceSignature(final DruidRel druidRel) + { + if (druidRel instanceof DruidQueryRel) { + // Get signature directly from the table. + return ((DruidQueryRel) druidRel).getDruidTable().getRowSignature(); + } else { + // Build the query with a no-op PartialDruidQuery. 
+ return druidRel.withPartialQuery( + PartialDruidQuery.create(druidRel.getPartialDruidQuery().getScan()) + ).toDruidQuery(false).getOutputRowSignature(); + } + } + + /** + * Return a JSON representation of a query suitable for implementing {@link DruidRel#explainTerms(RelWriter)}. + */ + public static String toQueryStringForExplaining(final DruidRel rel, final ObjectMapper mapper) + { + final DruidQuery druidQuery = rel.toDruidQueryForExplaining(); + + try { + return mapper.writeValueAsString(druidQuery.getQuery()); + } + catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnionRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnionRel.java index f754fc0cf022..17fcbd6bfc37 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnionRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnionRel.java @@ -221,8 +221,6 @@ public Set getDataSourceNames() @Override public RelWriter explainTerms(RelWriter pw) { - super.explainTerms(pw); - for (int i = 0; i < rels.size(); i++) { pw.input(StringUtils.format("input#%d", i), rels.get(i)); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java deleted file mode 100644 index cb01a003eae8..000000000000 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestDatasourceRel.java +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.sql.calcite.rel; - -import org.apache.calcite.rel.RelWriter; -import org.apache.calcite.rel.core.Uncollect; -import org.apache.calcite.rel.logical.LogicalProject; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.druid.math.expr.Expr; -import org.apache.druid.math.expr.ExprEval; -import org.apache.druid.math.expr.ExpressionType; -import org.apache.druid.math.expr.InputBindings; -import org.apache.druid.query.InlineDataSource; -import org.apache.druid.query.UnnestDataSource; -import org.apache.druid.segment.column.RowSignature; -import org.apache.druid.sql.calcite.expression.DruidExpression; -import org.apache.druid.sql.calcite.expression.Expressions; -import org.apache.druid.sql.calcite.planner.PlannerContext; -import org.apache.druid.sql.calcite.table.RowSignatures; - -import javax.annotation.Nullable; -import java.util.Collections; -import java.util.Set; - -/** - * The Rel node to capture the unnest (or uncollect) part in a query. This covers 2 cases: - * - * Case 1: - * If this is an unnest on a constant and no input table is required, the final query is built using - * an UnnestDataSource with a base InlineDataSource in this rel. 
- * - * Case 2: - * If the unnest has an input table, this rel resolves the unnest part and delegates the rel to be consumed by other - * rule ({@link org.apache.druid.sql.calcite.rule.DruidCorrelateUnnestRule} - */ -public class DruidUnnestDatasourceRel extends DruidRel -{ - private final Uncollect uncollect; - private final DruidQueryRel druidQueryRel; - private final LogicalProject unnestProject; - - public DruidUnnestDatasourceRel( - Uncollect uncollect, - DruidQueryRel queryRel, - LogicalProject unnestProject, - PlannerContext plannerContext - ) - { - super(uncollect.getCluster(), uncollect.getTraitSet(), plannerContext); - this.uncollect = uncollect; - this.druidQueryRel = queryRel; - this.unnestProject = unnestProject; - } - - public LogicalProject getUnnestProject() - { - return unnestProject; - } - - @Nullable - @Override - public PartialDruidQuery getPartialDruidQuery() - { - return druidQueryRel.getPartialDruidQuery(); - } - - @Override - public DruidUnnestDatasourceRel withPartialQuery(PartialDruidQuery newQueryBuilder) - { - return new DruidUnnestDatasourceRel( - uncollect, - druidQueryRel.withPartialQuery(newQueryBuilder), - unnestProject, - getPlannerContext() - ); - } - - @Override - public DruidQuery toDruidQuery(boolean finalizeAggregations) - { - VirtualColumnRegistry virtualColumnRegistry = VirtualColumnRegistry.create( - druidQueryRel.getDruidTable().getRowSignature(), - getPlannerContext().getExprMacroTable(), - getPlannerContext().getPlannerConfig().isForceExpressionVirtualColumns() - ); - getPlannerContext().setJoinExpressionVirtualColumnRegistry(virtualColumnRegistry); - - final DruidExpression expression = Expressions.toDruidExpression( - getPlannerContext(), - druidQueryRel.getDruidTable().getRowSignature(), - unnestProject.getProjects().get(0) - ); - if (expression == null) { - return null; - } - Expr parsed = expression.parse(getPlannerContext().getExprMacroTable()); - ExprEval eval = parsed.eval(InputBindings.nilBindings()); - - // If 
query unnests a constant expression and not use any table - // the unnest would be on an inline data source - // with the input column being called "inline" in the native query - UnnestDataSource dataSource = UnnestDataSource.create( - InlineDataSource.fromIterable( - Collections.singletonList(new Object[]{eval.valueOrDefault()}), - RowSignature.builder().add("inline", ExpressionType.toColumnType(eval.type())).build() - ), - "inline", - druidQueryRel.getRowType().getFieldNames().get(0), - null - ); - - DruidQuery query = druidQueryRel.getPartialDruidQuery().build( - dataSource, - RowSignatures.fromRelDataType(uncollect.getRowType().getFieldNames(), uncollect.getRowType()), - getPlannerContext(), - getCluster().getRexBuilder(), - finalizeAggregations - ); - getPlannerContext().setJoinExpressionVirtualColumnRegistry(null); - return query; - } - - @Override - public DruidQuery toDruidQueryForExplaining() - { - return toDruidQuery(false); - } - - @Override - public DruidUnnestDatasourceRel asDruidConvention() - { - return new DruidUnnestDatasourceRel( - new Uncollect(getCluster(), traitSet.replace(DruidConvention.instance()), uncollect.getInput(), false), - druidQueryRel.asDruidConvention(), - unnestProject, - getPlannerContext() - ); - } - - @Override - public RelWriter explainTerms(RelWriter pw) - { - return super.explainTerms(pw); - } - - @Override - public Set getDataSourceNames() - { - return druidQueryRel.getDruidTable().getDataSource().getTableNames(); - } - - @Override - protected RelDataType deriveRowType() - { - return uncollect.getRowType(); - } - - @Override - protected DruidUnnestDatasourceRel clone() - { - return new DruidUnnestDatasourceRel(uncollect, druidQueryRel, unnestProject, getPlannerContext()); - } -} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestRel.java new file mode 100644 index 000000000000..a543e01dc4b5 --- /dev/null +++ 
b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestRel.java @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.rel; + +import com.fasterxml.jackson.core.JsonProcessingException; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelWriter; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.Uncollect; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.query.TableDataSource; +import org.apache.druid.sql.calcite.planner.PlannerContext; +import org.apache.druid.sql.calcite.table.RowSignatures; + +import javax.annotation.Nullable; +import java.util.Collections; +import java.util.Set; + +/** + * Captures the unnest (i.e. {@link Uncollect}) part of a correlated unnesting join. + * + * This rel cannot be executed directly. It is a holder of information for {@link DruidCorrelateUnnestRel}. + * + * Unnest on literal values, without correlated join, is handled directly by + * {@link org.apache.druid.sql.calcite.rule.DruidUnnestRule}. 
This covers the case where an unnest has an + input table: this rel resolves the unnest part and delegates the rel to be consumed by another + rule ({@link org.apache.druid.sql.calcite.rule.DruidCorrelateUnnestRule}). + */ +public class DruidUnnestRel extends DruidRel +{ + private static final TableDataSource DUMMY_DATA_SOURCE = new TableDataSource("__unnest__"); + + /** + * An {@link Uncollect} on top of a {@link Project} that generates an expression to be unnested. The underlying + * {@link Project} is not expected to reference any bits of its input; instead it references either a constant or + * a correlation variable through a {@link org.apache.calcite.rex.RexFieldAccess}. + */ + private final Uncollect uncollect; + + private DruidUnnestRel( + final RelOptCluster cluster, + final RelTraitSet traits, + final Uncollect uncollect, + final PlannerContext plannerContext + ) + { + super(cluster, traits, plannerContext); + this.uncollect = uncollect; + + if (!(uncollect.getInputs().get(0) instanceof Project)) { + // Validate that the Uncollect reads from a Project. + throw new ISE( + "Uncollect must reference Project, but child was [%s]", + uncollect.getInputs().get(0) + ); + } + } + + public static DruidUnnestRel create(final Uncollect uncollect, final PlannerContext plannerContext) + { + return new DruidUnnestRel( + uncollect.getCluster(), + uncollect.getTraitSet(), + uncollect, + plannerContext + ); + } + + /** + * Uncollect (unnest) operation that references the {@link #getUnnestProject()}. + */ + public Uncollect getUncollect() + { + return uncollect; + } + + /** + * Project that generates the expression to be unnested. 
+ */ + public Project getUnnestProject() + { + return (Project) uncollect.getInputs().get(0); + } + + @Override + public PartialDruidQuery getPartialDruidQuery() + { + return null; + } + + @Override + public DruidUnnestRel withPartialQuery(PartialDruidQuery newQueryBuilder) + { + throw new UnsupportedOperationException(); + } + + /** + * Returns a new rel with the {@link #getUnnestProject()} replaced. + */ + public DruidUnnestRel withUnnestProject(final Project newUnnestProject) + { + return new DruidUnnestRel( + getCluster(), + getTraitSet(), + (Uncollect) uncollect.copy( + uncollect.getTraitSet(), + newUnnestProject + ), + getPlannerContext() + ); + } + + @Override + public DruidQuery toDruidQuery(boolean finalizeAggregations) + { + // DruidUnnestRel is a holder for info for DruidCorrelateUnnestRel. It cannot be executed on its own. + throw new CannotBuildQueryException("Cannot execute UNNEST directly"); + } + + @Override + public DruidQuery toDruidQueryForExplaining() + { + return PartialDruidQuery + .create(uncollect) + .build( + DUMMY_DATA_SOURCE, + RowSignatures.fromRelDataType( + uncollect.getRowType().getFieldNames(), + uncollect.getRowType() + ), + getPlannerContext(), + getCluster().getRexBuilder(), + false + ); + } + + @Nullable + @Override + public DruidUnnestRel asDruidConvention() + { + return new DruidUnnestRel( + getCluster(), + getTraitSet().replace(DruidConvention.instance()), + uncollect, + getPlannerContext() + ); + } + + @Override + public RelWriter explainTerms(RelWriter pw) + { + final String queryString; + final DruidQuery druidQuery = toDruidQueryForExplaining(); + + try { + queryString = getPlannerContext().getJsonMapper().writeValueAsString(druidQuery.getQuery()); + } + catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + + return pw.item("unnestProject", getUnnestProject()) + .item("uncollect", getUncollect()) + .item("query", queryString) + .item("signature", druidQuery.getOutputRowSignature()); + } + + @Override + 
public Set getDataSourceNames() + { + return Collections.emptySet(); + } + + @Override + protected RelDataType deriveRowType() + { + return uncollect.getRowType(); + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java index 0cd71af8e5e7..4767167b8508 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/PartialDruidQuery.java @@ -54,7 +54,6 @@ public class PartialDruidQuery private final RelNode scan; private final Filter whereFilter; private final Project selectProject; - private final Project unnestProject; private final Aggregate aggregate; private final Filter havingFilter; private final Project aggregateProject; @@ -83,7 +82,6 @@ public enum Stage // WINDOW may be present only together with SCAN. WINDOW, - UNNEST_PROJECT } private PartialDruidQuery( @@ -96,8 +94,7 @@ private PartialDruidQuery( final Filter havingFilter, final Sort sort, final Project sortProject, - final Window window, - final Project unnestProject + final Window window ) { this.builderSupplier = Preconditions.checkNotNull(builderSupplier, "builderSupplier"); @@ -110,16 +107,15 @@ private PartialDruidQuery( this.sort = sort; this.sortProject = sortProject; this.window = window; - this.unnestProject = unnestProject; } - public static PartialDruidQuery create(final RelNode scanRel) + public static PartialDruidQuery create(final RelNode inputRel) { final Supplier builderSupplier = () -> RelFactories.LOGICAL_BUILDER.create( - scanRel.getCluster(), - scanRel.getTable() != null ? scanRel.getTable().getRelOptSchema() : null + inputRel.getCluster(), + inputRel.getTable() != null ? 
inputRel.getTable().getRelOptSchema() : null ); - return new PartialDruidQuery(builderSupplier, scanRel, null, null, null, null, null, null, null, null, null); + return new PartialDruidQuery(builderSupplier, inputRel, null, null, null, null, null, null, null, null); } public RelNode getScan() @@ -137,11 +133,6 @@ public Project getSelectProject() return selectProject; } - public Project getUnnestProject() - { - return unnestProject; - } - public Aggregate getAggregate() { return aggregate; @@ -185,8 +176,7 @@ public PartialDruidQuery withWhereFilter(final Filter newWhereFilter) havingFilter, sort, sortProject, - window, - unnestProject + window ); } @@ -229,8 +219,7 @@ public PartialDruidQuery withSelectProject(final Project newSelectProject) havingFilter, sort, sortProject, - window, - unnestProject + window ); } @@ -247,8 +236,7 @@ public PartialDruidQuery withAggregate(final Aggregate newAggregate) havingFilter, sort, sortProject, - window, - unnestProject + window ); } @@ -265,8 +253,7 @@ public PartialDruidQuery withHavingFilter(final Filter newHavingFilter) newHavingFilter, sort, sortProject, - window, - unnestProject + window ); } @@ -283,8 +270,7 @@ public PartialDruidQuery withAggregateProject(final Project newAggregateProject) havingFilter, sort, sortProject, - window, - unnestProject + window ); } @@ -301,8 +287,7 @@ public PartialDruidQuery withSort(final Sort newSort) havingFilter, newSort, sortProject, - window, - unnestProject + window ); } @@ -319,8 +304,7 @@ public PartialDruidQuery withSortProject(final Project newSortProject) havingFilter, sort, newSortProject, - window, - unnestProject + window ); } @@ -337,25 +321,7 @@ public PartialDruidQuery withWindow(final Window newWindow) havingFilter, sort, sortProject, - newWindow, - unnestProject - ); - } - - public PartialDruidQuery withUnnest(final Project newUnnestProject) - { - return new PartialDruidQuery( - builderSupplier, - scan, - whereFilter, - selectProject, - aggregate, - aggregateProject, 
- havingFilter, - sort, - sortProject, - window, - newUnnestProject + newWindow ); } @@ -572,15 +538,17 @@ public boolean equals(final Object o) if (o == null || getClass() != o.getClass()) { return false; } - final PartialDruidQuery that = (PartialDruidQuery) o; - return Objects.equals(scan, that.scan) && - Objects.equals(whereFilter, that.whereFilter) && - Objects.equals(selectProject, that.selectProject) && - Objects.equals(aggregate, that.aggregate) && - Objects.equals(havingFilter, that.havingFilter) && - Objects.equals(aggregateProject, that.aggregateProject) && - Objects.equals(sort, that.sort) && - Objects.equals(sortProject, that.sortProject); + PartialDruidQuery that = (PartialDruidQuery) o; + return Objects.equals(builderSupplier, that.builderSupplier) + && Objects.equals(scan, that.scan) + && Objects.equals(whereFilter, that.whereFilter) + && Objects.equals(selectProject, that.selectProject) + && Objects.equals(aggregate, that.aggregate) + && Objects.equals(havingFilter, that.havingFilter) + && Objects.equals(aggregateProject, that.aggregateProject) + && Objects.equals(sort, that.sort) + && Objects.equals(sortProject, that.sortProject) + && Objects.equals(window, that.window); } @Override @@ -594,7 +562,8 @@ public int hashCode() havingFilter, aggregateProject, sort, - sortProject + sortProject, + window ); } @@ -610,7 +579,7 @@ public String toString() ", aggregateProject=" + aggregateProject + ", sort=" + sort + ", sortProject=" + sortProject + - ", unnestProject=" + unnestProject + + ", window=" + window + '}'; } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterLTransposeRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterLTransposeRule.java new file mode 100644 index 000000000000..8e2b8d8d72fb --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterLTransposeRule.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or 
more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.rule; + +import com.google.common.collect.ImmutableList; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Correlate; +import org.apache.calcite.rel.core.Filter; + +/** + * Rule that pulls a {@link Filter} from the left-hand side of a {@link Correlate} above the Correlate. + * Allows subquery elimination. 
+ * + * @see CorrelateFilterRTransposeRule similar, but for right-hand side filters + */ +public class CorrelateFilterLTransposeRule extends RelOptRule +{ + private static final CorrelateFilterLTransposeRule INSTANCE = new CorrelateFilterLTransposeRule(); + + public CorrelateFilterLTransposeRule() + { + super( + operand( + Correlate.class, + operand(Filter.class, any()), + operand(RelNode.class, any()) + )); + } + + public static CorrelateFilterLTransposeRule instance() + { + return INSTANCE; + } + + @Override + public void onMatch(final RelOptRuleCall call) + { + final Correlate correlate = call.rel(0); + final Filter left = call.rel(1); + final RelNode right = call.rel(2); + + call.transformTo( + call.builder() + .push(correlate.copy(correlate.getTraitSet(), ImmutableList.of(left.getInput(), right))) + .filter(left.getCondition()) + .build() + ); + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterRTransposeRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterRTransposeRule.java new file mode 100644 index 000000000000..66731ca78ade --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/CorrelateFilterRTransposeRule.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.rule; + +import com.google.common.collect.ImmutableList; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Correlate; +import org.apache.calcite.rel.core.CorrelationId; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rex.RexCorrelVariable; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.rex.RexVisitorImpl; + +/** + * Rule that pulls a {@link Filter} from the right-hand side of a {@link Correlate} above the Correlate. + * Allows filters on unnested fields to be added to queries that use {@link org.apache.druid.query.UnnestDataSource}. + * + * @see CorrelateFilterLTransposeRule similar, but for left-hand side filters + */ +public class CorrelateFilterRTransposeRule extends RelOptRule +{ + private static final CorrelateFilterRTransposeRule INSTANCE = new CorrelateFilterRTransposeRule(); + + public CorrelateFilterRTransposeRule() + { + super( + operand( + Correlate.class, + operand(RelNode.class, any()), + operand(Filter.class, any()) + )); + } + + public static CorrelateFilterRTransposeRule instance() + { + return INSTANCE; + } + + @Override + public boolean matches(RelOptRuleCall call) + { + final Correlate correlate = call.rel(0); + final Filter right = call.rel(2); + + // Can't pull up filters that explicitly refer to the correlation variable. 
+ return !usesCorrelationId(correlate.getCorrelationId(), right.getCondition()); + } + + @Override + public void onMatch(final RelOptRuleCall call) + { + final Correlate correlate = call.rel(0); + final RelNode left = call.rel(1); + final Filter right = call.rel(2); + + call.transformTo( + call.builder() + .push(correlate.copy(correlate.getTraitSet(), ImmutableList.of(left, right.getInput()))) + .filter(RexUtil.shift(right.getCondition(), left.getRowType().getFieldCount())) + .build() + ); + } + + /** + * Whether an expression refers to correlation variables. + */ + private static boolean usesCorrelationId(final CorrelationId correlationId, final RexNode rexNode) + { + class CorrelationVisitor extends RexVisitorImpl + { + private boolean found = false; + + public CorrelationVisitor() + { + super(true); + } + + @Override + public Void visitCorrelVariable(RexCorrelVariable correlVariable) + { + if (correlVariable.id.equals(correlationId)) { + found = true; + } + return null; + } + } + + final CorrelationVisitor visitor = new CorrelationVisitor(); + rexNode.accept(visitor); + return visitor.found; + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidCorrelateUnnestRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidCorrelateUnnestRule.java index 1870e11dd75c..d331bb2222d3 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidCorrelateUnnestRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidCorrelateUnnestRule.java @@ -19,22 +19,29 @@ package org.apache.druid.sql.calcite.rule; +import it.unimi.dsi.fastutil.ints.IntAVLTreeSet; +import it.unimi.dsi.fastutil.ints.IntSet; import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Correlate; -import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.CorrelationId; import 
org.apache.calcite.rel.core.Project; import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCorrelVariable; +import org.apache.calcite.rex.RexFieldAccess; +import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.tools.RelBuilder; +import org.apache.calcite.util.ImmutableBitSet; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.rel.DruidCorrelateUnnestRel; -import org.apache.druid.sql.calcite.rel.DruidQueryRel; import org.apache.druid.sql.calcite.rel.DruidRel; -import org.apache.druid.sql.calcite.rel.DruidUnnestDatasourceRel; +import org.apache.druid.sql.calcite.rel.DruidRels; +import org.apache.druid.sql.calcite.rel.DruidUnnestRel; import org.apache.druid.sql.calcite.rel.PartialDruidQuery; import java.util.ArrayList; @@ -44,26 +51,28 @@ * This class creates the rule to abide by for creating correlations during unnest. * Typically, Calcite plans the unnest query such as * SELECT * from numFoo, unnest(dim3) in the following way: + * + *
  * 80:LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{3}])
  *   6:LogicalTableScan(subset=[rel#74:Subset#0.NONE.[]], table=[[druid, numfoo]])
  *   78:Uncollect(subset=[rel#79:Subset#3.NONE.[]])
  *     76:LogicalProject(subset=[rel#77:Subset#2.NONE.[]], EXPR$0=[MV_TO_ARRAY($cor0.dim3)])
  *       7:LogicalValues(subset=[rel#75:Subset#1.NONE.[0]], tuples=[[{ 0 }]])
+ * 
* - * {@link DruidUnnestDatasourceRule} takes care of the Uncollect(last 3 lines) to generate a {@link DruidUnnestDatasourceRel} - * thereby reducing the logical plan to: + * {@link DruidUnnestRule} takes care of the Uncollect(last 3 lines) to generate a {@link DruidUnnestRel} + * thereby reducing the logical plan to: + *
  *        LogicalCorrelate
  *           /       \
- *      DruidRel    DruidUnnestDataSourceRel
+ *      DruidRel    DruidUnnestRel
- *
- *  This forms the premise of this rule. The goal is to transform the above-mentioned structure in the tree
- *  with a new rel {@link DruidCorrelateUnnestRel} which shall be created here.
- *
+ * 
+ * This forms the premise of this rule. The goal is to transform the above-mentioned structure in the tree + * with a new rel {@link DruidCorrelateUnnestRel} which shall be created here. */ public class DruidCorrelateUnnestRule extends RelOptRule { private final PlannerContext plannerContext; - private final boolean enableLeftScanDirect; public DruidCorrelateUnnestRule(final PlannerContext plannerContext) { @@ -71,107 +80,139 @@ public DruidCorrelateUnnestRule(final PlannerContext plannerContext) operand( Correlate.class, operand(DruidRel.class, any()), - operand(DruidUnnestDatasourceRel.class, any()) + operand(DruidUnnestRel.class, any()) ) ); this.plannerContext = plannerContext; - this.enableLeftScanDirect = plannerContext.queryContext().getEnableJoinLeftScanDirect(); } @Override public boolean matches(RelOptRuleCall call) { - final DruidRel druidRel = call.rel(1); - final DruidRel uncollectRel = call.rel(2); - - return druidRel.getPartialDruidQuery() != null - && uncollectRel.getPartialDruidQuery() != null; + final DruidRel left = call.rel(1); + return left.getPartialDruidQuery() != null; } - @Override public void onMatch(RelOptRuleCall call) { final Correlate correlate = call.rel(0); - final DruidRel druidRel = call.rel(1); - DruidUnnestDatasourceRel druidUnnestDatasourceRel = call.rel(2); - - - final RexBuilder rexBuilder = correlate.getCluster().getRexBuilder(); - - final Filter druidRelFilter; - final DruidRel newDruidRelFilter; - final List newProjectExprs = new ArrayList<>(); - - final boolean isLeftDirectAccessPossible = enableLeftScanDirect && (druidRel instanceof DruidQueryRel); - - if (druidRel.getPartialDruidQuery().stage() == PartialDruidQuery.Stage.SELECT_PROJECT - && (isLeftDirectAccessPossible || druidRel.getPartialDruidQuery().getWhereFilter() == null)) { - // Swap the druidRel-side projection above the correlate, so the druidRel side is a simple scan or mapping. - // This helps us avoid subqueries. 
- final RelNode leftScan = druidRel.getPartialDruidQuery().getScan(); - final Project leftProject = druidRel.getPartialDruidQuery().getSelectProject(); - druidRelFilter = druidRel.getPartialDruidQuery().getWhereFilter(); - - // Left-side projection expressions rewritten to be on top of the correlate. - newProjectExprs.addAll(leftProject.getProjects()); - newDruidRelFilter = druidRel.withPartialQuery(PartialDruidQuery.create(leftScan)); - } else { - // Leave druidRel as-is. Write input refs that do nothing. - for (int i = 0; i < druidRel.getRowType().getFieldCount(); i++) { - newProjectExprs.add(rexBuilder.makeInputRef(correlate.getRowType().getFieldList().get(i).getType(), i)); + final DruidRel left = call.rel(1); + final DruidUnnestRel right = call.rel(2); + + if (DruidRels.isScanOrProject(left, true) + && left.getPartialDruidQuery().getSelectProject() != null + && RelOptUtil.InputFinder.bits(right.getUnnestProject().getProjects(), null).isEmpty()) { + // Pull left-side Project above the Correlate, so we can eliminate a subquery. + final RelNode leftScan = left.getPartialDruidQuery().getScan(); + final Project leftProject = left.getPartialDruidQuery().getSelectProject(); + + // Rewrite right-side Project on top of leftScan rather than leftProject. + final PushCorrelatedFieldAccessPastProject correlatedFieldRewriteShuttle = + new PushCorrelatedFieldAccessPastProject(correlate.getCorrelationId(), leftProject); + final List newRightProjectExprs = + correlatedFieldRewriteShuttle.apply(right.getUnnestProject().getProjects()); + + // Pull the Project out of the left side of the Correlate. + final DruidCorrelateUnnestRel druidCorrelateUnnest = DruidCorrelateUnnestRel.create( + correlate.copy( + correlate.getTraitSet(), + + // Left side: remove Project. + left.withPartialQuery(PartialDruidQuery.create(leftScan)), + + // Right side: use rewritten newRightProjectExprs, pushed past the left Project. 
+ right.withUnnestProject( + right.getUnnestProject().copy( + right.getUnnestProject().getTraitSet(), + right.getUnnestProject().getInput(), + newRightProjectExprs, + right.getUnnestProject().getRowType() + ) + ), + correlate.getCorrelationId(), + ImmutableBitSet.of(correlatedFieldRewriteShuttle.getRequiredColumns()), + correlate.getJoinType() + ), + plannerContext + ); + + // Add right-side input refs to the Project, so it matches the full original Correlate. + final RexBuilder rexBuilder = correlate.getCluster().getRexBuilder(); + final List pulledUpProjects = new ArrayList<>(leftProject.getProjects()); + for (int i = 0 ; i < right.getRowType().getFieldCount(); i++ ) { + pulledUpProjects.add(rexBuilder.makeInputRef(druidCorrelateUnnest, i + leftScan.getRowType().getFieldCount())); } - newDruidRelFilter = druidRel; - druidRelFilter = null; - } - if (druidUnnestDatasourceRel.getPartialDruidQuery().stage() == PartialDruidQuery.Stage.SELECT_PROJECT) { - for (final RexNode rexNode : RexUtil.shift( - druidUnnestDatasourceRel.getPartialDruidQuery() - .getSelectProject() - .getProjects(), - newDruidRelFilter.getRowType().getFieldCount() - )) { - newProjectExprs.add(rexNode); - } + // Now push the Project back on top of the Correlate. 
+ final RelBuilder relBuilder = + call.builder() + .push(druidCorrelateUnnest) + .project( + RexUtil.fixUp( + rexBuilder, + pulledUpProjects, + RelOptUtil.getFieldTypeList(druidCorrelateUnnest.getRowType()) + ) + ); + + final RelNode build = relBuilder.build(); + call.transformTo(build); } else { - for (int i = 0; i < druidUnnestDatasourceRel.getRowType().getFieldCount(); i++) { - newProjectExprs.add( - rexBuilder.makeInputRef( - correlate.getRowType() - .getFieldList() - .get(druidRel.getRowType().getFieldCount() + i) - .getType(), - newDruidRelFilter.getRowType().getFieldCount() + i - ) - ); - } + call.transformTo(DruidCorrelateUnnestRel.create(correlate, plannerContext)); } + } - final DruidCorrelateUnnestRel druidCorr = DruidCorrelateUnnestRel.create( - correlate.copy( - correlate.getTraitSet(), - newDruidRelFilter, - druidUnnestDatasourceRel, - correlate.getCorrelationId(), - correlate.getRequiredColumns(), - correlate.getJoinType() - ), - druidRelFilter, - plannerContext - ); + /** + * Shuttle that pushes correlating variable accesses past a Project. + */ + private static class PushCorrelatedFieldAccessPastProject extends RexShuttle + { + private final CorrelationId correlationId; + private final Project project; + + // "Sidecar" return value: computed along with the shuttling. 
+ private final IntSet requiredColumns = new IntAVLTreeSet(); + + public PushCorrelatedFieldAccessPastProject( + final CorrelationId correlationId, + final Project project + ) + { + this.correlationId = correlationId; + this.project = project; + } + public IntSet getRequiredColumns() + { + return requiredColumns; + } - final RelBuilder relBuilder = - call.builder() - .push(druidCorr) - .project(RexUtil.fixUp( - rexBuilder, - newProjectExprs, - RelOptUtil.getFieldTypeList(druidCorr.getRowType()) - )); + @Override + public RexNode visitFieldAccess(final RexFieldAccess fieldAccess) + { + if (fieldAccess.getReferenceExpr() instanceof RexCorrelVariable) { + final RexCorrelVariable encounteredCorrelVariable = (RexCorrelVariable) fieldAccess.getReferenceExpr(); + if (encounteredCorrelVariable.id.equals(correlationId)) { + final RexNode projectExpr = project.getProjects().get(fieldAccess.getField().getIndex()); + + // Rewrite RexInputRefs as correlation variable accesses. + final RexBuilder rexBuilder = project.getCluster().getRexBuilder(); + final RexNode newCorrel = rexBuilder.makeCorrel(project.getInput().getRowType(), correlationId); + return new RexShuttle() + { + @Override + public RexNode visitInputRef(RexInputRef inputRef) + { + requiredColumns.add(inputRef.getIndex()); + return project.getCluster().getRexBuilder().makeFieldAccess(newCorrel, inputRef.getIndex()); + } + }.apply(projectExpr); + } + } - call.transformTo(relBuilder.build()); + return super.visitFieldAccess(fieldAccess); + } } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidLogicalValuesRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidLogicalValuesRule.java index a4660ba853cd..b94a6ee4ac89 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidLogicalValuesRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidLogicalValuesRule.java @@ -31,12 +31,9 @@ import org.apache.druid.sql.calcite.planner.PlannerContext; import 
org.apache.druid.sql.calcite.planner.UnsupportedSQLQueryException; import org.apache.druid.sql.calcite.rel.DruidQueryRel; -import org.apache.druid.sql.calcite.table.DruidTable; -import org.apache.druid.sql.calcite.table.InlineTable; import org.apache.druid.sql.calcite.table.RowSignatures; import javax.annotation.Nullable; - import java.util.List; import java.util.stream.Collectors; @@ -78,12 +75,12 @@ public void onMatch(RelOptRuleCall call) values.getRowType().getFieldNames(), values.getRowType() ); - final DruidTable druidTable = new InlineTable( - InlineDataSource.fromIterable(objectTuples, rowSignature), - rowSignature - ); call.transformTo( - DruidQueryRel.scanValues(values, druidTable, plannerContext) + DruidQueryRel.scanConstantRel( + values, + InlineDataSource.fromIterable(objectTuples, rowSignature), + plannerContext + ) ); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java index 4276a48b2f13..d359328428ff 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidRules.java @@ -29,12 +29,13 @@ import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.core.Window; +import org.apache.calcite.rel.rules.ProjectCorrelateTransposeRule; import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.rel.DruidOuterQueryRel; -import org.apache.druid.sql.calcite.rel.DruidQuery; import org.apache.druid.sql.calcite.rel.DruidRel; import org.apache.druid.sql.calcite.rel.PartialDruidQuery; +import org.apache.druid.sql.calcite.run.EngineFeature; import java.util.ArrayList; import java.util.List; @@ -97,16 +98,23 @@ public static List rules(PlannerContext plannerContext) new DruidUnionRule(plannerContext), new 
DruidUnionDataSourceRule(plannerContext), DruidSortUnionRule.instance(), - DruidJoinRule.instance(plannerContext), - new DruidUnnestDatasourceRule(plannerContext), - new DruidCorrelateUnnestRule(plannerContext) + DruidJoinRule.instance(plannerContext) ) ); - if (plannerContext.queryContext().getBoolean(DruidQuery.CTX_ENABLE_WINDOW_FNS, false)) { + if (plannerContext.featureAvailable(EngineFeature.WINDOW_FUNCTIONS)) { retVal.add(new DruidQueryRule<>(Window.class, PartialDruidQuery.Stage.WINDOW, PartialDruidQuery::withWindow)); retVal.add(DruidOuterQueryRule.WINDOW); } + + if (plannerContext.featureAvailable(EngineFeature.UNNEST)) { + retVal.add(new DruidUnnestRule(plannerContext)); + retVal.add(new DruidCorrelateUnnestRule(plannerContext)); + retVal.add(ProjectCorrelateTransposeRule.INSTANCE); + retVal.add(CorrelateFilterLTransposeRule.instance()); + retVal.add(CorrelateFilterRTransposeRule.instance()); + } + return retVal; } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestDatasourceRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestDatasourceRule.java deleted file mode 100644 index e8123fe0670c..000000000000 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestDatasourceRule.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.sql.calcite.rule; - -import com.google.common.collect.ImmutableList; -import org.apache.calcite.plan.RelOptRule; -import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.rel.core.Uncollect; -import org.apache.calcite.rel.logical.LogicalProject; -import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.tools.RelBuilder; -import org.apache.druid.sql.calcite.planner.PlannerContext; -import org.apache.druid.sql.calcite.rel.DruidQueryRel; -import org.apache.druid.sql.calcite.rel.DruidUnnestDatasourceRel; - -/** - * This class creates the rule to abide by for creating unnest (internally uncollect) in Calcite. - * Typically, Calcite plans the *unnest* part of the query involving a table such as - * SELECT * from numFoo, unnest(dim3) - * or even a standalone unnest query such as - * SELECT * from unnest(ARRAY[1,2,3]) in the following way: - * 78:Uncollect(subset=[rel#79:Subset#3.NONE.[]]) - * 76:LogicalProject(subset=[rel#77:Subset#2.NONE.[]], EXPR$0=[MV_TO_ARRAY($cor0.dim3)]) - * 7:LogicalValues(subset=[rel#75:Subset#1.NONE.[0]], tuples=[[{ 0 }]]) - * - * Calcite tackles plans bottom up. Therefore, - * {@link DruidLogicalValuesRule} converts the LogicalValues part into a leaf level {@link DruidQueryRel} - * thereby creating the following subtree in the call tree - * - * Uncollect - * \ - * LogicalProject - * \ - * DruidQueryRel - * - * - * This forms the premise of this rule. 
The goal is to transform the above-mentioned structure in the tree - * with a new rel {@link DruidUnnestDatasourceRel} which shall be created here. - * - */ -public class DruidUnnestDatasourceRule extends RelOptRule -{ - private final PlannerContext plannerContext; - - public DruidUnnestDatasourceRule(PlannerContext plannerContext) - { - super( - operand( - Uncollect.class, - operand(LogicalProject.class, operand(DruidQueryRel.class, none())) - ) - ); - this.plannerContext = plannerContext; - } - - @Override - public boolean matches(RelOptRuleCall call) - { - return true; - } - - @Override - public void onMatch(final RelOptRuleCall call) - { - final Uncollect uncollectRel = call.rel(0); - final LogicalProject logicalProject = call.rel(1); - final DruidQueryRel druidQueryRel = call.rel(2); - - final RexBuilder rexBuilder = logicalProject.getCluster().getRexBuilder(); - - final LogicalProject queryProject = LogicalProject.create( - uncollectRel, - ImmutableList.of(rexBuilder.makeInputRef(uncollectRel.getRowType().getFieldList().get(0).getType(), 0)), - uncollectRel.getRowType() - ); - - DruidUnnestDatasourceRel unnestDatasourceRel = new DruidUnnestDatasourceRel( - uncollectRel, - druidQueryRel.withPartialQuery(druidQueryRel.getPartialDruidQuery().withSelectProject(queryProject)), - logicalProject, - plannerContext - ); - - final RelBuilder relBuilder = - call.builder() - .push(unnestDatasourceRel); - - call.transformTo(relBuilder.build()); - } -} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestRule.java new file mode 100644 index 000000000000..c482fcd9cbfe --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestRule.java @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.rule; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.Uncollect; +import org.apache.calcite.rel.core.Values; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.druid.math.expr.Expr; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.InputBindings; +import org.apache.druid.query.InlineDataSource; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.sql.calcite.expression.DruidExpression; +import org.apache.druid.sql.calcite.expression.Expressions; +import org.apache.druid.sql.calcite.planner.PlannerContext; +import org.apache.druid.sql.calcite.rel.DruidQueryRel; +import org.apache.druid.sql.calcite.rel.DruidUnnestRel; +import org.apache.druid.sql.calcite.table.RowSignatures; + +import javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.List; + +/** + * This class creates the rule to abide by for creating unnest (internally uncollect) in Calcite. + * Typically, Calcite plans the *unnest* part of the query involving a table such as + *
+ * SELECT * from numFoo, unnest(dim3)
+ * 
+ * or even a standalone unnest query such as + *
+ * SELECT * from unnest(ARRAY[1,2,3]) in the following way:
+ *   78:Uncollect(subset=[rel#79:Subset#3.NONE.[]])
+ *     76:LogicalProject(subset=[rel#77:Subset#2.NONE.[]], EXPR$0=[MV_TO_ARRAY($cor0.dim3)])
+ *       7:LogicalValues(subset=[rel#75:Subset#1.NONE.[0]], tuples=[[{ 0 }]])
+ * 
+ * Calcite tackles plans bottom up. Therefore, + * {@link DruidLogicalValuesRule} converts the LogicalValues part into a leaf level {@link DruidQueryRel} + * thereby creating the following subtree in the call tree + * + *
+ * Uncollect
+ *  \
+ *  LogicalProject
+ *   \
+ *   DruidQueryRel
+ * 
+ * + * This forms the premise of this rule. The goal is to transform the above-mentioned structure in the tree + * with a new rel {@link DruidUnnestRel} which shall be created here. + */ +public class DruidUnnestRule extends RelOptRule +{ + private final PlannerContext plannerContext; + + public DruidUnnestRule(PlannerContext plannerContext) + { + super( + operand( + Uncollect.class, + operand(Project.class, operand(Values.class, none())) + ) + ); + this.plannerContext = plannerContext; + } + + @Override + public boolean matches(RelOptRuleCall call) + { + final Project projectRel = call.rel(1); + final Values valuesRel = call.rel(2); + + // Project must be a single field on top of a single row, and not refer to any bits of the input. + // (The single row is a dummy row. We expect the Project expr to be a constant or a correlated field access.) + return projectRel.getProjects().size() == 1 + && valuesRel.getTuples().size() == 1 + && RelOptUtil.InputFinder.bits(projectRel.getProjects(), null).isEmpty(); + } + + @Override + public void onMatch(final RelOptRuleCall call) + { + final Uncollect uncollectRel = call.rel(0); + final Project projectRel = call.rel(1); + + if (RexUtil.isConstant(projectRel.getProjects().get(0))) { + // Constant expression: transform to DruidQueryRel on an inline datasource. + final InlineDataSource inlineDataSource = toInlineDataSource( + uncollectRel, + projectRel.getProjects().get(0), + plannerContext + ); + + if (inlineDataSource != null) { + call.transformTo( + DruidQueryRel.scanConstantRel( + uncollectRel, + inlineDataSource, + plannerContext + ) + ); + } + } else { + // Transform to DruidUnnestRel, a holder for an unnest of a correlated variable. 
+ call.transformTo( + DruidUnnestRel.create( + (Uncollect) uncollectRel.copy(uncollectRel.getTraitSet(), projectRel), + plannerContext + ) + ); + } + } + + @Nullable + private static InlineDataSource toInlineDataSource( + final Uncollect uncollectRel, + final RexNode projectExpr, + final PlannerContext plannerContext + ) + { + final DruidExpression expression = Expressions.toDruidExpression( + plannerContext, + RowSignature.empty(), + projectExpr + ); + + if (expression == null) { + return null; + } + + // Evaluate the expression. It's a constant, so no bindings are needed. + final Expr parsedExpression = expression.parse(plannerContext.getExprMacroTable()); + final ExprEval eval = parsedExpression.eval(InputBindings.nilBindings()); + final List rows = new ArrayList<>(); + + if (eval.isArray()) { + final Object[] evalArray = eval.asArray(); + if (evalArray != null) { + for (Object o : evalArray) { + rows.add(new Object[]{o}); + } + } + } else { + rows.add(new Object[]{eval.valueOrDefault()}); + } + + // Transform to inline datasource. + final RowSignature rowSignature = RowSignatures.fromRelDataType( + uncollectRel.getRowType().getFieldNames(), + uncollectRel.getRowType() + ); + + return InlineDataSource.fromIterable(rows, rowSignature); + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/run/EngineFeature.java b/sql/src/main/java/org/apache/druid/sql/calcite/run/EngineFeature.java index e8a9bd969de1..322c2000ce1e 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/run/EngineFeature.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/run/EngineFeature.java @@ -23,7 +23,7 @@ import org.apache.druid.sql.calcite.planner.PlannerContext; /** - * Arguments to {@link SqlEngine#feature(EngineFeature, PlannerContext)}. + * Arguments to {@link SqlEngine#featureAvailable(EngineFeature, PlannerContext)}. 
*/ public enum EngineFeature { @@ -78,5 +78,20 @@ public enum EngineFeature * Planner is permitted to use a {@link org.apache.calcite.runtime.Bindable} plan on local resources, instead * of {@link QueryMaker}, for SELECT query implementation. Used for system tables and the like. */ - ALLOW_BINDABLE_PLAN + ALLOW_BINDABLE_PLAN, + + /** + * Queries can use GROUPING SETS. + */ + GROUPING_SETS, + + /** + * Queries can use window functions. + */ + WINDOW_FUNCTIONS, + + /** + * Queries can use {@link org.apache.calcite.sql.fun.SqlStdOperatorTable#UNNEST}. + */ + UNNEST } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java b/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java index 42bd83b0de5b..5156a152fb38 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeSqlEngine.java @@ -93,13 +93,16 @@ public RelDataType resultTypeForInsert(RelDataTypeFactory typeFactory, RelDataTy } @Override - public boolean feature(EngineFeature feature, PlannerContext plannerContext) + public boolean featureAvailable(EngineFeature feature, PlannerContext plannerContext) { switch (feature) { case CAN_SELECT: case ALLOW_BINDABLE_PLAN: case TIMESERIES_QUERY: case TOPN_QUERY: + case GROUPING_SETS: + case WINDOW_FUNCTIONS: + case UNNEST: return true; case TIME_BOUNDARY_QUERY: return plannerContext.queryContext().isTimeBoundaryPlanningEnabled(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/run/SqlEngine.java b/sql/src/main/java/org/apache/druid/sql/calcite/run/SqlEngine.java index 2734cd09b7fd..22c8545dd67e 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/run/SqlEngine.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/run/SqlEngine.java @@ -40,7 +40,7 @@ public interface SqlEngine /** * Whether a feature applies to this engine or not. 
*/ - boolean feature(EngineFeature feature, PlannerContext plannerContext); + boolean featureAvailable(EngineFeature feature, PlannerContext plannerContext); /** * Validates a provided query context. Returns quietly if the context is OK; throws {@link ValidationException} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/table/InlineTable.java b/sql/src/main/java/org/apache/druid/sql/calcite/table/InlineTable.java index e8a7f8e2987d..8612f1ac94b2 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/table/InlineTable.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/table/InlineTable.java @@ -26,7 +26,6 @@ import org.apache.calcite.rel.logical.LogicalTableScan; import org.apache.druid.query.DataSource; import org.apache.druid.query.InlineDataSource; -import org.apache.druid.segment.column.RowSignature; /** * Represents a specialized table used within Druid's Calcite-based planner. @@ -38,12 +37,9 @@ public class InlineTable extends DruidTable { private final DataSource dataSource; - public InlineTable( - final InlineDataSource dataSource, - final RowSignature rowSignature - ) + public InlineTable(final InlineDataSource dataSource) { - super(rowSignature); + super(dataSource.getRowSignature()); this.dataSource = Preconditions.checkNotNull(dataSource, "dataSource"); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/view/ViewSqlEngine.java b/sql/src/main/java/org/apache/druid/sql/calcite/view/ViewSqlEngine.java index 3bc1acdce8a2..740cac15ee46 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/view/ViewSqlEngine.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/view/ViewSqlEngine.java @@ -50,7 +50,7 @@ public String name() } @Override - public boolean feature(EngineFeature feature, PlannerContext plannerContext) + public boolean featureAvailable(EngineFeature feature, PlannerContext plannerContext) { switch (feature) { // Use most permissive set of SELECT features, since our goal is to get the row type of the view. 
@@ -60,6 +60,9 @@ public boolean feature(EngineFeature feature, PlannerContext plannerContext) case ALLOW_BINDABLE_PLAN: case READ_EXTERNAL_DATA: case SCAN_ORDER_BY_NON_TIME: + case GROUPING_SETS: + case WINDOW_FUNCTIONS: + case UNNEST: return true; // Views can't sit on top of INSERT or REPLACE. diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java index 4d511dcc9f8a..a763a4f4028b 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java @@ -27,6 +27,7 @@ import com.google.common.collect.ImmutableMap; import com.google.inject.Injector; import org.apache.calcite.plan.RelOptPlanner; +import org.apache.commons.text.StringEscapeUtils; import org.apache.druid.annotations.UsedByJUnitParamsRunner; import org.apache.druid.common.config.NullHandling; import org.apache.druid.guice.DruidInjectorBuilder; @@ -79,8 +80,6 @@ import org.apache.druid.server.security.AuthenticationResult; import org.apache.druid.server.security.ForbiddenException; import org.apache.druid.server.security.ResourceAction; -import org.apache.druid.sql.PreparedStatement; -import org.apache.druid.sql.SqlQueryPlus; import org.apache.druid.sql.SqlStatementFactory; import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.planner.Calcites; @@ -894,16 +893,6 @@ public boolean isRunningMSQ() } } - public Set analyzeResources( - final SqlStatementFactory sqlStatementFactory, - final SqlQueryPlus query - ) - { - PreparedStatement stmt = sqlStatementFactory.preparedStatement(query); - stmt.prepare(); - return stmt.allResources(); - } - public void assertResultsEquals(String sql, List expectedResults, List results) { for (int i = 0; i < results.size(); i++) { @@ -1231,30 +1220,7 @@ public static void displayResults(List results) PrintStream out = System.out; 
out.println("-- Actual results --"); for (int rowIndex = 0; rowIndex < results.size(); rowIndex++) { - Object[] row = results.get(rowIndex); - out.print("new Object[] {"); - for (int colIndex = 0; colIndex < row.length; colIndex++) { - Object col = row[colIndex]; - if (colIndex > 0) { - out.print(", "); - } - if (col == null) { - out.print("null"); - } else if (col instanceof String) { - out.print("\""); - out.print(col); - out.print("\""); - } else if (col instanceof Long) { - out.print(col); - out.print("L"); - } else if (col instanceof Double) { - out.print(col); - out.print("D"); - } else { - out.print(col); - } - } - out.print("}"); + printArray(results.get(rowIndex), out); if (rowIndex < results.size() - 1) { out.print(","); } @@ -1262,4 +1228,45 @@ public static void displayResults(List results) } out.println("----"); } + + private static void printArray(final Object[] array, final PrintStream out) + { + printArrayImpl(array, out, "new Object[]{", "}"); + } + + private static void printList(final List list, final PrintStream out) + { + printArrayImpl(list.toArray(new Object[0]), out, "ImmutableList.of(", ")"); + } + + private static void printArrayImpl(final Object[] array, final PrintStream out, final String pre, final String post) + { + out.print(pre); + for (int colIndex = 0; colIndex < array.length; colIndex++) { + Object col = array[colIndex]; + if (colIndex > 0) { + out.print(", "); + } + if (col == null) { + out.print("null"); + } else if (col instanceof String) { + out.print("\""); + out.print(StringEscapeUtils.escapeJava((String) col)); + out.print("\""); + } else if (col instanceof Long) { + out.print(col); + out.print("L"); + } else if (col instanceof Double) { + out.print(col); + out.print("D"); + } else if (col instanceof Object[]) { + printArray((Object[]) col, out); + } else if (col instanceof List) { + printList((List) col, out); + } else { + out.print(col); + } + } + out.print(post); + } } diff --git 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 119cae8634ec..338d1f56c179 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -20,6 +20,7 @@ package org.apache.druid.sql.calcite; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.HumanReadableBytes; @@ -31,6 +32,7 @@ import org.apache.druid.query.Druids; import org.apache.druid.query.InlineDataSource; import org.apache.druid.query.Query; +import org.apache.druid.query.QueryContexts; import org.apache.druid.query.QueryDataSource; import org.apache.druid.query.TableDataSource; import org.apache.druid.query.UnnestDataSource; @@ -58,6 +60,7 @@ import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.join.JoinType; import org.apache.druid.sql.calcite.filtration.Filtration; +import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.util.CalciteTests; import org.junit.Assert; import org.junit.Test; @@ -65,12 +68,20 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Map; /** * Tests for array functions and array types */ public class CalciteArraysQueryTest extends BaseCalciteQueryTest { + private static final Map QUERY_CONTEXT_UNNEST = + ImmutableMap.builder() + .putAll(QUERY_CONTEXT_DEFAULT) + .put(PlannerContext.CTX_ENABLE_UNNEST, true) + .put(QueryContexts.CTX_SQL_STRINGIFY_ARRAYS, false) + .build(); + // test some query stuffs, sort of limited since no native array column types so either need to use constructor or // array aggregator @Test @@ -2125,6 +2136,7 @@ public void 
testArrayAggAsArrayFromJoin() } testQuery( "SELECT numfoo.dim4, j.arr, ARRAY_TO_STRING(j.arr, ',') FROM numfoo INNER JOIN (SELECT dim4, ARRAY_AGG(DISTINCT dim1) as arr FROM numfoo WHERE dim1 is not null GROUP BY 1) as j ON numfoo.dim4 = j.dim4", + QUERY_CONTEXT_DEFAULT, ImmutableList.of( Druids.newScanQueryBuilder() .dataSource( @@ -2480,6 +2492,7 @@ public void testArrayAggArrayContainsSubquery() } testQuery( "SELECT dim1,dim2 FROM foo WHERE ARRAY_CONTAINS((SELECT ARRAY_AGG(DISTINCT dim1) FROM foo WHERE dim1 is not null), dim1)", + QUERY_CONTEXT_DEFAULT, ImmutableList.of( Druids.newScanQueryBuilder() .dataSource( @@ -2559,6 +2572,7 @@ public void testArrayAggGroupByArrayContainsSubquery() } testQuery( "SELECT dim2, COUNT(*) FROM foo WHERE ARRAY_CONTAINS((SELECT ARRAY_AGG(DISTINCT dim1) FROM foo WHERE dim1 is not null), dim1) GROUP BY 1", + QUERY_CONTEXT_DEFAULT, ImmutableList.of( GroupByQuery.builder() .setDataSource( @@ -2650,23 +2664,23 @@ public void testUnnestInline() cannotVectorize(); testQuery( "SELECT * FROM UNNEST(ARRAY[1,2,3])", + QUERY_CONTEXT_UNNEST, ImmutableList.of( Druids.newScanQueryBuilder() .dataSource( - UnnestDataSource.create( - InlineDataSource.fromIterable( - ImmutableList.of(new Object[]{new Object[]{1L, 2L, 3L}}), - RowSignature.builder().add("inline", ColumnType.LONG_ARRAY).build() + InlineDataSource.fromIterable( + ImmutableList.of( + new Object[]{1L}, + new Object[]{2L}, + new Object[]{3L} ), - "inline", - "EXPR$0", - null + RowSignature.builder().add("EXPR$0", ColumnType.LONG).build() ) ) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) - .context(QUERY_CONTEXT_DEFAULT) + .context(QUERY_CONTEXT_UNNEST) .columns(ImmutableList.of( "EXPR$0" )) @@ -2691,21 +2705,19 @@ public void testUnnest() cannotVectorize(); testQuery( "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)", + QUERY_CONTEXT_UNNEST, ImmutableList.of( 
Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - "dim3", - "EXPR$0", + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), null )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) - .context(QUERY_CONTEXT_DEFAULT) - .columns(ImmutableList.of( - "EXPR$0" - )) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) .build() ), useDefault ? @@ -2732,6 +2744,139 @@ public void testUnnest() ); } + @Test + public void testUnnestTwice() + { + cannotVectorize(); + testQuery( + "SELECT dim1, MV_TO_ARRAY(dim3), STRING_TO_ARRAY(dim1, U&'\\005C.') AS dim1_split, dim1_split_unnest, dim3_unnest\n" + + "FROM\n" + + " druid.numfoo,\n" + + " UNNEST(STRING_TO_ARRAY(dim1, U&'\\005C.')) as t2 (dim1_split_unnest),\n" + + " UNNEST(MV_TO_ARRAY(dim3)) as t3 (dim3_unnest)", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource( + UnnestDataSource.create( + UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn( + "j0.unnest", + "string_to_array(\"dim1\",'\\u005C.')", + ColumnType.STRING_ARRAY + ), + null + ), + expressionVirtualColumn( + "_j0.unnest", + "\"dim3\"", + ColumnType.STRING + ), + null + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .virtualColumns( + expressionVirtualColumn( + "v0", + "mv_to_array(\"dim3\")", + ColumnType.STRING_ARRAY + ), + expressionVirtualColumn( + "v1", + "string_to_array(\"dim1\",'\\u005C.')", + ColumnType.STRING_ARRAY + ) + ) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("_j0.unnest", "dim1", "j0.unnest", "v0", "v1")) + .build() + ), + ImmutableList.of( + new Object[]{"", ImmutableList.of("a", "b"), useDefault ? 
null : ImmutableList.of(""), "", "a"}, + new Object[]{"", ImmutableList.of("a", "b"), useDefault ? null : ImmutableList.of(""), "", "b"}, + new Object[]{"10.1", ImmutableList.of("b", "c"), ImmutableList.of("10", "1"), "10", "b"}, + new Object[]{"10.1", ImmutableList.of("b", "c"), ImmutableList.of("10", "1"), "10", "c"}, + new Object[]{"10.1", ImmutableList.of("b", "c"), ImmutableList.of("10", "1"), "1", "b"}, + new Object[]{"10.1", ImmutableList.of("b", "c"), ImmutableList.of("10", "1"), "1", "c"}, + new Object[]{"2", ImmutableList.of("d"), ImmutableList.of("2"), "2", "d"}, + new Object[]{"1", useDefault ? null : ImmutableList.of(""), ImmutableList.of("1"), "1", ""}, + new Object[]{"def", null, ImmutableList.of("def"), "def", NullHandling.defaultStringValue()}, + new Object[]{"abc", null, ImmutableList.of("abc"), "abc", NullHandling.defaultStringValue()} + ) + ); + } + + @Test + public void testUnnestTwiceWithFiltersAndExpressions() + { + cannotVectorize(); + testQuery( + "SELECT dim1, MV_TO_ARRAY(dim3), STRING_TO_ARRAY(dim1, U&'\\005C.') AS dim1_split, dim1_split_unnest, dim3_unnest || 'xx' AS dim3_unnest\n" + + "FROM\n" + + " druid.numfoo,\n" + + " UNNEST(STRING_TO_ARRAY(dim1, U&'\\005C.')) as t2 (dim1_split_unnest),\n" + + " UNNEST(MV_TO_ARRAY(dim3)) as t3 (dim3_unnest)" + + "WHERE t2.dim1_split_unnest IN ('1', '2')", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource( + UnnestDataSource.create( + UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn( + "j0.unnest", + "string_to_array(\"dim1\",'\\u005C.')", + ColumnType.STRING_ARRAY + ), + null + ), + expressionVirtualColumn( + "_j0.unnest", + "\"dim3\"", + ColumnType.STRING + ), + null + ) + ) + .intervals(querySegmentSpec(Filtration.eternity())) + .virtualColumns( + expressionVirtualColumn( + "v0", + "mv_to_array(\"dim3\")", + ColumnType.STRING_ARRAY + ), + expressionVirtualColumn( + "v1", + 
"string_to_array(\"dim1\",'\\u005C.')", + ColumnType.STRING_ARRAY + ), + expressionVirtualColumn( + "v2", + "concat(\"_j0.unnest\",'xx')", + ColumnType.STRING + ) + ) + .filters(in("j0.unnest", ImmutableList.of("1", "2"), null)) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("dim1", "j0.unnest", "v0", "v1", "v2")) + .build() + ), + ImmutableList.of( + new Object[]{"10.1", ImmutableList.of("b", "c"), ImmutableList.of("10", "1"), "1", "bxx"}, + new Object[]{"10.1", ImmutableList.of("b", "c"), ImmutableList.of("10", "1"), "1", "cxx"}, + new Object[]{"2", ImmutableList.of("d"), ImmutableList.of("2"), "2", "dxx"}, + new Object[]{"1", null, ImmutableList.of("1"), "1", "xx"} + ) + ); + } + @Test public void testUnnestWithGroupBy() { @@ -2743,19 +2888,19 @@ public void testUnnestWithGroupBy() cannotVectorize(); testQuery( "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) GROUP BY d3 ", + QUERY_CONTEXT_UNNEST, ImmutableList.of( GroupByQuery.builder() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - "dim3", - "EXPR$0", + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), null )) .setInterval(querySegmentSpec(Filtration.eternity())) - .setContext(QUERY_CONTEXT_DEFAULT) - .setDimensions(new DefaultDimensionSpec("EXPR$0", "_d0", ColumnType.STRING)) + .setContext(QUERY_CONTEXT_UNNEST) + .setDimensions(new DefaultDimensionSpec("j0.unnest", "_d0", ColumnType.STRING)) .setGranularity(Granularities.ALL) - .setContext(QUERY_CONTEXT_DEFAULT) + .setContext(QUERY_CONTEXT_UNNEST) .build() ), useDefault ? 
@@ -2788,17 +2933,17 @@ public void testUnnestWithGroupByOrderBy() cannotVectorize(); testQuery( "SELECT d3, COUNT(*) FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) AS unnested(d3) GROUP BY d3 ORDER BY d3 DESC ", + QUERY_CONTEXT_UNNEST, ImmutableList.of( GroupByQuery.builder() .setDataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - "dim3", - "EXPR$0", + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), null )) .setInterval(querySegmentSpec(Filtration.eternity())) - .setContext(QUERY_CONTEXT_DEFAULT) - .setDimensions(new DefaultDimensionSpec("EXPR$0", "_d0", ColumnType.STRING)) + .setContext(QUERY_CONTEXT_UNNEST) + .setDimensions(new DefaultDimensionSpec("j0.unnest", "_d0", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setLimitSpec( DefaultLimitSpec @@ -2811,7 +2956,7 @@ public void testUnnestWithGroupByOrderBy() .build() ) .setAggregatorSpecs(new CountAggregatorFactory("a0")) - .setContext(QUERY_CONTEXT_DEFAULT) + .setContext(QUERY_CONTEXT_UNNEST) .build() ), useDefault ? @@ -2844,20 +2989,20 @@ public void testUnnestWithGroupByOrderByWithLimit() cannotVectorize(); testQuery( "SELECT d3, COUNT(*) FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) AS unnested(d3) GROUP BY d3 ORDER BY d3 ASC LIMIT 4 ", + QUERY_CONTEXT_UNNEST, ImmutableList.of( new TopNQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - "dim3", - "EXPR$0", + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), null )) .intervals(querySegmentSpec(Filtration.eternity())) - .dimension(new DefaultDimensionSpec("EXPR$0", "_d0", ColumnType.STRING)) + .dimension(new DefaultDimensionSpec("j0.unnest", "_d0", ColumnType.STRING)) .metric(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC)) .threshold(4) .aggregators(aggregators(new CountAggregatorFactory("a0"))) - .context(QUERY_CONTEXT_DEFAULT) + .context(QUERY_CONTEXT_UNNEST) .build() ), useDefault ? 
@@ -2888,21 +3033,19 @@ public void testUnnestWithLimit() cannotVectorize(); testQuery( "SELECT d3 FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3) LIMIT 3", + QUERY_CONTEXT_UNNEST, ImmutableList.of( Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - "dim3", - "EXPR$0", + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), null )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) - .context(QUERY_CONTEXT_DEFAULT) - .columns(ImmutableList.of( - "EXPR$0" - )) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) .limit(3) .build() ), @@ -2925,21 +3068,19 @@ public void testUnnestFirstQueryOnSelect() cannotVectorize(); testQuery( "SELECT d3 FROM (select dim1, dim2, dim3 from druid.numfoo), UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)", + QUERY_CONTEXT_UNNEST, ImmutableList.of( Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - "dim3", - "EXPR$0", + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), null )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) - .context(QUERY_CONTEXT_DEFAULT) - .columns(ImmutableList.of( - "EXPR$0" - )) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) .build() ), useDefault ? 
@@ -2977,6 +3118,7 @@ public void testUnnestWithFilters() cannotVectorize(); testQuery( "SELECT d3 FROM (select * from druid.numfoo where dim2='a'), UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)", + QUERY_CONTEXT_UNNEST, ImmutableList.of( Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( @@ -2986,43 +3128,21 @@ public void testUnnestWithFilters() new TableDataSource(CalciteTests.DATASOURCE3) ) .intervals(querySegmentSpec(Filtration.eternity())) - .virtualColumns(expressionVirtualColumn("v0", "'a'", ColumnType.STRING)) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) .filters(new SelectorDimFilter("dim2", "a", null)) - .columns( - "__time", - "cnt", - "d1", - "d2", - "dim1", - "dim3", - "dim4", - "dim5", - "dim6", - "f1", - "f2", - "l1", - "l2", - "m1", - "m2", - "unique_dim1", - "v0" - ) - .context(QUERY_CONTEXT_DEFAULT) + .columns("dim3") + .context(QUERY_CONTEXT_UNNEST) .build() ), - "dim3", - "EXPR$0", + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), null )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) - .context(QUERY_CONTEXT_DEFAULT) - .columns(ImmutableList.of( - "EXPR$0" - )) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) .build() ), ImmutableList.of( @@ -3034,16 +3154,16 @@ public void testUnnestWithFilters() } @Test - public void testUnnestWithInFilters() + public void testUnnestWithFiltersInsideAndOutside() { - // This tells the test to skip generating (vectorize = force) path - // Generates only 1 native query with vectorize = false skipVectorize(); - // This tells that both vectorize = force and vectorize = false takes the same path of non vectorization - // Generates 2 native queries with 2 different values of vectorize - cannotVectorize(); testQuery( - "SELECT d3 FROM (select * from druid.numfoo where dim2 IN ('a','b','ab','abc')), UNNEST(MV_TO_ARRAY(dim3)) as unnested 
(d3)", + "SELECT d3 FROM\n" + + " (select * from druid.numfoo where dim2='a') t,\n" + + " UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)\n" + + "WHERE t.dim1 <> 'foo'\n" + + "AND unnested.d3 <> 'b'", + QUERY_CONTEXT_UNNEST, ImmutableList.of( Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( @@ -3055,40 +3175,99 @@ public void testUnnestWithInFilters() .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) - .filters(new InDimFilter("dim2", ImmutableList.of("a", "b", "ab", "abc"), null)) - .columns( - "__time", - "cnt", - "d1", - "d2", - "dim1", - "dim2", - "dim3", - "dim4", - "dim5", - "dim6", - "f1", - "f2", - "l1", - "l2", - "m1", - "m2", - "unique_dim1" + .filters( + and( + selector("dim2", "a", null), + not(selector("dim1", "foo", null)) + ) ) - .context(QUERY_CONTEXT_DEFAULT) + .columns("dim3") + .context(QUERY_CONTEXT_UNNEST) .build() ), - "dim3", - "EXPR$0", + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), null )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters(not(selector("j0.unnest", "b", null))) .legacy(false) - .context(QUERY_CONTEXT_DEFAULT) - .columns(ImmutableList.of( - "EXPR$0" + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{""} + ) + ); + } + + @Test + public void testUnnestWithFiltersOutside() + { + skipVectorize(); + testQuery( + "SELECT d3 FROM\n" + + " druid.numfoo t,\n" + + " UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)\n" + + "WHERE t.dim2='a'" + + "AND t.dim1 <> 'foo'\n" + + "AND unnested.d3 <> 'b'", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + null + )) + 
.intervals(querySegmentSpec(Filtration.eternity())) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .filters( + and( + selector("dim2", "a", null), + not(selector("dim1", "foo", null)), + not(selector("j0.unnest", "b", null)) + ) + ) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) + .build() + ), + ImmutableList.of( + new Object[]{"a"}, + new Object[]{""} + ) + ); + } + + @Test + public void testUnnestWithInFilters() + { + // This tells the test to skip generating (vectorize = force) path + // Generates only 1 native query with vectorize = false + skipVectorize(); + // This tells that both vectorize = force and vectorize = false takes the same path of non vectorization + // Generates 2 native queries with 2 different values of vectorize + cannotVectorize(); + testQuery( + "SELECT d3 FROM (select * from druid.numfoo where dim2 IN ('a','b','ab','abc')), UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + null )) + .intervals(querySegmentSpec(Filtration.eternity())) + .filters(new InDimFilter("dim2", ImmutableList.of("a", "b", "ab", "abc"), null)) + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) .build() ), ImmutableList.of( @@ -3112,22 +3291,19 @@ public void testUnnestVirtualWithColumns() cannotVectorize(); testQuery( "SELECT strings FROM druid.numfoo, UNNEST(ARRAY[dim4, dim5]) as unnested (strings)", + QUERY_CONTEXT_UNNEST, ImmutableList.of( Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - "v0", - "EXPR$0", + expressionVirtualColumn("j0.unnest", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY), 
null )) .intervals(querySegmentSpec(Filtration.eternity())) - .virtualColumns(expressionVirtualColumn("v0", "array(\"dim4\",\"dim5\")", ColumnType.STRING_ARRAY)) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) - .context(QUERY_CONTEXT_DEFAULT) - .columns(ImmutableList.of( - "EXPR$0" - )) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.unnest")) .build() ), ImmutableList.of( @@ -3154,22 +3330,23 @@ public void testUnnestWithGroupByOrderByOnVirtualColumn() cannotVectorize(); testQuery( "SELECT d24, COUNT(*) FROM druid.numfoo, UNNEST(ARRAY[dim2, dim4]) AS unnested(d24) GROUP BY d24 ORDER BY d24 DESC ", + QUERY_CONTEXT_UNNEST, ImmutableList.of( GroupByQuery.builder() - .setDataSource(UnnestDataSource.create( - new TableDataSource(CalciteTests.DATASOURCE3), - "v0", - "EXPR$0", - null - )) - .setVirtualColumns(expressionVirtualColumn( - "v0", - "array(\"dim2\",\"dim4\")", - ColumnType.STRING_ARRAY - )) + .setDataSource( + UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn( + "j0.unnest", + "array(\"dim2\",\"dim4\")", + ColumnType.STRING_ARRAY + ), + null + ) + ) .setInterval(querySegmentSpec(Filtration.eternity())) - .setContext(QUERY_CONTEXT_DEFAULT) - .setDimensions(new DefaultDimensionSpec("EXPR$0", "_d0", ColumnType.STRING)) + .setContext(QUERY_CONTEXT_UNNEST) + .setDimensions(new DefaultDimensionSpec("j0.unnest", "_d0", ColumnType.STRING)) .setGranularity(Granularities.ALL) .setLimitSpec( DefaultLimitSpec @@ -3182,7 +3359,7 @@ public void testUnnestWithGroupByOrderByOnVirtualColumn() .build() ) .setAggregatorSpecs(new CountAggregatorFactory("a0")) - .setContext(QUERY_CONTEXT_DEFAULT) + .setContext(QUERY_CONTEXT_UNNEST) .build() ), useDefault ? 
@@ -3209,6 +3386,7 @@ public void testUnnestWithJoinOnTheLeft() cannotVectorize(); testQuery( "SELECT d3 from (SELECT * from druid.numfoo JOIN (select dim2 as t from druid.numfoo where dim2 IN ('a','b','ab','abc')) ON dim2=t), UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)", + QUERY_CONTEXT_UNNEST, ImmutableList.of( Druids.newScanQueryBuilder() .dataSource(UnnestDataSource.create( @@ -3223,27 +3401,22 @@ public void testUnnestWithJoinOnTheLeft() .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) .filters(new InDimFilter("dim2", ImmutableList.of("a", "b", "ab", "abc"), null)) - .columns( - "dim2" - ) - .context(QUERY_CONTEXT_DEFAULT) + .columns("dim2") + .context(QUERY_CONTEXT_UNNEST) .build() ), "j0.", "(\"dim2\" == \"j0.dim2\")", JoinType.INNER ), - "dim3", - "EXPR$0", + expressionVirtualColumn("_j0.unnest", "\"dim3\"", ColumnType.STRING), null )) .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) - .context(QUERY_CONTEXT_DEFAULT) - .columns(ImmutableList.of( - "EXPR$0" - )) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("_j0.unnest")) .build() ), useDefault ? 
@@ -3278,32 +3451,19 @@ public void testUnnestWithConstant() cannotVectorize(); testQuery( "SELECT longs FROM druid.numfoo, UNNEST(ARRAY[1,2,3]) as unnested (longs)", + QUERY_CONTEXT_UNNEST, ImmutableList.of( Druids.newScanQueryBuilder() .dataSource( join( new TableDataSource(CalciteTests.DATASOURCE3), - new QueryDataSource( - newScanQueryBuilder() - .dataSource( - UnnestDataSource.create( - InlineDataSource.fromIterable( - ImmutableList.of(new Object[]{new Object[]{1L, 2L, 3L}}), - RowSignature.builder().add("inline", ColumnType.LONG_ARRAY).build() - ), - "inline", - "EXPR$0", - null - ) - ) - .intervals(querySegmentSpec(Filtration.eternity())) - .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) - .legacy(false) - .context(QUERY_CONTEXT_DEFAULT) - .columns(ImmutableList.of( - "EXPR$0" - )) - .build() + InlineDataSource.fromIterable( + ImmutableList.of( + new Object[]{1L}, + new Object[]{2L}, + new Object[]{3L} + ), + RowSignature.builder().add("EXPR$0", ColumnType.LONG).build() ), "j0.", "1", @@ -3313,10 +3473,8 @@ public void testUnnestWithConstant() .intervals(querySegmentSpec(Filtration.eternity())) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) - .context(QUERY_CONTEXT_DEFAULT) - .columns(ImmutableList.of( - "j0.EXPR$0" - )) + .context(QUERY_CONTEXT_UNNEST) + .columns(ImmutableList.of("j0.EXPR$0")) .build() ), ImmutableList.of( diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteScanSignatureTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteScanSignatureTest.java index 70db1cd58393..13568f4e7a78 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteScanSignatureTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteScanSignatureTest.java @@ -108,9 +108,9 @@ public String name() } @Override - public boolean feature(EngineFeature feature, PlannerContext plannerContext) + public boolean featureAvailable(EngineFeature feature, PlannerContext 
plannerContext) { - return feature == EngineFeature.SCAN_NEEDS_SIGNATURE || parent.feature(feature, plannerContext); + return feature == EngineFeature.SCAN_NEEDS_SIGNATURE || parent.featureAvailable(feature, plannerContext); } @Override diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteWindowQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteWindowQueryTest.java index e385d60466f6..7fba1c581fd9 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteWindowQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteWindowQueryTest.java @@ -32,6 +32,7 @@ import org.apache.druid.query.operator.WindowOperatorQuery; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.sql.calcite.planner.PlannerContext; import org.junit.Assert; import org.junit.Test; import org.junit.runner.RunWith; @@ -123,7 +124,7 @@ public void windowQueryTest() throws IOException testBuilder() .skipVectorize(true) .sql(input.sql) - .queryContext(ImmutableMap.of("windowsAreForClosers", true)) + .queryContext(ImmutableMap.of(PlannerContext.CTX_ENABLE_WINDOW_FNS, true)) .addCustomVerification(QueryVerification.ofResults(results -> { if (results.exception != null) { throw new RE(results.exception, "Failed to execute because of exception."); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/IngestionTestSqlEngine.java b/sql/src/test/java/org/apache/druid/sql/calcite/IngestionTestSqlEngine.java index 30692b80c85d..6968dab89788 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/IngestionTestSqlEngine.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/IngestionTestSqlEngine.java @@ -74,7 +74,7 @@ public RelDataType resultTypeForInsert(RelDataTypeFactory typeFactory, RelDataTy } @Override - public boolean feature(final EngineFeature feature, final PlannerContext plannerContext) + public boolean featureAvailable(final EngineFeature feature, final 
PlannerContext plannerContext) { switch (feature) { case CAN_SELECT: From 9e7abfc345e1cc8a1f4e4e542342bd5cb17faa1d Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 9 Mar 2023 01:42:19 -0800 Subject: [PATCH 2/9] Simplify DruidUnnestRel. --- .../sql/calcite/planner/QueryValidations.java | 2 +- .../calcite/rel/DruidCorrelateUnnestRel.java | 4 +- .../druid/sql/calcite/rel/DruidRels.java | 18 --- .../druid/sql/calcite/rel/DruidUnnestRel.java | 125 +++++++++--------- .../rule/DruidCorrelateUnnestRule.java | 32 ++--- .../sql/calcite/rule/DruidUnnestRule.java | 9 +- .../sql/calcite/CalciteArraysQueryTest.java | 9 +- 7 files changed, 89 insertions(+), 110 deletions(-) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/QueryValidations.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/QueryValidations.java index 0d7496495c87..925117257d18 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/QueryValidations.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/QueryValidations.java @@ -54,7 +54,7 @@ private static void validateNoIllegalRightyJoins( ) throws ValidationException { if (plannerContext.getJoinAlgorithm() == JoinAlgorithm.BROADCAST - && !plannerContext.engineHasFeature(EngineFeature.ALLOW_BROADCAST_RIGHTY_JOIN)) { + && !plannerContext.featureAvailable(EngineFeature.ALLOW_BROADCAST_RIGHTY_JOIN)) { class FindRightyJoin extends RelShuttleImpl { private Join found = null; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java index 960e50540e0c..b25a4bc964d3 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java @@ -163,7 +163,7 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) ); if (expressionToUnnest == null) { - throw new 
CannotBuildQueryException(unnestDatasourceRel.getUnnestProject(), rexNodeToUnnest); + throw new CannotBuildQueryException(unnestDatasourceRel, unnestDatasourceRel.getInputRexNode()); } // Final output row signature. @@ -369,7 +369,7 @@ private static RexNode getRexNodeToUnnest( // variable. This is the expression to unnest. final RexNode rexNodeToUnnest = new CorrelatedFieldAccessToInputRef(correlate.getCorrelationId()) - .apply(unnestDatasourceRel.getUnnestProject().getProjects().get(0)); + .apply(unnestDatasourceRel.getInputRexNode()); // Unwrap MV_TO_ARRAY if present. return unwrapMvToArray(rexNodeToUnnest); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRels.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRels.java index 3e828491a399..c35c872544f1 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRels.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRels.java @@ -19,9 +19,6 @@ package org.apache.druid.sql.calcite.rel; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.calcite.rel.RelWriter; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.sql.calcite.table.DruidTable; @@ -98,19 +95,4 @@ public static RowSignature dataSourceSignature(final DruidRel druidRel) ).toDruidQuery(false).getOutputRowSignature(); } } - - /** - * Return a JSON representation of a query suitable for implementing {@link DruidRel#explainTerms(RelWriter)}. 
- */ - public static String toQueryStringForExplaining(final DruidRel rel, final ObjectMapper mapper) - { - final DruidQuery druidQuery = rel.toDruidQueryForExplaining(); - - try { - return mapper.writeValueAsString(druidQuery.getQuery()); - } - catch (JsonProcessingException e) { - throw new RuntimeException(e); - } - } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestRel.java index a543e01dc4b5..f7809e2faee6 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidUnnestRel.java @@ -19,24 +19,26 @@ package org.apache.druid.sql.calcite.rel; -import com.fasterxml.jackson.core.JsonProcessingException; import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelWriter; -import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.Uncollect; +import org.apache.calcite.rel.logical.LogicalProject; +import org.apache.calcite.rel.logical.LogicalValues; import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; import org.apache.druid.java.util.common.ISE; -import org.apache.druid.query.TableDataSource; import org.apache.druid.sql.calcite.planner.PlannerContext; -import org.apache.druid.sql.calcite.table.RowSignatures; import javax.annotation.Nullable; import java.util.Collections; import java.util.Set; /** - * Captures the unnest (i.e. {@link Uncollect}) part of a correlated unnesting join. + * Captures an unnest expression for a correlated join. Derived from an {@link Uncollect}. * * This rel cannot be executed directly. It is a holder of information for {@link DruidCorrelateUnnestRel}. 
* @@ -47,58 +49,68 @@ */ public class DruidUnnestRel extends DruidRel { - private static final TableDataSource DUMMY_DATA_SOURCE = new TableDataSource("__unnest__"); + private static final String FIELD_NAME = "UNNEST"; /** - * An {@link Uncollect} on top of a {@link Project} that generates an expression to be unnested. The underlying - * {@link Project} is not expected to reference any bits of its input; instead it references either a constant or - * a correlation variable through a {@link org.apache.calcite.rex.RexFieldAccess}. + * Expression to be unnested. May be constant or may reference a correlation variable through a + * {@link org.apache.calcite.rex.RexFieldAccess}. */ - private final Uncollect uncollect; + private final RexNode inputRexNode; private DruidUnnestRel( final RelOptCluster cluster, final RelTraitSet traits, - final Uncollect uncollect, + final RexNode inputRexNode, final PlannerContext plannerContext ) { super(cluster, traits, plannerContext); - this.uncollect = uncollect; - - if (!(uncollect.getInputs().get(0) instanceof Project)) { - // Validate that the Uncollect reads from a Project. - throw new ISE( - "Uncollect must reference Project, but child was [%s]", - uncollect.getInputs().get(0) - ); - } + this.inputRexNode = inputRexNode; } - public static DruidUnnestRel create(final Uncollect uncollect, final PlannerContext plannerContext) + public static DruidUnnestRel create( + final RelOptCluster cluster, + final RelTraitSet traits, + final RexNode unnestRexNode, + final PlannerContext plannerContext + ) { + if (!RelOptUtil.InputFinder.bits(unnestRexNode).isEmpty()) { + throw new ISE("Expression must not include field references"); + } + return new DruidUnnestRel( - uncollect.getCluster(), - uncollect.getTraitSet(), - uncollect, + cluster, + traits, + unnestRexNode, plannerContext ); } - /** - * Uncollect (unnest) operation that references the {@link #getUnnestProject()}. 
- */ - public Uncollect getUncollect() + @Override + @SuppressWarnings("ObjectEquality") + public RelNode accept(RexShuttle shuttle) { - return uncollect; + final RexNode newInputRexNode = shuttle.apply(inputRexNode); + + if (newInputRexNode == inputRexNode) { + return this; + } else { + return new DruidUnnestRel( + getCluster(), + getTraitSet(), + newInputRexNode, + getPlannerContext() + ); + } } /** - * Project that generates the expression to be unnested. + * Expression to be unnested. */ - public Project getUnnestProject() + public RexNode getInputRexNode() { - return (Project) uncollect.getInputs().get(0); + return inputRexNode; } @Override @@ -114,17 +126,14 @@ public DruidUnnestRel withPartialQuery(PartialDruidQuery newQueryBuilder) } /** - * Returns a new rel with the {@link #getUnnestProject()} replaced. + * Returns a new rel with a new input. The output type is unchanged. */ - public DruidUnnestRel withUnnestProject(final Project newUnnestProject) + public DruidUnnestRel withUnnestRexNode(final RexNode newInputRexNode) { return new DruidUnnestRel( getCluster(), getTraitSet(), - (Uncollect) uncollect.copy( - uncollect.getTraitSet(), - newUnnestProject - ), + newInputRexNode, getPlannerContext() ); } @@ -139,18 +148,8 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) @Override public DruidQuery toDruidQueryForExplaining() { - return PartialDruidQuery - .create(uncollect) - .build( - DUMMY_DATA_SOURCE, - RowSignatures.fromRelDataType( - uncollect.getRowType().getFieldNames(), - uncollect.getRowType() - ), - getPlannerContext(), - getCluster().getRexBuilder(), - false - ); + // DruidUnnestRel is a holder for info for DruidCorrelateUnnestRel. It cannot be executed on its own. 
+ throw new CannotBuildQueryException("Cannot execute UNNEST directly"); } @Nullable @@ -160,7 +159,7 @@ public DruidUnnestRel asDruidConvention() return new DruidUnnestRel( getCluster(), getTraitSet().replace(DruidConvention.instance()), - uncollect, + inputRexNode, getPlannerContext() ); } @@ -168,20 +167,7 @@ public DruidUnnestRel asDruidConvention() @Override public RelWriter explainTerms(RelWriter pw) { - final String queryString; - final DruidQuery druidQuery = toDruidQueryForExplaining(); - - try { - queryString = getPlannerContext().getJsonMapper().writeValueAsString(druidQuery.getQuery()); - } - catch (JsonProcessingException e) { - throw new RuntimeException(e); - } - - return pw.item("unnestProject", getUnnestProject()) - .item("uncollect", getUncollect()) - .item("query", queryString) - .item("signature", druidQuery.getOutputRowSignature()); + return pw.item("expr", inputRexNode); } @Override @@ -193,6 +179,13 @@ public Set getDataSourceNames() @Override protected RelDataType deriveRowType() { - return uncollect.getRowType(); + return Uncollect.deriveUncollectRowType( + LogicalProject.create( + LogicalValues.createOneRow(getCluster()), + Collections.singletonList(inputRexNode), + Collections.singletonList(FIELD_NAME) + ), + false + ); } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidCorrelateUnnestRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidCorrelateUnnestRule.java index d331bb2222d3..fb444ea3bf92 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidCorrelateUnnestRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidCorrelateUnnestRule.java @@ -103,18 +103,18 @@ public void onMatch(RelOptRuleCall call) if (DruidRels.isScanOrProject(left, true) && left.getPartialDruidQuery().getSelectProject() != null - && RelOptUtil.InputFinder.bits(right.getUnnestProject().getProjects(), null).isEmpty()) { + && RelOptUtil.InputFinder.bits(right.getInputRexNode()).isEmpty()) { // Pull 
left-side Project above the Correlate, so we can eliminate a subquery. final RelNode leftScan = left.getPartialDruidQuery().getScan(); final Project leftProject = left.getPartialDruidQuery().getSelectProject(); - // Rewrite right-side Project on top of leftScan rather than leftProject. + // Rewrite right-side expression on top of leftScan rather than leftProject. + final CorrelationId newCorrelationId = correlate.getCluster().createCorrel(); final PushCorrelatedFieldAccessPastProject correlatedFieldRewriteShuttle = - new PushCorrelatedFieldAccessPastProject(correlate.getCorrelationId(), leftProject); - final List newRightProjectExprs = - correlatedFieldRewriteShuttle.apply(right.getUnnestProject().getProjects()); + new PushCorrelatedFieldAccessPastProject(correlate.getCorrelationId(), newCorrelationId, leftProject); + final RexNode newUnnestRexNode = correlatedFieldRewriteShuttle.apply(right.getInputRexNode()); - // Pull the Project out of the left side of the Correlate. + // Build the new Correlate rel and a DruidCorrelateUnnestRel wrapper. final DruidCorrelateUnnestRel druidCorrelateUnnest = DruidCorrelateUnnestRel.create( correlate.copy( correlate.getTraitSet(), @@ -122,16 +122,9 @@ public void onMatch(RelOptRuleCall call) // Left side: remove Project. left.withPartialQuery(PartialDruidQuery.create(leftScan)), - // Right side: use rewritten newRightProjectExprs, pushed past the left Project. - right.withUnnestProject( - right.getUnnestProject().copy( - right.getUnnestProject().getTraitSet(), - right.getUnnestProject().getInput(), - newRightProjectExprs, - right.getUnnestProject().getRowType() - ) - ), - correlate.getCorrelationId(), + // Right side: use rewritten newUnnestRexNode, pushed past the left Project. 
+ right.withUnnestRexNode(newUnnestRexNode), + newCorrelationId, ImmutableBitSet.of(correlatedFieldRewriteShuttle.getRequiredColumns()), correlate.getJoinType() ), @@ -141,7 +134,7 @@ public void onMatch(RelOptRuleCall call) // Add right-side input refs to the Project, so it matches the full original Correlate. final RexBuilder rexBuilder = correlate.getCluster().getRexBuilder(); final List pulledUpProjects = new ArrayList<>(leftProject.getProjects()); - for (int i = 0 ; i < right.getRowType().getFieldCount(); i++ ) { + for (int i = 0; i < right.getRowType().getFieldCount(); i++) { pulledUpProjects.add(rexBuilder.makeInputRef(druidCorrelateUnnest, i + leftScan.getRowType().getFieldCount())); } @@ -170,6 +163,7 @@ public void onMatch(RelOptRuleCall call) private static class PushCorrelatedFieldAccessPastProject extends RexShuttle { private final CorrelationId correlationId; + private final CorrelationId newCorrelationId; private final Project project; // "Sidecar" return value: computed along with the shuttling. @@ -177,10 +171,12 @@ private static class PushCorrelatedFieldAccessPastProject extends RexShuttle public PushCorrelatedFieldAccessPastProject( final CorrelationId correlationId, + final CorrelationId newCorrelationId, final Project project ) { this.correlationId = correlationId; + this.newCorrelationId = newCorrelationId; this.project = project; } @@ -199,7 +195,7 @@ public RexNode visitFieldAccess(final RexFieldAccess fieldAccess) // Rewrite RexInputRefs as correlation variable accesses. 
final RexBuilder rexBuilder = project.getCluster().getRexBuilder(); - final RexNode newCorrel = rexBuilder.makeCorrel(project.getInput().getRowType(), correlationId); + final RexNode newCorrel = rexBuilder.makeCorrel(project.getInput().getRowType(), newCorrelationId); return new RexShuttle() { @Override diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestRule.java index c482fcd9cbfe..aa3e78483061 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/DruidUnnestRule.java @@ -105,11 +105,12 @@ public void onMatch(final RelOptRuleCall call) final Uncollect uncollectRel = call.rel(0); final Project projectRel = call.rel(1); - if (RexUtil.isConstant(projectRel.getProjects().get(0))) { + final RexNode exprToUnnest = projectRel.getProjects().get(0); + if (RexUtil.isConstant(exprToUnnest)) { // Constant expression: transform to DruidQueryRel on an inline datasource. final InlineDataSource inlineDataSource = toInlineDataSource( uncollectRel, - projectRel.getProjects().get(0), + exprToUnnest, plannerContext ); @@ -126,7 +127,9 @@ public void onMatch(final RelOptRuleCall call) // Transform to DruidUnnestRel, a holder for an unnest of a correlated variable. 
call.transformTo( DruidUnnestRel.create( - (Uncollect) uncollectRel.copy(uncollectRel.getTraitSet(), projectRel), + uncollectRel.getCluster(), + uncollectRel.getTraitSet(), + exprToUnnest, plannerContext ) ); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 338d1f56c179..462bcfaf8b82 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -45,6 +45,7 @@ import org.apache.druid.query.filter.AndDimFilter; import org.apache.druid.query.filter.ExpressionDimFilter; import org.apache.druid.query.filter.InDimFilter; +import org.apache.druid.query.filter.LikeDimFilter; import org.apache.druid.query.filter.NotDimFilter; import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.query.groupby.GroupByQuery; @@ -3213,7 +3214,7 @@ public void testUnnestWithFiltersOutside() + " UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)\n" + "WHERE t.dim2='a'" + "AND t.dim1 <> 'foo'\n" - + "AND unnested.d3 <> 'b'", + + "AND (unnested.d3 <> 'b' OR unnested.d3 IN ('a', 'c') OR unnested.d3 LIKE 'd%')", QUERY_CONTEXT_UNNEST, ImmutableList.of( Druids.newScanQueryBuilder() @@ -3228,7 +3229,11 @@ public void testUnnestWithFiltersOutside() and( selector("dim2", "a", null), not(selector("dim1", "foo", null)), - not(selector("j0.unnest", "b", null)) + or( + not(selector("j0.unnest", "b", null)), + new LikeDimFilter("j0.unnest", "d%", null, null), + in("j0.unnest", ImmutableList.of("a", "c"), null) + ) ) ) .legacy(false) From cab501f2b2d55e54da247bfb1f7e3091b31a11dc Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 9 Mar 2023 03:14:00 -0800 Subject: [PATCH 3/9] Fixes, simplification, additional filter pushdown. 
--- .../druid/segment/UnnestStorageAdapter.java | 111 +++++++++++++++--- .../druid/segment/filter/AndFilter.java | 23 ++++ .../apache/druid/segment/filter/OrFilter.java | 23 ++++ .../calcite/rel/DruidCorrelateUnnestRel.java | 15 +-- .../sql/calcite/CalciteArraysQueryTest.java | 63 ++++++++-- 5 files changed, 197 insertions(+), 38 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java index a8ee22d570de..939333c75402 100644 --- a/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/UnnestStorageAdapter.java @@ -19,19 +19,26 @@ package org.apache.druid.segment; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import org.apache.druid.java.util.common.Pair; import org.apache.druid.java.util.common.granularity.Granularity; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.guava.Sequences; import org.apache.druid.query.QueryMetrics; +import org.apache.druid.query.filter.BooleanFilter; import org.apache.druid.query.filter.Filter; import org.apache.druid.query.filter.InDimFilter; import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.ListIndexed; import org.apache.druid.segment.filter.AndFilter; +import org.apache.druid.segment.filter.BoundFilter; import org.apache.druid.segment.filter.Filters; +import org.apache.druid.segment.filter.LikeFilter; +import org.apache.druid.segment.filter.NotFilter; +import org.apache.druid.segment.filter.SelectorFilter; import org.apache.druid.segment.join.PostJoinCursor; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.joda.time.DateTime; @@ -79,7 +86,15 @@ public Sequence makeCursors( 
@Nullable QueryMetrics queryMetrics ) { - final Pair filterPair = computeBaseAndPostJoinFilters(filter, virtualColumns); + final String inputColumn = getUnnestInputIfDirectAccess(); + final Pair filterPair = computeBaseAndPostCorrelateFilters( + filter, + virtualColumns, + inputColumn, + inputColumn == null || virtualColumns.exists(inputColumn) + ? null + : baseAdapter.getColumnCapabilities(inputColumn) + ); final Sequence baseCursorSequence = baseAdapter.makeCursors( filterPair.lhs, @@ -237,9 +252,21 @@ public VirtualColumn getUnnestColumn() return unnestColumn; } - private Pair computeBaseAndPostJoinFilters( + /** + * Split queryFilter into pre- and post-correlate filters. + * + * @param queryFilter query filter passed to makeCursors + * @param queryVirtualColumns query virtual columns passed to makeCursors + * @param inputColumn input column to unnest if it's a direct access; otherwise null + * @param inputColumnCapabilites input column capabilities if known; otherwise null + * + * @return pair of pre- and post-correlate filters + */ + private Pair computeBaseAndPostCorrelateFilters( @Nullable final Filter queryFilter, - final VirtualColumns queryVirtualColumns + final VirtualColumns queryVirtualColumns, + @Nullable final String inputColumn, + @Nullable final ColumnCapabilities inputColumnCapabilites ) { class FilterSplitter @@ -255,18 +282,25 @@ void add(@Nullable final Filter filter) final Set requiredColumns = filter.getRequiredColumns(); - if (requiredColumns.contains(outputColumnName)) { - postFilters.add(filter); - } else { - if (queryVirtualColumns.getVirtualColumns().length > 0) { - for (String column : requiredColumns) { - if (queryVirtualColumns.exists(column)) { - postFilters.add(filter); - return; - } + // Run filter post-correlate if it refers to any virtual columns. 
+ if (queryVirtualColumns.getVirtualColumns().length > 0) { + for (String column : requiredColumns) { + if (queryVirtualColumns.exists(column)) { + postFilters.add(filter); + return; } } + } + if (requiredColumns.contains(outputColumnName)) { + // Try to move filter pre-correlate if possible. + final Filter newFilter = rewriteFilterOnUnnestColumnIfPossible(filter, inputColumn, inputColumnCapabilites); + if (newFilter != null) { + preFilters.add(newFilter); + } else { + postFilters.add(filter); + } + } else { preFilters.add(filter); } } @@ -275,8 +309,6 @@ void add(@Nullable final Filter filter) final FilterSplitter filterSplitter = new FilterSplitter(); if (allowSet != null && !allowSet.isEmpty()) { - final String inputColumn = getUnnestInputIfDirectAccess(); - // Filter on input column if possible (it may be faster); otherwise use output column. filterSplitter.add(new InDimFilter(inputColumn != null ? inputColumn : outputColumnName, allowSet)); } @@ -307,5 +339,54 @@ private String getUnnestInputIfDirectAccess() return null; } } -} + /** + * Rewrites a filter on {@link #outputColumnName} to operate on the input column from + * {@link #getUnnestInputIfDirectAccess()}, if possible. + */ + @Nullable + private Filter rewriteFilterOnUnnestColumnIfPossible( + final Filter filter, + @Nullable final String inputColumn, + @Nullable final ColumnCapabilities inputColumnCapabilities + ) + { + // Only doing this for multi-value strings (not array types) at the moment. + if (inputColumn == null + || inputColumnCapabilities == null + || inputColumnCapabilities.getType() != ValueType.STRING) { + return null; + } + + if (filterMapsOverMultiValueStrings(filter)) { + return filter.rewriteRequiredColumns(ImmutableMap.of(outputColumnName, inputColumn)); + } else { + return null; + } + } + + /** + * Requirement for {@link #rewriteFilterOnUnnestColumnIfPossible}: filter must support rewrites and also must map + * over multi-value strings. (Rather than treat them as arrays.) 
There isn't a method on the Filter interface that + * tells us this, so resort to instanceof. + */ + private static boolean filterMapsOverMultiValueStrings(final Filter filter) + { + if (filter instanceof BooleanFilter) { + for (Filter child : ((BooleanFilter) filter).getFilters()) { + if (!filterMapsOverMultiValueStrings(child)) { + return false; + } + } + + return true; + } else if (filter instanceof NotFilter) { + return filterMapsOverMultiValueStrings(((NotFilter) filter).getBaseFilter()); + } else { + return filter instanceof SelectorFilter + || filter instanceof InDimFilter + || filter instanceof LikeFilter + || filter instanceof BoundFilter; + } + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java index 56488c67d578..270e3b0ff677 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/AndFilter.java @@ -49,6 +49,7 @@ import java.util.Collection; import java.util.LinkedHashSet; import java.util.List; +import java.util.Map; import java.util.Objects; /** @@ -217,6 +218,28 @@ public LinkedHashSet getFilters() return filters; } + @Override + public boolean supportsRequiredColumnRewrite() + { + for (Filter filter : filters) { + if (!filter.supportsRequiredColumnRewrite()) { + return false; + } + } + + return true; + } + + @Override + public Filter rewriteRequiredColumns(Map columnRewrites) + { + final List newFilters = new ArrayList<>(filters.size()); + for (Filter filter : filters) { + newFilters.add(filter.rewriteRequiredColumns(columnRewrites)); + } + return new AndFilter(newFilters); + } + @Override public String toString() { diff --git a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java index 0fa14dd4248c..77110f741a6e 100644 --- 
a/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/OrFilter.java @@ -48,6 +48,7 @@ import java.util.ArrayList; import java.util.LinkedHashSet; import java.util.List; +import java.util.Map; import java.util.Objects; /** @@ -187,6 +188,28 @@ public LinkedHashSet getFilters() return filters; } + @Override + public boolean supportsRequiredColumnRewrite() + { + for (Filter filter : filters) { + if (!filter.supportsRequiredColumnRewrite()) { + return false; + } + } + + return true; + } + + @Override + public Filter rewriteRequiredColumns(Map columnRewrites) + { + final List newFilters = new ArrayList<>(filters.size()); + for (Filter filter : filters) { + newFilters.add(filter.rewriteRequiredColumns(columnRewrites)); + } + return new OrFilter(newFilters); + } + @Override public String toString() { diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java index b25a4bc964d3..63d5869d9be7 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java @@ -31,7 +31,6 @@ import org.apache.calcite.rel.RelWriter; import org.apache.calcite.rel.core.Correlate; import org.apache.calcite.rel.core.CorrelationId; -import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexCall; @@ -50,7 +49,6 @@ import org.apache.druid.sql.calcite.expression.Expressions; import org.apache.druid.sql.calcite.expression.builtin.MultiValueStringToArrayOperatorConversion; import org.apache.druid.sql.calcite.planner.Calcites; -import org.apache.druid.sql.calcite.planner.PlannerConfig; import org.apache.druid.sql.calcite.planner.PlannerContext; import 
org.apache.druid.sql.calcite.table.RowSignatures; @@ -74,11 +72,10 @@ public class DruidCorrelateUnnestRel extends DruidRel private static final TableDataSource DUMMY_DATA_SOURCE = new TableDataSource("__correlate_unnest__"); private static final String BASE_UNNEST_OUTPUT_COLUMN = "unnest"; - private final PartialDruidQuery partialQuery; - private final PlannerConfig plannerConfig; private final Correlate correlateRel; private final RelNode left; private final RelNode right; + private final PartialDruidQuery partialQuery; private DruidCorrelateUnnestRel( RelOptCluster cluster, @@ -93,7 +90,6 @@ private DruidCorrelateUnnestRel( this.partialQuery = partialQuery; this.left = correlateRel.getLeft(); this.right = correlateRel.getRight(); - this.plannerConfig = plannerContext.getPlannerConfig(); } /** @@ -272,15 +268,10 @@ public RelNode copy(final RelTraitSet traitSet, final List inputs) @Override public RelOptCost computeSelfCost(final RelOptPlanner planner, final RelMetadataQuery mq) { - double cost; + double cost = partialQuery.estimateCost(); if (computeLeftRequiresSubquery(DruidJoinQueryRel.getSomeDruidChild(left))) { - cost = CostEstimates.COST_SUBQUERY; - } else { - cost = partialQuery.estimateCost(); - if (correlateRel.getJoinType() == JoinRelType.INNER && plannerConfig.isComputeInnerJoinCostAsFilter()) { - cost *= CostEstimates.MULTIPLIER_FILTER; - } + cost += CostEstimates.COST_SUBQUERY; } return planner.getCostFactory().makeCost(cost, 0, 0); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 462bcfaf8b82..386cb1dbe629 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -49,6 +49,7 @@ import org.apache.druid.query.filter.NotDimFilter; import org.apache.druid.query.filter.SelectorDimFilter; import 
org.apache.druid.query.groupby.GroupByQuery; +import org.apache.druid.query.groupby.having.DimFilterHavingSpec; import org.apache.druid.query.groupby.orderby.DefaultLimitSpec; import org.apache.druid.query.groupby.orderby.NoopLimitSpec; import org.apache.druid.query.groupby.orderby.OrderByColumnSpec; @@ -1901,7 +1902,8 @@ public void testArrayAggArraysNoNest() ), expected -> { expected.expect(IAE.class); - expected.expectMessage("Cannot create a nested array type [ARRAY>], 'druid.expressions.allowNestedArrays' must be set to true"); + expected.expectMessage( + "Cannot create a nested array type [ARRAY>], 'druid.expressions.allowNestedArrays' must be set to true"); } ); } @@ -2816,12 +2818,12 @@ public void testUnnestTwiceWithFiltersAndExpressions() { cannotVectorize(); testQuery( - "SELECT dim1, MV_TO_ARRAY(dim3), STRING_TO_ARRAY(dim1, U&'\\005C.') AS dim1_split, dim1_split_unnest, dim3_unnest || 'xx' AS dim3_unnest\n" + "SELECT dim1, MV_TO_ARRAY(dim3), STRING_TO_ARRAY(dim1, U&'\\005C.') AS dim1_split, dim1_split_unnest, dim3_unnest || 'xx'\n" + "FROM\n" + " druid.numfoo,\n" + " UNNEST(STRING_TO_ARRAY(dim1, U&'\\005C.')) as t2 (dim1_split_unnest),\n" + " UNNEST(MV_TO_ARRAY(dim3)) as t3 (dim3_unnest)" - + "WHERE t2.dim1_split_unnest IN ('1', '2')", + + "WHERE dim1_split_unnest IN ('1', '2') AND dim3_unnest LIKE '_'", QUERY_CONTEXT_UNNEST, ImmutableList.of( Druids.newScanQueryBuilder() @@ -2862,7 +2864,10 @@ public void testUnnestTwiceWithFiltersAndExpressions() ColumnType.STRING ) ) - .filters(in("j0.unnest", ImmutableList.of("1", "2"), null)) + .filters(and( + in("j0.unnest", ImmutableList.of("1", "2"), null), + new LikeDimFilter("_j0.unnest", "_", null, null) + )) .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .legacy(false) .context(QUERY_CONTEXT_UNNEST) @@ -2872,8 +2877,7 @@ public void testUnnestTwiceWithFiltersAndExpressions() ImmutableList.of( new Object[]{"10.1", ImmutableList.of("b", "c"), ImmutableList.of("10", "1"), "1", "bxx"}, new 
Object[]{"10.1", ImmutableList.of("b", "c"), ImmutableList.of("10", "1"), "1", "cxx"}, - new Object[]{"2", ImmutableList.of("d"), ImmutableList.of("2"), "2", "dxx"}, - new Object[]{"1", null, ImmutableList.of("1"), "1", "xx"} + new Object[]{"2", ImmutableList.of("d"), ImmutableList.of("2"), "2", "dxx"} ) ); } @@ -3022,6 +3026,44 @@ public void testUnnestWithGroupByOrderByWithLimit() ); } + @Test + public void testUnnestWithGroupByHaving() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "SELECT d3, COUNT(*) FROM druid.numfoo, UNNEST(MV_TO_ARRAY(dim3)) AS unnested(d3) GROUP BY d3 HAVING COUNT(*) = 1", + QUERY_CONTEXT_UNNEST, + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(UnnestDataSource.create( + new TableDataSource(CalciteTests.DATASOURCE3), + expressionVirtualColumn("j0.unnest", "\"dim3\"", ColumnType.STRING), + null + )) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setContext(QUERY_CONTEXT_UNNEST) + .setDimensions(new DefaultDimensionSpec("j0.unnest", "_d0", ColumnType.STRING)) + .setGranularity(Granularities.ALL) + .setAggregatorSpecs(new CountAggregatorFactory("a0")) + .setHavingSpec(new DimFilterHavingSpec(selector("a0", "1", null), true)) + .setContext(QUERY_CONTEXT_UNNEST) + .build() + ), + useDefault ? 
+ ImmutableList.of( + new Object[]{"a", 1L}, + new Object[]{"c", 1L}, + new Object[]{"d", 1L} + ) : + ImmutableList.of( + new Object[]{"", 1L}, + new Object[]{"a", 1L}, + new Object[]{"c", 1L}, + new Object[]{"d", 1L} + ) + ); + } @Test public void testUnnestWithLimit() @@ -3212,9 +3254,9 @@ public void testUnnestWithFiltersOutside() "SELECT d3 FROM\n" + " druid.numfoo t,\n" + " UNNEST(MV_TO_ARRAY(dim3)) as unnested (d3)\n" - + "WHERE t.dim2='a'" + + "WHERE t.dim2='a'\n" + "AND t.dim1 <> 'foo'\n" - + "AND (unnested.d3 <> 'b' OR unnested.d3 IN ('a', 'c') OR unnested.d3 LIKE 'd%')", + + "AND (unnested.d3 IN ('a', 'c') OR unnested.d3 LIKE '_')", QUERY_CONTEXT_UNNEST, ImmutableList.of( Druids.newScanQueryBuilder() @@ -3230,8 +3272,7 @@ public void testUnnestWithFiltersOutside() selector("dim2", "a", null), not(selector("dim1", "foo", null)), or( - not(selector("j0.unnest", "b", null)), - new LikeDimFilter("j0.unnest", "d%", null, null), + new LikeDimFilter("j0.unnest", "_", null, null), in("j0.unnest", ImmutableList.of("a", "c"), null) ) ) @@ -3243,7 +3284,7 @@ public void testUnnestWithFiltersOutside() ), ImmutableList.of( new Object[]{"a"}, - new Object[]{""} + new Object[]{"b"} ) ); } From 3f1b226ce173160d7266dc16e918cbf7127b7198 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 9 Mar 2023 09:31:58 -0800 Subject: [PATCH 4/9] Updates from review. 
--- sql/pom.xml | 5 +++++ .../druid/sql/calcite/rel/DruidCorrelateUnnestRel.java | 6 ------ .../java/org/apache/druid/sql/calcite/rel/DruidRel.java | 7 +++++++ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/sql/pom.xml b/sql/pom.xml index ffc44a3df76b..850286a43d82 100644 --- a/sql/pom.xml +++ b/sql/pom.xml @@ -189,6 +189,11 @@ JUnitParams test + + org.apache.commons + commons-text + test + org.apache.calcite calcite-core diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java index 63d5869d9be7..1ee31029e4f9 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidCorrelateUnnestRel.java @@ -183,12 +183,6 @@ public DruidQuery toDruidQuery(boolean finalizeAggregations) ); } - @Override - protected DruidCorrelateUnnestRel clone() - { - return DruidCorrelateUnnestRel.create(correlateRel, getPlannerContext()); - } - @Override protected RelDataType deriveRowType() { diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRel.java index b264efe51b57..0371012cfb8d 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRel.java @@ -106,6 +106,13 @@ public RelWriter explainTerms(RelWriter pw) throw new UnsupportedOperationException(); } + @Override + protected Object clone() throws CloneNotSupportedException + { + // RelNode implements Cloneable, but our class of rels is not cloned, so does not need to implement clone(). + throw new UnsupportedOperationException(); + } + /** * Returns a copy of this rel with the {@link DruidConvention} trait. 
*/ From b32a20bdf38c2b162ccad0c467566b3eeb635e18 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 9 Mar 2023 11:06:15 -0800 Subject: [PATCH 5/9] Style adjustment. --- .../java/org/apache/druid/segment/UnnestStorageAdapterTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java index cac51de17f25..e064587a05ea 100644 --- a/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/UnnestStorageAdapterTest.java @@ -96,7 +96,7 @@ public static void setup() ); UNNEST_STORAGE_ADAPTER2 = new UnnestStorageAdapter( UNNEST_STORAGE_ADAPTER, - new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME1, "\"" + COLUMNNAME +"\"", null, ExprMacroTable.nil()), + new ExpressionVirtualColumn(OUTPUT_COLUMN_NAME1, "\"" + COLUMNNAME + "\"", null, ExprMacroTable.nil()), null ); UNNEST_STORAGE_ADAPTER3 = new UnnestStorageAdapter( From d57697847929cc06957de0301aef27e96294ae4b Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 9 Mar 2023 11:59:06 -0800 Subject: [PATCH 6/9] Fix unused import. 
--- .../java/org/apache/druid/sql/calcite/rel/DruidQueryRel.java | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQueryRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQueryRel.java index dca85b127907..eccdc6cd6959 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQueryRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQueryRel.java @@ -22,7 +22,6 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.google.common.base.Preconditions; import org.apache.calcite.plan.Convention; -import org.apache.calcite.plan.ConventionTraitDef; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptCost; import org.apache.calcite.plan.RelOptPlanner; From 4d5908ff647a56e76d9540d7ea91a8283890d8bb Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 9 Mar 2023 17:55:22 -0800 Subject: [PATCH 7/9] Update test. --- .../org/apache/druid/query/groupby/GroupByQueryRunnerTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java index 47be50997f23..6d898e725f9f 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java @@ -13398,7 +13398,7 @@ public void testGroupByFloatMinExpressionVsVirtualColumnWithExplicitStringVirtua "minExpression", NullHandling.replaceWithDefault() ? Float.POSITIVE_INFINITY : null, "minVc", - NullHandling.replaceWithDefault() ? Float.POSITIVE_INFINITY : null + Float.POSITIVE_INFINITY ) ); From 5fce9ccc2effea25ee5a0397395f74ad05b086a0 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 9 Mar 2023 20:36:10 -0800 Subject: [PATCH 8/9] Fix test. 
--- .../apache/druid/sql/calcite/CalciteArraysQueryTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 386cb1dbe629..67d1971a16ef 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -3269,12 +3269,12 @@ public void testUnnestWithFiltersOutside() .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) .filters( and( - selector("dim2", "a", null), - not(selector("dim1", "foo", null)), or( new LikeDimFilter("j0.unnest", "_", null, null), in("j0.unnest", ImmutableList.of("a", "c"), null) - ) + ), + selector("dim2", "a", null), + not(selector("dim1", "foo", null)) ) ) .legacy(false) From 066e611f35734639094058a823435eba09882c78 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 9 Mar 2023 22:30:39 -0800 Subject: [PATCH 9/9] Fix exception. --- .../main/java/org/apache/druid/sql/calcite/rel/DruidRel.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRel.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRel.java index 0371012cfb8d..ac6c08547494 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRel.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidRel.java @@ -110,7 +110,7 @@ public RelWriter explainTerms(RelWriter pw) protected Object clone() throws CloneNotSupportedException { // RelNode implements Cloneable, but our class of rels is not cloned, so does not need to implement clone(). - throw new UnsupportedOperationException(); + throw new CloneNotSupportedException(); } /**