From 81ad3801c482d00260998cd7d5a92e406124809b Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Fri, 3 Jan 2020 11:01:13 -0800 Subject: [PATCH 1/5] Add SQL GROUPING SETS support. Built on top of the subtotalsSpec feature in the groupBy query. This also involves two changes to subtotalsSpec: - Alter behavior so limitSpec is applied after subtotalsSpec, rather than applied to each grouping set. This is more in line with SQL standard behavior. I think it is okay to make this change, since the old behavior was not documented, so users should hopefully not be depending on it. - Fix a bug where virtual columns were included in the subtotal queries, but they should not have been. Also fixes two bugs in query equality checking: - BaseQuery: Use getDuration() instead of "duration" in equals and hashCode, since the latter is lazily initialized and might be null in one query but not the other. - GroupByQuery: Include subtotalsSpec in equals and hashCode. --- docs/querying/sql.md | 18 +- .../org/apache/druid/query/BaseQuery.java | 4 +- .../druid/query/groupby/GroupByQuery.java | 12 +- .../groupby/strategy/GroupByStrategyV2.java | 61 +- .../query/groupby/GroupByQueryRunnerTest.java | 35 +- .../druid/sql/calcite/planner/Rules.java | 4 +- .../druid/sql/calcite/rel/DruidQuery.java | 84 ++- .../druid/sql/calcite/rel/Grouping.java | 117 +++- .../druid/sql/calcite/rel/Subtotals.java | 111 ++++ .../ProjectAggregatePruneUnusedCallRule.java | 13 +- .../druid/sql/calcite/CalciteQueryTest.java | 557 +++++++++++++++++- 11 files changed, 885 insertions(+), 131 deletions(-) create mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/rel/Subtotals.java diff --git a/docs/querying/sql.md b/docs/querying/sql.md index 8d6a04840407..5c2c858f3406 100644 --- a/docs/querying/sql.md +++ b/docs/querying/sql.md @@ -63,7 +63,7 @@ Druid SQL supports SELECT queries with the following structure: SELECT [ ALL | DISTINCT ] { * | exprs } FROM table [ WHERE expr ] -[ GROUP BY exprs ] +[ GROUP BY [ exprs | GROUPING SETS ( (exprs), ... ) | ROLLUP (exprs) | CUBE (exprs) ] ] [ HAVING expr ] [ ORDER BY expr [ ASC | DESC ], expr [ ASC | DESC ], ... ] [ LIMIT limit ] @@ -84,6 +84,22 @@ trigger an aggregation query using one of Druid's [three native aggregation quer can refer to an expression or a select clause ordinal position (like `GROUP BY 2` to group by the second selected column). +The GROUP BY clause can also refer to multiple grouping sets in three ways. The most flexible is GROUP BY GROUPING SETS, +for example `GROUP BY GROUPING SETS ( (country, city), () )`. This example is equivalent to a `GROUP BY country, city` +followed by `GROUP BY ()` (a grand total). With GROUPING SETS, the underlying data is only scanned one time, leading to +better efficiency. Second, GROUP BY ROLLUP computes a grouping set for each level of the grouping expressions. For +example `GROUP BY ROLLUP (country, city)` is equivalent to `GROUP BY GROUPING SETS ( (country, city), (country), () )` +and will produce grouped rows for each country / city pair, along with subtotals for each country, along with a grand +total. Finally, GROUP BY CUBE computes a grouping set for each combination of grouping expressions. For example, +`GROUP BY CUBE (country, city)` is equivalent to `GROUP BY GROUPING SETS ( (country, city), (country), (city), () )`. +Grouping columns that do not apply to a particular row will contain `NULL`. For example, when computing +`GROUP BY GROUPING SETS ( (country, city), () )`, the grand total row corresponding to `()` will have `NULL` for the +"country" and "city" columns. + +When using GROUP BY GROUPING SETS, GROUP BY ROLLUP, or GROUP BY CUBE, be aware that results may not be generated in the +order that you specify your grouping sets in the query. If you need results to be generated in a particular order, use +the ORDER BY clause. + The HAVING clause refers to columns that are present after execution of GROUP BY. It can be used to filter on either grouping expressions or aggregated values. It can only be used together with GROUP BY. diff --git a/processing/src/main/java/org/apache/druid/query/BaseQuery.java b/processing/src/main/java/org/apache/druid/query/BaseQuery.java index cee136275c38..edd8e1f0ebcf 100644 --- a/processing/src/main/java/org/apache/druid/query/BaseQuery.java +++ b/processing/src/main/java/org/apache/druid/query/BaseQuery.java @@ -269,7 +269,7 @@ public boolean equals(Object o) Objects.equals(dataSource, baseQuery.dataSource) && Objects.equals(context, baseQuery.context) && Objects.equals(querySegmentSpec, baseQuery.querySegmentSpec) && - Objects.equals(duration, baseQuery.duration) && + Objects.equals(getDuration(), baseQuery.getDuration()) && Objects.equals(granularity, baseQuery.granularity); } @@ -277,6 +277,6 @@ public boolean equals(Object o) public int hashCode() { - return Objects.hash(dataSource, descending, context, querySegmentSpec, duration, granularity); + return Objects.hash(dataSource, descending, context, querySegmentSpec, getDuration(), granularity); } } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java index 69ab18540c5a..ed3ce41048dc 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java @@ -794,6 +794,11 @@ public GroupByQuery withQuerySegmentSpec(QuerySegmentSpec spec) return new Builder(this).setQuerySegmentSpec(spec).build(); } + public GroupByQuery withVirtualColumns(final VirtualColumns virtualColumns) + { + return new Builder(this).setVirtualColumns(virtualColumns).build(); + } + public GroupByQuery withDimFilter(@Nullable final DimFilter dimFilter) { return new Builder(this).setDimFilter(dimFilter).build(); @@ -1198,6 +1203,7 @@ public String toString() ", dimensions=" + dimensions + ", aggregatorSpecs=" + aggregatorSpecs + ", postAggregatorSpecs=" + postAggregatorSpecs + + (subtotalsSpec != null ? (", subtotalsSpec=" + subtotalsSpec) : null) + ", havingSpec=" + havingSpec + ", context=" + getContext() + '}'; @@ -1222,7 +1228,8 @@ public boolean equals(final Object o) Objects.equals(dimFilter, that.dimFilter) && Objects.equals(dimensions, that.dimensions) && Objects.equals(aggregatorSpecs, that.aggregatorSpecs) && - Objects.equals(postAggregatorSpecs, that.postAggregatorSpecs); + Objects.equals(postAggregatorSpecs, that.postAggregatorSpecs) && + Objects.equals(subtotalsSpec, that.subtotalsSpec); } @Override @@ -1236,7 +1243,8 @@ public int hashCode() dimFilter, dimensions, aggregatorSpecs, - postAggregatorSpecs + postAggregatorSpecs, + subtotalsSpec ); } } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/strategy/GroupByStrategyV2.java b/processing/src/main/java/org/apache/druid/query/groupby/strategy/GroupByStrategyV2.java index 8f57bded5c9f..a5874ed75b43 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/strategy/GroupByStrategyV2.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/strategy/GroupByStrategyV2.java @@ -69,6 +69,7 @@ import org.apache.druid.query.groupby.resource.GroupByQueryResource; import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; import org.apache.druid.segment.StorageAdapter; +import org.apache.druid.segment.VirtualColumns; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -372,7 +373,11 @@ public Sequence processSubtotalsSpec( GroupByRowProcessor.ResultSupplier resultSupplierOne = null; try { - GroupByQuery queryWithoutSubtotalsSpec = query + // baseSubtotalQuery is the original query with dimensions and aggregators rewritten to apply to the *results* + // rather than *inputs* of that query. It has its virtual columns and dim filter removed, because those only + // make sense when applied to inputs. Finally, it has subtotalsSpec removed, since we'll be computing them + // one-by-one soon enough. + GroupByQuery baseSubtotalQuery = query .withDimensionSpecs(query.getDimensions().stream().map( dimSpec -> new DefaultDimensionSpec( dimSpec.getOutputName(), @@ -386,13 +391,13 @@ public Sequence processSubtotalsSpec( .map(AggregatorFactory::getCombiningFactory) .collect(Collectors.toList()) ) - .withSubtotalsSpec(null) - .withDimFilter(null); - + .withVirtualColumns(VirtualColumns.EMPTY) + .withDimFilter(null) + .withSubtotalsSpec(null); resultSupplierOne = GroupByRowProcessor.process( - queryWithoutSubtotalsSpec, - queryWithoutSubtotalsSpec, + baseSubtotalQuery, + baseSubtotalQuery, queryResult, configSupplier.get(), resource, @@ -401,13 +406,13 @@ public Sequence processSubtotalsSpec( processingConfig.intermediateComputeSizeBytes() ); - List queryDimNames = queryWithoutSubtotalsSpec.getDimensions().stream().map(DimensionSpec::getOutputName) - .collect(Collectors.toList()); + List queryDimNames = baseSubtotalQuery.getDimensions().stream().map(DimensionSpec::getOutputName) + .collect(Collectors.toList()); // Only needed to make LimitSpec.filterColumns(..) call later in case base query has a non default LimitSpec. Set aggsAndPostAggs = null; - if (queryWithoutSubtotalsSpec.getLimitSpec() != null && !(queryWithoutSubtotalsSpec.getLimitSpec() instanceof NoopLimitSpec)) { - aggsAndPostAggs = getAggregatorAndPostAggregatorNames(queryWithoutSubtotalsSpec); + if (!(baseSubtotalQuery.getLimitSpec() instanceof NoopLimitSpec)) { + aggsAndPostAggs = getAggregatorAndPostAggregatorNames(baseSubtotalQuery); } List> subtotals = query.getSubtotalsSpec(); @@ -442,14 +447,14 @@ public Sequence processSubtotalsSpec( // Create appropriate LimitSpec for subtotal query LimitSpec subtotalQueryLimitSpec = NoopLimitSpec.instance(); - if (queryWithoutSubtotalsSpec.getLimitSpec() != null && !(queryWithoutSubtotalsSpec.getLimitSpec() instanceof NoopLimitSpec)) { - Set columns = new HashSet(aggsAndPostAggs); + if (!(baseSubtotalQuery.getLimitSpec() instanceof NoopLimitSpec)) { + Set columns = new HashSet<>(aggsAndPostAggs); columns.addAll(subtotalSpec); - subtotalQueryLimitSpec = queryWithoutSubtotalsSpec.getLimitSpec().filterColumns(columns); + subtotalQueryLimitSpec = baseSubtotalQuery.getLimitSpec().filterColumns(columns); } - GroupByQuery subtotalQuery = queryWithoutSubtotalsSpec + GroupByQuery subtotalQuery = baseSubtotalQuery .withLimitSpec(subtotalQueryLimitSpec) .withDimensionSpecs(newDimensions); @@ -468,7 +473,7 @@ public Sequence processSubtotalsSpec( // Also note, we can't create the ResultSupplier eagerly here or as we don't want to eagerly allocate // merge buffers for processing subtotal. Supplier resultSupplierTwo = () -> GroupByRowProcessor.process( - queryWithoutSubtotalsSpec, + baseSubtotalQuery, subtotalQuery, resultSupplierOneFinal.results(subtotalSpec), configSupplier.get(), @@ -485,7 +490,7 @@ public Sequence processSubtotalsSpec( } return Sequences.withBaggage( - Sequences.concat(subtotalsResults), + query.postProcess(Sequences.concat(subtotalsResults)), resultSupplierOne //this will close resources allocated by resultSupplierOne after sequence read ); } @@ -506,21 +511,17 @@ private Sequence processSubtotalsResultAndOptionallyClose( // on sequence read if closeOnSequenceRead is true. try { Supplier memoizedSupplier = Suppliers.memoize(baseResultsSupplier); - return applyPostProcessing( - mergeResults( - (queryPlus, responseContext) -> - new LazySequence<>( - () -> Sequences.withBaggage( - memoizedSupplier.get().results(dimsToInclude), - closeOnSequenceRead ? () -> CloseQuietly.close(memoizedSupplier.get()) : () -> {} - ) - ), - subtotalQuery, - null - ), - subtotalQuery + return mergeResults( + (queryPlus, responseContext) -> + new LazySequence<>( + () -> Sequences.withBaggage( + memoizedSupplier.get().results(dimsToInclude), + closeOnSequenceRead ? () -> CloseQuietly.close(memoizedSupplier.get()) : () -> {} + ) + ), + subtotalQuery, + null ); - } catch (Exception ex) { CloseQuietly.close(baseResultsSupplier.get()); diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java index 87f0de0b15ba..c8b7487d7a0d 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java @@ -414,6 +414,8 @@ public ByteBuffer get() @Parameterized.Parameters(name = "{0}") public static Collection constructorFeeder() { + NullHandling.initializeForTests(); + final List constructors = new ArrayList<>(); for (GroupByQueryConfig config : testConfigs()) { final Pair factoryAndCloser = makeQueryRunnerFactory(config); @@ -7270,38 +7272,13 @@ public void testGroupByWithSubtotalsSpecWithOrderLimit() .addOrderByColumn("idx") .addOrderByColumn("alias") .addOrderByColumn("market") - .setLimit(1) + .setLimit(3) .build(); List expectedResults = Arrays.asList( - makeRow( - query, - "2011-04-01", - "alias", - "technology", - "rows", - 1L, - "idx", - 78L - ), - makeRow( - query, - "2011-04-01T00:00:00.000Z", - "market", - "spot", - "rows", - 9L, - "idx", - 1102L - ), - makeRow( - query, - "2011-04-01T00:00:00.000Z", - "rows", - 13L, - "idx", - 6619L - ) + makeRow(query, "2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L), + makeRow(query, "2011-04-01", "alias", "business", "rows", 1L, "idx", 118L), + makeRow(query, "2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L) ); Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/Rules.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/Rules.java index f9af03cfeb13..ee18e2f7c309 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/Rules.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/Rules.java @@ -231,8 +231,8 @@ private static List baseRuleSet(final PlannerContext plannerContext) rules.addAll(SUB_QUERY_REMOVE_RULES); if (!plannerConfig.isUseApproximateCountDistinct()) { - // We'll need this to expand COUNT DISTINCTs. - // Avoid AggregateExpandDistinctAggregatesRule.INSTANCE; it uses grouping sets and we don't support those. + // For some reason, even though we support grouping sets, using AggregateExpandDistinctAggregatesRule.INSTANCE + // here causes CalciteQueryTest#testExactCountDistinctWithGroupingAndOtherAggregators to fail. rules.add(AggregateExpandDistinctAggregatesRule.JOIN); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java index b9769606dbb6..992fa651593e 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java @@ -26,7 +26,8 @@ import com.google.common.collect.Iterators; import com.google.common.collect.Ordering; import com.google.common.primitives.Ints; -import org.apache.calcite.plan.RelOptUtil; +import it.unimi.dsi.fastutil.ints.IntArrayList; +import it.unimi.dsi.fastutil.ints.IntList; import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.AggregateCall; @@ -44,7 +45,6 @@ import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.Granularity; -import org.apache.druid.math.expr.Parser; import org.apache.druid.query.DataSource; import org.apache.druid.query.Query; import org.apache.druid.query.QueryDataSource; @@ -281,6 +281,11 @@ private static Grouping computeGrouping( virtualColumnRegistry ); + final Subtotals subtotals = computeSubtotals( + partialQuery, + rowSignature + ); + final List aggregations = computeAggregations( partialQuery, plannerContext, @@ -306,33 +311,12 @@ private static Grouping computeGrouping( aggregateRowSignature ); + final Grouping grouping = Grouping.create(dimensions, subtotals, aggregations, havingFilter, aggregateRowSignature); + if (aggregateProject == null) { - return Grouping.create(dimensions, aggregations, havingFilter, aggregateRowSignature); + return grouping; } else { - final Projection postAggregationProjection = Projection.postAggregation( - aggregateProject, - plannerContext, - aggregateRowSignature, - "p" - ); - - postAggregationProjection.getPostAggregators().forEach( - postAggregator -> aggregations.add(Aggregation.create(postAggregator)) - ); - - // Remove literal dimensions that did not appear in the projection. This is useful for queries - // like "SELECT COUNT(*) FROM tbl GROUP BY 'dummy'" which some tools can generate, and for which we don't - // actually want to include a dimension 'dummy'. - final ImmutableBitSet aggregateProjectBits = RelOptUtil.InputFinder.bits(aggregateProject.getChildExps(), null); - for (int i = dimensions.size() - 1; i >= 0; i--) { - final DimensionExpression dimension = dimensions.get(i); - if (Parser.parse(dimension.getDruidExpression().getExpression(), plannerContext.getExprMacroTable()) - .isLiteral() && !aggregateProjectBits.get(i)) { - dimensions.remove(i); - } - } - - return Grouping.create(dimensions, aggregations, havingFilter, postAggregationProjection.getOutputRowSignature()); + return grouping.applyProject(plannerContext, aggregateProject); } } @@ -399,6 +383,38 @@ private static List computeDimensions( return dimensions; } + /** + * Builds a {@link Subtotals} object based on {@link Aggregate#getGroupSets()}. + */ + private static Subtotals computeSubtotals( + final PartialDruidQuery partialQuery, + final RowSignature rowSignature + ) + { + final Aggregate aggregate = partialQuery.getAggregate(); + + // dimBitMapping maps from input field position to group set position (dimension number). + final int[] dimBitMapping = new int[rowSignature.getRowOrder().size()]; + int i = 0; + for (int dimBit : aggregate.getGroupSet()) { + dimBitMapping[dimBit] = i++; + } + + // Use dimBitMapping to remap groupSets (which is input-field-position based) into subtotals (which is + // dimension-list-position based). + final List subtotals = new ArrayList<>(); + for (ImmutableBitSet groupSet : aggregate.getGroupSets()) { + final IntList subtotal = new IntArrayList(); + for (int dimBit : groupSet) { + subtotal.add(dimBitMapping[dimBit]); + } + + subtotals.add(subtotal); + } + + return new Subtotals(subtotals); + } + /** * Returns aggregations corresponding to {@code aggregate.getAggCallList()}, in the same order. * @@ -662,7 +678,9 @@ private Query computeQuery() @Nullable public TimeseriesQuery toTimeseriesQuery() { - if (grouping == null || grouping.getHavingFilter() != null) { + if (grouping == null + || grouping.getSubtotals().hasEffect(grouping.getDimensionSpecs()) + || grouping.getHavingFilter() != null) { return null; } @@ -743,9 +761,10 @@ public TimeseriesQuery toTimeseriesQuery() @Nullable public TopNQuery toTopNQuery() { - // Must have GROUP BY one column, ORDER BY zero or one column, limit less than maxTopNLimit, and no HAVING. + // Must have GROUP BY one column, no GROUPING SETS, ORDER BY ≤ 1 column, limit less than maxTopNLimit, no HAVING. final boolean topNOk = grouping != null && grouping.getDimensions().size() == 1 + && !grouping.getSubtotals().hasEffect(grouping.getDimensionSpecs()) && sorting != null && (sorting.getOrderBys().size() <= 1 && sorting.isLimited() && sorting.getLimit() <= plannerContext.getPlannerConfig() @@ -851,9 +870,12 @@ public GroupByQuery toGroupByQuery() postAggregators, havingSpec, sorting != null - ? new DefaultLimitSpec(sorting.getOrderBys(), sorting.isLimited() ? Ints.checkedCast(sorting.getLimit()) : null) + ? new DefaultLimitSpec( + sorting.getOrderBys(), + sorting.isLimited() ? Ints.checkedCast(sorting.getLimit()) : null + ) : NoopLimitSpec.instance(), - null, + grouping.getSubtotals().toSubtotalsSpec(grouping.getDimensionSpecs()), ImmutableSortedMap.copyOf(plannerContext.getQueryContext()) ); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java index 8ce4cd8e443a..fc450725a9ef 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java @@ -20,16 +20,24 @@ package org.apache.druid.sql.calcite.rel; import com.google.common.collect.ImmutableList; +import it.unimi.dsi.fastutil.ints.IntArrayList; +import it.unimi.dsi.fastutil.ints.IntList; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.util.ImmutableBitSet; import org.apache.druid.java.util.common.ISE; +import org.apache.druid.math.expr.Parser; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.PostAggregator; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.filter.DimFilter; import org.apache.druid.sql.calcite.aggregation.Aggregation; import org.apache.druid.sql.calcite.aggregation.DimensionExpression; +import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.table.RowSignature; import javax.annotation.Nullable; +import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Objects; @@ -48,23 +56,27 @@ public class Grouping { private final List dimensions; + private final Subtotals subtotals; private final List aggregations; + @Nullable private final DimFilter havingFilter; private final RowSignature outputRowSignature; private Grouping( final List dimensions, + final Subtotals subtotals, final List aggregations, - final DimFilter havingFilter, + @Nullable final DimFilter havingFilter, final RowSignature outputRowSignature ) { this.dimensions = ImmutableList.copyOf(dimensions); + this.subtotals = subtotals; this.aggregations = ImmutableList.copyOf(aggregations); this.havingFilter = havingFilter; this.outputRowSignature = outputRowSignature; - // Verify no collisions. + // Verify no collisions between dimensions, aggregations, post-aggregations. final Set seen = new HashSet<>(); for (DimensionExpression dimensionExpression : dimensions) { if (!seen.add(dimensionExpression.getOutputName())) { @@ -92,12 +104,13 @@ private Grouping( public static Grouping create( final List dimensions, + final Subtotals subtotals, final List aggregations, final DimFilter havingFilter, final RowSignature outputRowSignature ) { - return new Grouping(dimensions, aggregations, havingFilter, outputRowSignature); + return new Grouping(dimensions, subtotals, aggregations, havingFilter, outputRowSignature); } public List getDimensions() @@ -105,6 +118,11 @@ public List getDimensions() return dimensions; } + public Subtotals getSubtotals() + { + return subtotals; + } + public List getAggregations() { return aggregations; @@ -141,8 +159,77 @@ public RowSignature getOutputRowSignature() return outputRowSignature; } + /** + * Applies a post-grouping projection. + * + * @see DruidQuery#computeGrouping which uses this + */ + public Grouping applyProject(final PlannerContext plannerContext, final Project project) + { + final List newDimensions = new ArrayList<>(); + final List newAggregations = new ArrayList<>(aggregations); + final Subtotals newSubtotals; + + final Projection postAggregationProjection = Projection.postAggregation( + project, + plannerContext, + outputRowSignature, + "p" + ); + + postAggregationProjection.getPostAggregators().forEach( + postAggregator -> newAggregations.add(Aggregation.create(postAggregator)) + ); + + // Remove literal dimensions that did not appear in the projection. This is useful for queries + // like "SELECT COUNT(*) FROM tbl GROUP BY 'dummy'" which some tools can generate, and for which we don't + // actually want to include a dimension 'dummy'. + final ImmutableBitSet aggregateProjectBits = RelOptUtil.InputFinder.bits(project.getChildExps(), null); + final int[] newDimIndexes = new int[dimensions.size()]; + + for (int i = 0; i < dimensions.size(); i++) { + final DimensionExpression dimension = dimensions.get(i); + if (Parser.parse(dimension.getDruidExpression().getExpression(), plannerContext.getExprMacroTable()) + .isLiteral() && !aggregateProjectBits.get(i)) { + newDimIndexes[i] = -1; + } else { + newDimIndexes[i] = newDimensions.size(); + newDimensions.add(dimension); + } + } + + // Renumber subtotals, if needed, to account for removed dummy dimensions. + if (newDimensions.size() != dimensions.size()) { + final List newSubtotalsList = new ArrayList<>(); + + for (IntList subtotal : subtotals.getSubtotals()) { + final IntList newSubtotal = new IntArrayList(); + for (int dimIndex : subtotal) { + final int newDimIndex = newDimIndexes[dimIndex]; + if (newDimIndex >= 0) { + newSubtotal.add(newDimIndex); + } + } + + newSubtotalsList.add(newSubtotal); + } + + newSubtotals = new Subtotals(newSubtotalsList); + } else { + newSubtotals = subtotals; + } + + return Grouping.create( + newDimensions, + newSubtotals, + newAggregations, + havingFilter, + postAggregationProjection.getOutputRowSignature() + ); + } + @Override - public boolean equals(final Object o) + public boolean equals(Object o) { if (this == o) { return true; @@ -150,27 +237,17 @@ public boolean equals(final Object o) if (o == null || getClass() != o.getClass()) { return false; } - final Grouping grouping = (Grouping) o; - return Objects.equals(dimensions, grouping.dimensions) && - Objects.equals(aggregations, grouping.aggregations) && + Grouping grouping = (Grouping) o; + return dimensions.equals(grouping.dimensions) && + subtotals.equals(grouping.subtotals) && + aggregations.equals(grouping.aggregations) && Objects.equals(havingFilter, grouping.havingFilter) && - Objects.equals(outputRowSignature, grouping.outputRowSignature); + outputRowSignature.equals(grouping.outputRowSignature); } @Override public int hashCode() { - return Objects.hash(dimensions, aggregations, havingFilter, outputRowSignature); - } - - @Override - public String toString() - { - return "Grouping{" + - "dimensions=" + dimensions + - ", aggregations=" + aggregations + - ", havingFilter=" + havingFilter + - ", outputRowSignature=" + outputRowSignature + - '}'; + return Objects.hash(dimensions, subtotals, aggregations, havingFilter, outputRowSignature); } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/Subtotals.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/Subtotals.java new file mode 100644 index 000000000000..64f4fb9e9c43 --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/Subtotals.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.rel; + + +import it.unimi.dsi.fastutil.ints.IntList; +import org.apache.druid.query.dimension.DimensionSpec; + +import javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; + +/** + * Represents the Druid groupBy query concept of subtotals, which is similar to GROUPING SETS. + */ +public class Subtotals +{ + /** + * List of subtotals: each one is a list of dimension indexes. (i.e. [0, 1] means use the first and second + * dimensions). + */ + private final List subtotals; + + Subtotals(List subtotals) + { + this.subtotals = subtotals; + } + + public List getSubtotals() + { + return subtotals; + } + + @Nullable + public List> toSubtotalsSpec(final List dimensions) + { + if (hasEffect(dimensions)) { + return subtotals.stream() + .map( + subtotalInts -> { + final List subtotalDimensionNames = new ArrayList<>(); + for (int dimIndex : subtotalInts) { + subtotalDimensionNames.add(dimensions.get(dimIndex).getOutputName()); + } + return subtotalDimensionNames; + } + ) + .collect(Collectors.toList()); + } else { + return null; + } + } + + /** + * Returns whether this subtotals spec has an effect, and cannot be ignored. + */ + public boolean hasEffect(final List dimensionSpecs) + { + if (subtotals.isEmpty() || (subtotals.size() == 1 && subtotals.get(0).size() == dimensionSpecs.size())) { + return false; + } else { + return true; + } + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + Subtotals subtotals1 = (Subtotals) o; + return subtotals.equals(subtotals1.subtotals); + } + + @Override + public int hashCode() + { + return Objects.hash(subtotals); + } + + @Override + public String toString() + { + return "Subtotals{" + + "subtotals=" + subtotals + + '}'; + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/ProjectAggregatePruneUnusedCallRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/ProjectAggregatePruneUnusedCallRule.java index 8241592d3315..4c79e80b39bf 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/ProjectAggregatePruneUnusedCallRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/ProjectAggregatePruneUnusedCallRule.java @@ -50,13 +50,6 @@ public static ProjectAggregatePruneUnusedCallRule instance() return INSTANCE; } - @Override - public boolean matches(final RelOptRuleCall call) - { - final Aggregate aggregate = call.rel(1); - return !aggregate.indicator && aggregate.getGroupSets().size() == 1; - } - @Override public void onMatch(final RelOptRuleCall call) { @@ -90,7 +83,6 @@ public void onMatch(final RelOptRuleCall call) final Aggregate newAggregate = aggregate.copy( aggregate.getTraitSet(), aggregate.getInput(), - aggregate.indicator, aggregate.getGroupSet(), aggregate.getGroupSets(), newAggregateCalls @@ -100,14 +92,13 @@ public void onMatch(final RelOptRuleCall call) final List fixUpProjects = new ArrayList<>(); final RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder(); - // Project the group unchanged. + // Project the group set unchanged. for (int i = 0; i < aggregate.getGroupCount(); i++) { fixUpProjects.add(rexBuilder.makeInputRef(newAggregate, i)); } // Replace pruned-out aggregators with NULLs. - int j = aggregate.getGroupCount(); - for (int i = aggregate.getGroupCount(); i < fieldCount; i++) { + for (int i = aggregate.getGroupCount(), j = aggregate.getGroupCount(); i < fieldCount; i++) { if (callsToKeep.get(i)) { fixUpProjects.add(rexBuilder.makeInputRef(newAggregate, j++)); } else { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 4116d9519d9b..04370db62410 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -2612,6 +2612,27 @@ public void testGroupByWithFilterMatchingNothing() throws Exception ); } + @Test + public void testGroupByWithGroupByEmpty() throws Exception + { + testQuery( + "SELECT COUNT(*), SUM(cnt) FROM druid.foo GROUP BY ()", + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .aggregators(aggregators( + new CountAggregatorFactory("a0"), + new LongSumAggregatorFactory("a1", "cnt") + )) + .context(TIMESERIES_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of(new Object[]{6L, 6L}) + ); + } + @Test public void testGroupByWithFilterMatchingNothingWithGroupByLiteral() throws Exception { @@ -4892,7 +4913,7 @@ public void testNestedGroupBy() throws Exception ? new CountAggregatorFactory("_a0") : new FilteredAggregatorFactory( new CountAggregatorFactory("_a0"), - not(selector("d1", null, null)) + not(selector("d0", null, null)) ) ) ) @@ -7550,6 +7571,538 @@ public void testGroupByTimeAndOtherDimension() throws Exception ); } + @Test + public void testGroupingSets() throws Exception + { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + + testQuery( + "SELECT dim2, gran, SUM(cnt)\n" + + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" + + "GROUP BY GROUPING SETS ( (dim2, gran), (dim2), (gran), () )", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "case_searched(notnull(\"dim2\"),\"dim2\",'')", + ValueType.STRING + ), + expressionVirtualColumn( + "v1", + "timestamp_floor(\"__time\",'P1M',null,'UTC')", + ValueType.LONG + ) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "v0"), + new DefaultDimensionSpec("v1", "v1", ValueType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("v0", "v1"), + ImmutableList.of("v0"), + ImmutableList.of("v1"), + ImmutableList.of() + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"", timestamp("2000-01-01"), 2L}, + new Object[]{"", timestamp("2001-01-01"), 1L}, + new Object[]{"a", timestamp("2000-01-01"), 1L}, + new Object[]{"a", timestamp("2001-01-01"), 1L}, + new Object[]{"abc", timestamp("2001-01-01"), 1L}, + new Object[]{"", null, 3L}, + new Object[]{"a", null, 2L}, + new Object[]{"abc", null, 1L}, + new Object[]{NULL_VALUE, timestamp("2000-01-01"), 3L}, + new Object[]{NULL_VALUE, timestamp("2001-01-01"), 3L}, + new Object[]{NULL_VALUE, null, 6L} + ) + ); + } + + @Test + public void testGroupingSetsWithNumericDimension() throws Exception + { + testQuery( + "SELECT cnt, COUNT(*)\n" + + "FROM foo\n" + + "GROUP BY GROUPING SETS ( (cnt), () )", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(dimensions(new DefaultDimensionSpec("cnt", "d0", ValueType.LONG))) + .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("d0"), + ImmutableList.of() + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{1L, 6L}, + new Object[]{null, 6L} + ) + ); + } + + @Test + public void testGroupByRollup() throws Exception + { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + + testQuery( + "SELECT dim2, gran, SUM(cnt)\n" + + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" + + "GROUP BY ROLLUP (dim2, gran)", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "case_searched(notnull(\"dim2\"),\"dim2\",'')", + ValueType.STRING + ), + expressionVirtualColumn( + "v1", + "timestamp_floor(\"__time\",'P1M',null,'UTC')", + ValueType.LONG + ) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "v0"), + new DefaultDimensionSpec("v1", "v1", ValueType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("v0", "v1"), + ImmutableList.of("v0"), + ImmutableList.of() + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"", timestamp("2000-01-01"), 2L}, + new Object[]{"", timestamp("2001-01-01"), 1L}, + new Object[]{"a", timestamp("2000-01-01"), 1L}, + new Object[]{"a", timestamp("2001-01-01"), 1L}, + new Object[]{"abc", timestamp("2001-01-01"), 1L}, + new Object[]{"", null, 3L}, + new Object[]{"a", null, 2L}, + new Object[]{"abc", null, 1L}, + new Object[]{NULL_VALUE, null, 6L} + ) + ); + } + + @Test + public void testGroupByCube() throws Exception + { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + + testQuery( + "SELECT dim2, gran, SUM(cnt)\n" + + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" + + "GROUP BY CUBE (dim2, gran)", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "case_searched(notnull(\"dim2\"),\"dim2\",'')", + ValueType.STRING + ), + expressionVirtualColumn( + "v1", + "timestamp_floor(\"__time\",'P1M',null,'UTC')", + ValueType.LONG + ) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "v0"), + new DefaultDimensionSpec("v1", "v1", ValueType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("v0", "v1"), + ImmutableList.of("v0"), + ImmutableList.of("v1"), + ImmutableList.of() + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"", timestamp("2000-01-01"), 2L}, + new Object[]{"", timestamp("2001-01-01"), 1L}, + new Object[]{"a", timestamp("2000-01-01"), 1L}, + new Object[]{"a", timestamp("2001-01-01"), 1L}, + new Object[]{"abc", timestamp("2001-01-01"), 1L}, + new Object[]{"", null, 3L}, + new Object[]{"a", null, 2L}, + new Object[]{"abc", null, 1L}, + new Object[]{NULL_VALUE, timestamp("2000-01-01"), 3L}, + new Object[]{NULL_VALUE, timestamp("2001-01-01"), 3L}, + new Object[]{NULL_VALUE, null, 6L} + ) + ); + } + + @Test + public void testGroupingSetsWithDummyDimension() throws Exception + { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + + testQuery( + "SELECT dim2, gran, SUM(cnt)\n" + + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" + + "GROUP BY GROUPING SETS ( (dim2, 'dummy', gran), (dim2), (gran), ('dummy') )", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "case_searched(notnull(\"dim2\"),\"dim2\",'')", + ValueType.STRING + ), + expressionVirtualColumn( + "v2", + "timestamp_floor(\"__time\",'P1M',null,'UTC')", + ValueType.LONG + ) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "v0"), + new DefaultDimensionSpec("v2", "v2", ValueType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("v0", "v2"), + ImmutableList.of("v0"), + ImmutableList.of(), + ImmutableList.of("v2") + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"", timestamp("2000-01-01"), 2L}, + new Object[]{"", timestamp("2001-01-01"), 1L}, + new Object[]{"a", timestamp("2000-01-01"), 1L}, + new Object[]{"a", timestamp("2001-01-01"), 1L}, + new Object[]{"abc", timestamp("2001-01-01"), 1L}, + new Object[]{"", null, 3L}, + new Object[]{"a", null, 2L}, + new Object[]{"abc", null, 1L}, + new Object[]{NULL_VALUE, null, 6L}, + new Object[]{NULL_VALUE, timestamp("2000-01-01"), 3L}, + new Object[]{NULL_VALUE, timestamp("2001-01-01"), 3L} + ) + ); + } + + @Test + public void testGroupingSetsNoSuperset() throws Exception + { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + + // Note: the grouping sets are reordered in the output of this query, but this is allowed. + testQuery( + "SELECT dim2, gran, SUM(cnt)\n" + + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" + + "GROUP BY GROUPING SETS ( (), (dim2), (gran) )", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "case_searched(notnull(\"dim2\"),\"dim2\",'')", + ValueType.STRING + ), + expressionVirtualColumn( + "v1", + "timestamp_floor(\"__time\",'P1M',null,'UTC')", + ValueType.LONG + ) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "v0"), + new DefaultDimensionSpec("v1", "v1", ValueType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("v0"), + ImmutableList.of("v1"), + ImmutableList.of() + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"", null, 3L}, + new Object[]{"a", null, 2L}, + new Object[]{"abc", null, 1L}, + new Object[]{NULL_VALUE, timestamp("2000-01-01"), 3L}, + new Object[]{NULL_VALUE, timestamp("2001-01-01"), 3L}, + new Object[]{NULL_VALUE, null, 6L} + ) + ); + } + + @Test + public void testGroupingSetsWithOrderByDimension() throws Exception + { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + + testQuery( + "SELECT dim2, gran, SUM(cnt)\n" + + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" + + "GROUP BY GROUPING SETS ( (), (dim2), (gran) )\n" + + "ORDER BY gran, dim2 DESC", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "case_searched(notnull(\"dim2\"),\"dim2\",'')", + ValueType.STRING + ), + expressionVirtualColumn( + "v1", + "timestamp_floor(\"__time\",'P1M',null,'UTC')", + ValueType.LONG + ) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "v0"), + new DefaultDimensionSpec("v1", "v1", ValueType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("v0"), + ImmutableList.of("v1"), + ImmutableList.of() + ) + ) + .setLimitSpec( + new DefaultLimitSpec( + ImmutableList.of( + new OrderByColumnSpec( + "v1", + Direction.ASCENDING, + StringComparators.NUMERIC + ), + new OrderByColumnSpec( + "v0", + Direction.DESCENDING, + StringComparators.LEXICOGRAPHIC + ) + ), + Integer.MAX_VALUE + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"abc", null, 1L}, + new Object[]{"a", null, 2L}, + new Object[]{"", null, 3L}, + new Object[]{NULL_VALUE, null, 6L}, + new Object[]{NULL_VALUE, timestamp("2000-01-01"), 3L}, + new Object[]{NULL_VALUE, timestamp("2001-01-01"), 3L} + ) + ); + } + + @Test + public void testGroupingSetsWithOrderByAggregator() throws Exception + { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + + testQuery( + "SELECT dim2, gran, SUM(cnt)\n" + + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" + + "GROUP BY GROUPING SETS ( (), (dim2), (gran) )\n" + + "ORDER BY SUM(cnt)\n", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "case_searched(notnull(\"dim2\"),\"dim2\",'')", + ValueType.STRING + ), + expressionVirtualColumn( + "v1", + "timestamp_floor(\"__time\",'P1M',null,'UTC')", + ValueType.LONG + ) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "v0"), + new DefaultDimensionSpec("v1", "v1", ValueType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("v0"), + ImmutableList.of("v1"), + ImmutableList.of() + ) + ) + .setLimitSpec( + new DefaultLimitSpec( + ImmutableList.of( + new OrderByColumnSpec( + "a0", + Direction.ASCENDING, + StringComparators.NUMERIC + ) + ), + Integer.MAX_VALUE + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"abc", null, 1L}, + new Object[]{"a", null, 2L}, + new Object[]{"", null, 3L}, + new Object[]{NULL_VALUE, timestamp("2000-01-01"), 3L}, + new Object[]{NULL_VALUE, timestamp("2001-01-01"), 3L}, + new Object[]{NULL_VALUE, null, 6L} + ) + ); + } + + @Test + public void testGroupingSetsWithOrderByAggregatorWithLimit() throws Exception + { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + + testQuery( + "SELECT dim2, gran, SUM(cnt)\n" + + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" + + "GROUP BY GROUPING SETS ( (), (dim2), (gran) )\n" + + "ORDER BY SUM(cnt)\n" + + "LIMIT 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "case_searched(notnull(\"dim2\"),\"dim2\",'')", + ValueType.STRING + ), + expressionVirtualColumn( + "v1", + "timestamp_floor(\"__time\",'P1M',null,'UTC')", + ValueType.LONG + ) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "v0"), + new DefaultDimensionSpec("v1", "v1", ValueType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("v0"), + ImmutableList.of("v1"), + ImmutableList.of() + ) + ) + .setLimitSpec( + new DefaultLimitSpec( + ImmutableList.of( + new OrderByColumnSpec( + "a0", + Direction.ASCENDING, + StringComparators.NUMERIC + ) + ), + 1 + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"abc", null, 1L} + ) + ); + } + @Test public void testUsingSubqueryAsPartOfAndFilter() throws Exception { @@ -9731,8 +10284,6 @@ public void testLeftRightStringOperators() throws Exception ) ); } - - @Test public void testQueryContextOuterLimit() throws Exception From 3c7cef8e5e54520fa6532f8ba2772e78da07618c Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Fri, 3 Jan 2020 12:17:54 -0800 Subject: [PATCH 2/5] Fix bugs. --- .../main/java/org/apache/druid/query/groupby/GroupByQuery.java | 2 +- .../java/org/apache/druid/sql/calcite/CalciteQueryTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java index ed3ce41048dc..459e8576c9e0 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java @@ -1203,7 +1203,7 @@ public String toString() ", dimensions=" + dimensions + ", aggregatorSpecs=" + aggregatorSpecs + ", postAggregatorSpecs=" + postAggregatorSpecs + - (subtotalsSpec != null ? (", subtotalsSpec=" + subtotalsSpec) : null) + + (subtotalsSpec != null ? (", subtotalsSpec=" + subtotalsSpec) : "") + ", havingSpec=" + havingSpec + ", context=" + getContext() + '}'; diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 04370db62410..b22dfb81bac8 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -4913,7 +4913,7 @@ public void testNestedGroupBy() throws Exception ? new CountAggregatorFactory("_a0") : new FilteredAggregatorFactory( new CountAggregatorFactory("_a0"), - not(selector("d0", null, null)) + not(selector("d1", null, null)) ) ) ) From a9ede1a829a486872daa7744949e822d28e288d7 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Mon, 24 Feb 2020 18:17:35 -0800 Subject: [PATCH 3/5] Fix tests. --- .../druid/sql/calcite/CalciteQueryTest.java | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 69beafdc26c9..c49306f2862a 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -9654,9 +9654,9 @@ public void testGroupingSets() throws Exception new Object[]{"", null, 3L}, new Object[]{"a", null, 2L}, new Object[]{"abc", null, 1L}, - new Object[]{NULL_VALUE, timestamp("2000-01-01"), 3L}, - new Object[]{NULL_VALUE, timestamp("2001-01-01"), 3L}, - new Object[]{NULL_VALUE, null, 6L} + new Object[]{NULL_STRING, timestamp("2000-01-01"), 3L}, + new Object[]{NULL_STRING, timestamp("2001-01-01"), 3L}, + new Object[]{NULL_STRING, null, 6L} ) ); } @@ -9744,7 +9744,7 @@ public void testGroupByRollup() throws Exception new Object[]{"", null, 3L}, new Object[]{"a", null, 2L}, new Object[]{"abc", null, 1L}, - new Object[]{NULL_VALUE, null, 6L} + new Object[]{NULL_STRING, null, 6L} ) ); } @@ -9803,9 +9803,9 @@ public void testGroupByCube() throws Exception new Object[]{"", null, 3L}, new Object[]{"a", null, 2L}, new Object[]{"abc", null, 1L}, - new Object[]{NULL_VALUE, timestamp("2000-01-01"), 3L}, - new Object[]{NULL_VALUE, timestamp("2001-01-01"), 3L}, - new Object[]{NULL_VALUE, null, 6L} + new Object[]{NULL_STRING, timestamp("2000-01-01"), 3L}, + new Object[]{NULL_STRING, timestamp("2001-01-01"), 3L}, + new Object[]{NULL_STRING, null, 6L} ) ); } @@ -9864,9 +9864,9 @@ public void testGroupingSetsWithDummyDimension() throws Exception new Object[]{"", null, 3L}, new Object[]{"a", null, 2L}, new Object[]{"abc", null, 1L}, - new Object[]{NULL_VALUE, null, 6L}, - new Object[]{NULL_VALUE, timestamp("2000-01-01"), 3L}, - new Object[]{NULL_VALUE, timestamp("2001-01-01"), 3L} + new Object[]{NULL_STRING, null, 6L}, + new Object[]{NULL_STRING, timestamp("2000-01-01"), 3L}, + new Object[]{NULL_STRING, timestamp("2001-01-01"), 3L} ) ); } @@ -9920,9 +9920,9 @@ public void testGroupingSetsNoSuperset() throws Exception new Object[]{"", null, 3L}, new Object[]{"a", null, 2L}, new Object[]{"abc", null, 1L}, - new Object[]{NULL_VALUE, timestamp("2000-01-01"), 3L}, - new Object[]{NULL_VALUE, timestamp("2001-01-01"), 3L}, - new Object[]{NULL_VALUE, null, 6L} + new Object[]{NULL_STRING, timestamp("2000-01-01"), 3L}, + new Object[]{NULL_STRING, timestamp("2001-01-01"), 3L}, + new Object[]{NULL_STRING, null, 6L} ) ); } @@ -9993,9 +9993,9 @@ public void testGroupingSetsWithOrderByDimension() throws Exception new Object[]{"abc", null, 1L}, new Object[]{"a", null, 2L}, new Object[]{"", null, 3L}, - new Object[]{NULL_VALUE, null, 6L}, - new Object[]{NULL_VALUE, timestamp("2000-01-01"), 3L}, - new Object[]{NULL_VALUE, timestamp("2001-01-01"), 3L} + new Object[]{NULL_STRING, null, 6L}, + new Object[]{NULL_STRING, timestamp("2000-01-01"), 3L}, + new Object[]{NULL_STRING, timestamp("2001-01-01"), 3L} ) ); } @@ -10061,9 +10061,9 @@ public void testGroupingSetsWithOrderByAggregator() throws Exception new Object[]{"abc", null, 1L}, new Object[]{"a", null, 2L}, new Object[]{"", null, 3L}, - new Object[]{NULL_VALUE, timestamp("2000-01-01"), 3L}, - new Object[]{NULL_VALUE, timestamp("2001-01-01"), 3L}, - new Object[]{NULL_VALUE, null, 6L} + new Object[]{NULL_STRING, timestamp("2000-01-01"), 3L}, + new Object[]{NULL_STRING, timestamp("2001-01-01"), 3L}, + new Object[]{NULL_STRING, null, 6L} ) ); } From 922263425044d473f919565b5e5dae2b6313d43f Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Tue, 25 Feb 2020 11:57:38 -0800 Subject: [PATCH 4/5] PR updates. --- .../druid/query/groupby/GroupByQueryTest.java | 21 +++++++ .../druid/sql/calcite/CalciteQueryTest.java | 58 +++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryTest.java index 34a7d5a52538..2745802a7900 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryTest.java @@ -22,6 +22,8 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.google.common.collect.Ordering; +import nl.jqno.equalsverifier.EqualsVerifier; +import nl.jqno.equalsverifier.Warning; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.query.BaseQuery; @@ -121,4 +123,23 @@ public void testSegmentLookUpForNestedQueries() .build(); Assert.assertEquals(innerQuerySegmentSpec, BaseQuery.getQuerySegmentSpecForLookUp(query)); } + + @Test + public void testEquals() + { + EqualsVerifier.forClass(GroupByQuery.class) + .usingGetClass() + // The 'duration' field is used by equals via getDuration(), which computes it lazily in a way + // that confuses EqualsVerifier. + .suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS) + // Fields derived from other fields are not included in equals/hashCode + .withIgnoredFields( + "applyLimitPushDown", + "postProcessingFn", + "resultRowOrder", + "resultRowPositionLookup", + "universalTimestamp" + ) + .verify(); + } } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index c49306f2862a..e230635476fc 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -9749,6 +9749,64 @@ public void testGroupByRollup() throws Exception ); } + @Test + public void testGroupByRollupDifferentOrder() throws Exception + { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + + // Like "testGroupByRollup", but the ROLLUP exprs are in the reverse order. + testQuery( + "SELECT dim2, gran, SUM(cnt)\n" + + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" + + "GROUP BY ROLLUP (gran, dim2)", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "timestamp_floor(\"__time\",'P1M',null,'UTC')", + ValueType.LONG + ), + expressionVirtualColumn( + "v1", + "case_searched(notnull(\"dim2\"),\"dim2\",'')", + ValueType.STRING + ) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "v0", ValueType.LONG), + new DefaultDimensionSpec("v1", "v1") + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("v0", "v1"), + ImmutableList.of("v0"), + ImmutableList.of() + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"", timestamp("2000-01-01"), 2L}, + new Object[]{"a", timestamp("2000-01-01"), 1L}, + new Object[]{"", timestamp("2001-01-01"), 1L}, + new Object[]{"a", timestamp("2001-01-01"), 1L}, + new Object[]{"abc", timestamp("2001-01-01"), 1L}, + new Object[]{NULL_STRING, timestamp("2000-01-01"), 3L}, + new Object[]{NULL_STRING, timestamp("2001-01-01"), 3L}, + new Object[]{NULL_STRING, null, 6L} + ) + ); + } + @Test public void testGroupByCube() throws Exception { From 3074be820663cdd6be239baea6303eb37a692334 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Tue, 25 Feb 2020 16:40:11 -0800 Subject: [PATCH 5/5] Grouping class hygiene. --- .../druid/sql/calcite/rel/Grouping.java | 7 ++-- .../druid/sql/calcite/rel/GroupingTest.java | 35 +++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) create mode 100644 sql/src/test/java/org/apache/druid/sql/calcite/rel/GroupingTest.java diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java index fc450725a9ef..6e1b03ba0ead 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java @@ -19,6 +19,7 @@ package org.apache.druid.sql.calcite.rel; +import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.ints.IntList; @@ -71,10 +72,10 @@ private Grouping( ) { this.dimensions = ImmutableList.copyOf(dimensions); - this.subtotals = subtotals; + this.subtotals = Preconditions.checkNotNull(subtotals, "subtotals"); this.aggregations = ImmutableList.copyOf(aggregations); this.havingFilter = havingFilter; - this.outputRowSignature = outputRowSignature; + this.outputRowSignature = Preconditions.checkNotNull(outputRowSignature, "outputRowSignature"); // Verify no collisions between dimensions, aggregations, post-aggregations. final Set seen = new HashSet<>(); @@ -106,7 +107,7 @@ public static Grouping create( final List dimensions, final Subtotals subtotals, final List aggregations, - final DimFilter havingFilter, + @Nullable final DimFilter havingFilter, final RowSignature outputRowSignature ) { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/rel/GroupingTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/rel/GroupingTest.java new file mode 100644 index 000000000000..47ed88158774 --- /dev/null +++ b/sql/src/test/java/org/apache/druid/sql/calcite/rel/GroupingTest.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.rel; + +import nl.jqno.equalsverifier.EqualsVerifier; +import org.junit.Test; + +public class GroupingTest +{ + @Test + public void testEquals() + { + EqualsVerifier.forClass(Grouping.class) + .usingGetClass() + .withNonnullFields("dimensions", "subtotals", "aggregations", "outputRowSignature") + .verify(); + } +}