diff --git a/docs/querying/sql.md b/docs/querying/sql.md index 269decfbcfe7..bd228e712f19 100644 --- a/docs/querying/sql.md +++ b/docs/querying/sql.md @@ -65,7 +65,7 @@ Druid SQL supports SELECT queries with the following structure: SELECT [ ALL | DISTINCT ] { * | exprs } FROM table [ WHERE expr ] -[ GROUP BY exprs ] +[ GROUP BY [ exprs | GROUPING SETS ( (exprs), ... ) | ROLLUP (exprs) | CUBE (exprs) ] ] [ HAVING expr ] [ ORDER BY expr [ ASC | DESC ], expr [ ASC | DESC ], ... ] [ LIMIT limit ] @@ -86,6 +86,22 @@ trigger an aggregation query using one of Druid's [three native aggregation quer can refer to an expression or a select clause ordinal position (like `GROUP BY 2` to group by the second selected column). +The GROUP BY clause can also refer to multiple grouping sets in three ways. The most flexible is GROUP BY GROUPING SETS, +for example `GROUP BY GROUPING SETS ( (country, city), () )`. This example is equivalent to a `GROUP BY country, city` +followed by `GROUP BY ()` (a grand total). With GROUPING SETS, the underlying data is only scanned one time, leading to +better efficiency. Second, GROUP BY ROLLUP computes a grouping set for each level of the grouping expressions. For +example `GROUP BY ROLLUP (country, city)` is equivalent to `GROUP BY GROUPING SETS ( (country, city), (country), () )` +and will produce grouped rows for each country / city pair, along with subtotals for each country, along with a grand +total. Finally, GROUP BY CUBE computes a grouping set for each combination of grouping expressions. For example, +`GROUP BY CUBE (country, city)` is equivalent to `GROUP BY GROUPING SETS ( (country, city), (country), (city), () )`. +Grouping columns that do not apply to a particular row will contain `NULL`. For example, when computing +`GROUP BY GROUPING SETS ( (country, city), () )`, the grand total row corresponding to `()` will have `NULL` for the +"country" and "city" columns. 
+ +When using GROUP BY GROUPING SETS, GROUP BY ROLLUP, or GROUP BY CUBE, be aware that results may not be generated in the +order that you specify your grouping sets in the query. If you need results to be generated in a particular order, use +the ORDER BY clause. + The HAVING clause refers to columns that are present after execution of GROUP BY. It can be used to filter on either grouping expressions or aggregated values. It can only be used together with GROUP BY. diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java index 7c3a70d6e97b..41425b93b202 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQuery.java @@ -794,6 +794,11 @@ public GroupByQuery withQuerySegmentSpec(QuerySegmentSpec spec) return new Builder(this).setQuerySegmentSpec(spec).build(); } + public GroupByQuery withVirtualColumns(final VirtualColumns virtualColumns) + { + return new Builder(this).setVirtualColumns(virtualColumns).build(); + } + public GroupByQuery withDimFilter(@Nullable final DimFilter dimFilter) { return new Builder(this).setDimFilter(dimFilter).build(); @@ -1198,6 +1203,7 @@ public String toString() ", dimensions=" + dimensions + ", aggregatorSpecs=" + aggregatorSpecs + ", postAggregatorSpecs=" + postAggregatorSpecs + + (subtotalsSpec != null ? 
(", subtotalsSpec=" + subtotalsSpec) : "") + ", havingSpec=" + havingSpec + ", context=" + getContext() + '}'; @@ -1222,7 +1228,8 @@ public boolean equals(final Object o) Objects.equals(dimFilter, that.dimFilter) && Objects.equals(dimensions, that.dimensions) && Objects.equals(aggregatorSpecs, that.aggregatorSpecs) && - Objects.equals(postAggregatorSpecs, that.postAggregatorSpecs); + Objects.equals(postAggregatorSpecs, that.postAggregatorSpecs) && + Objects.equals(subtotalsSpec, that.subtotalsSpec); } @Override @@ -1236,7 +1243,8 @@ public int hashCode() dimFilter, dimensions, aggregatorSpecs, - postAggregatorSpecs + postAggregatorSpecs, + subtotalsSpec ); } } diff --git a/processing/src/main/java/org/apache/druid/query/groupby/strategy/GroupByStrategyV2.java b/processing/src/main/java/org/apache/druid/query/groupby/strategy/GroupByStrategyV2.java index 587704bbf2a0..971194bee179 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/strategy/GroupByStrategyV2.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/strategy/GroupByStrategyV2.java @@ -67,6 +67,7 @@ import org.apache.druid.query.groupby.resource.GroupByQueryResource; import org.apache.druid.query.spec.MultipleIntervalSegmentSpec; import org.apache.druid.segment.StorageAdapter; +import org.apache.druid.segment.VirtualColumns; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -355,7 +356,11 @@ public Sequence processSubtotalsSpec( GroupByRowProcessor.ResultSupplier resultSupplierOne = null; try { - GroupByQuery queryWithoutSubtotalsSpec = query + // baseSubtotalQuery is the original query with dimensions and aggregators rewritten to apply to the *results* + // rather than *inputs* of that query. It has its virtual columns and dim filter removed, because those only + // make sense when applied to inputs. Finally, it has subtotalsSpec removed, since we'll be computing them + // one-by-one soon enough. 
+ GroupByQuery baseSubtotalQuery = query .withDimensionSpecs(query.getDimensions().stream().map( dimSpec -> new DefaultDimensionSpec( dimSpec.getOutputName(), @@ -369,13 +374,13 @@ public Sequence processSubtotalsSpec( .map(AggregatorFactory::getCombiningFactory) .collect(Collectors.toList()) ) - .withSubtotalsSpec(null) - .withDimFilter(null); - + .withVirtualColumns(VirtualColumns.EMPTY) + .withDimFilter(null) + .withSubtotalsSpec(null); resultSupplierOne = GroupByRowProcessor.process( - queryWithoutSubtotalsSpec, - queryWithoutSubtotalsSpec, + baseSubtotalQuery, + baseSubtotalQuery, queryResult, configSupplier.get(), resource, @@ -384,13 +389,13 @@ public Sequence processSubtotalsSpec( processingConfig.intermediateComputeSizeBytes() ); - List queryDimNames = queryWithoutSubtotalsSpec.getDimensions().stream().map(DimensionSpec::getOutputName) - .collect(Collectors.toList()); + List queryDimNames = baseSubtotalQuery.getDimensions().stream().map(DimensionSpec::getOutputName) + .collect(Collectors.toList()); // Only needed to make LimitSpec.filterColumns(..) call later in case base query has a non default LimitSpec. 
Set<String> aggsAndPostAggs = null; - if (queryWithoutSubtotalsSpec.getLimitSpec() != null && !(queryWithoutSubtotalsSpec.getLimitSpec() instanceof NoopLimitSpec)) { - aggsAndPostAggs = getAggregatorAndPostAggregatorNames(queryWithoutSubtotalsSpec); + if (!(baseSubtotalQuery.getLimitSpec() instanceof NoopLimitSpec)) { + aggsAndPostAggs = getAggregatorAndPostAggregatorNames(baseSubtotalQuery); } List<List<String>> subtotals = query.getSubtotalsSpec(); @@ -425,14 +430,14 @@ public Sequence<ResultRow> processSubtotalsSpec( // Create appropriate LimitSpec for subtotal query LimitSpec subtotalQueryLimitSpec = NoopLimitSpec.instance(); - if (queryWithoutSubtotalsSpec.getLimitSpec() != null && !(queryWithoutSubtotalsSpec.getLimitSpec() instanceof NoopLimitSpec)) { - Set<String> columns = new HashSet(aggsAndPostAggs); + if (!(baseSubtotalQuery.getLimitSpec() instanceof NoopLimitSpec)) { + Set<String> columns = new HashSet<>(aggsAndPostAggs); columns.addAll(subtotalSpec); - subtotalQueryLimitSpec = queryWithoutSubtotalsSpec.getLimitSpec().filterColumns(columns); + subtotalQueryLimitSpec = baseSubtotalQuery.getLimitSpec().filterColumns(columns); } - GroupByQuery subtotalQuery = queryWithoutSubtotalsSpec + GroupByQuery subtotalQuery = baseSubtotalQuery .withLimitSpec(subtotalQueryLimitSpec) .withDimensionSpecs(newDimensions); @@ -451,7 +456,7 @@ public Sequence<ResultRow> processSubtotalsSpec( // Also note, we can't create the ResultSupplier eagerly here or as we don't want to eagerly allocate // merge buffers for processing subtotal. 
Supplier resultSupplierTwo = () -> GroupByRowProcessor.process( - queryWithoutSubtotalsSpec, + baseSubtotalQuery, subtotalQuery, resultSupplierOneFinal.results(subtotalSpec), configSupplier.get(), @@ -468,7 +473,7 @@ public Sequence processSubtotalsSpec( } return Sequences.withBaggage( - Sequences.concat(subtotalsResults), + query.postProcess(Sequences.concat(subtotalsResults)), resultSupplierOne //this will close resources allocated by resultSupplierOne after sequence read ); } @@ -489,21 +494,17 @@ private Sequence processSubtotalsResultAndOptionallyClose( // on sequence read if closeOnSequenceRead is true. try { Supplier memoizedSupplier = Suppliers.memoize(baseResultsSupplier); - return applyPostProcessing( - mergeResults( - (queryPlus, responseContext) -> - new LazySequence<>( - () -> Sequences.withBaggage( - memoizedSupplier.get().results(dimsToInclude), - closeOnSequenceRead ? () -> CloseQuietly.close(memoizedSupplier.get()) : () -> {} - ) - ), - subtotalQuery, - null - ), - subtotalQuery + return mergeResults( + (queryPlus, responseContext) -> + new LazySequence<>( + () -> Sequences.withBaggage( + memoizedSupplier.get().results(dimsToInclude), + closeOnSequenceRead ? 
() -> CloseQuietly.close(memoizedSupplier.get()) : () -> {} + ) + ), + subtotalQuery, + null ); - } catch (Exception ex) { CloseQuietly.close(baseResultsSupplier.get()); diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java index 037b2b75a973..b55abe4d7346 100644 --- a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryRunnerTest.java @@ -412,6 +412,8 @@ public ByteBuffer get() @Parameterized.Parameters(name = "{0}") public static Collection constructorFeeder() { + NullHandling.initializeForTests(); + final List constructors = new ArrayList<>(); for (GroupByQueryConfig config : testConfigs()) { final Pair factoryAndCloser = makeQueryRunnerFactory(config); @@ -7196,38 +7198,13 @@ public void testGroupByWithSubtotalsSpecWithOrderLimit() .addOrderByColumn("idx") .addOrderByColumn("alias") .addOrderByColumn("market") - .setLimit(1) + .setLimit(3) .build(); List expectedResults = Arrays.asList( - makeRow( - query, - "2011-04-01", - "alias", - "technology", - "rows", - 1L, - "idx", - 78L - ), - makeRow( - query, - "2011-04-01T00:00:00.000Z", - "market", - "spot", - "rows", - 9L, - "idx", - 1102L - ), - makeRow( - query, - "2011-04-01T00:00:00.000Z", - "rows", - 13L, - "idx", - 6619L - ) + makeRow(query, "2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L), + makeRow(query, "2011-04-01", "alias", "business", "rows", 1L, "idx", 118L), + makeRow(query, "2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L) ); Iterable results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query); diff --git a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryTest.java b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryTest.java index 34a7d5a52538..2745802a7900 100644 --- 
a/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/groupby/GroupByQueryTest.java @@ -22,6 +22,8 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.google.common.collect.Ordering; +import nl.jqno.equalsverifier.EqualsVerifier; +import nl.jqno.equalsverifier.Warning; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.query.BaseQuery; @@ -121,4 +123,23 @@ public void testSegmentLookUpForNestedQueries() .build(); Assert.assertEquals(innerQuerySegmentSpec, BaseQuery.getQuerySegmentSpecForLookUp(query)); } + + @Test + public void testEquals() + { + EqualsVerifier.forClass(GroupByQuery.class) + .usingGetClass() + // The 'duration' field is used by equals via getDuration(), which computes it lazily in a way + // that confuses EqualsVerifier. + .suppress(Warning.NULL_FIELDS, Warning.NONFINAL_FIELDS) + // Fields derived from other fields are not included in equals/hashCode + .withIgnoredFields( + "applyLimitPushDown", + "postProcessingFn", + "resultRowOrder", + "resultRowPositionLookup", + "universalTimestamp" + ) + .verify(); + } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/Rules.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/Rules.java index b2b3659a7437..bcc717f2f8cf 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/Rules.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/Rules.java @@ -231,8 +231,8 @@ private static List baseRuleSet(final PlannerContext plannerContext) rules.addAll(SUB_QUERY_REMOVE_RULES); if (!plannerConfig.isUseApproximateCountDistinct()) { - // We'll need this to expand COUNT DISTINCTs. - // Avoid AggregateExpandDistinctAggregatesRule.INSTANCE; it uses grouping sets and we don't support those. 
+ // For some reason, even though we support grouping sets, using AggregateExpandDistinctAggregatesRule.INSTANCE + // here causes CalciteQueryTest#testExactCountDistinctWithGroupingAndOtherAggregators to fail. rules.add(AggregateExpandDistinctAggregatesRule.JOIN); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java index 92392cf866aa..09bc584b774f 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java @@ -26,7 +26,8 @@ import com.google.common.collect.Iterators; import com.google.common.collect.Ordering; import com.google.common.primitives.Ints; -import org.apache.calcite.plan.RelOptUtil; +import it.unimi.dsi.fastutil.ints.IntArrayList; +import it.unimi.dsi.fastutil.ints.IntList; import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.AggregateCall; @@ -44,7 +45,6 @@ import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.granularity.Granularity; -import org.apache.druid.math.expr.Parser; import org.apache.druid.query.DataSource; import org.apache.druid.query.Query; import org.apache.druid.query.QueryDataSource; @@ -317,6 +317,11 @@ private static Grouping computeGrouping( virtualColumnRegistry ); + final Subtotals subtotals = computeSubtotals( + partialQuery, + rowSignature + ); + final List aggregations = computeAggregations( partialQuery, plannerContext, @@ -342,33 +347,12 @@ private static Grouping computeGrouping( aggregateRowSignature ); + final Grouping grouping = Grouping.create(dimensions, subtotals, aggregations, havingFilter, aggregateRowSignature); + if (aggregateProject == null) { - return Grouping.create(dimensions, aggregations, havingFilter, aggregateRowSignature); + return grouping; } 
else { - final Projection postAggregationProjection = Projection.postAggregation( - aggregateProject, - plannerContext, - aggregateRowSignature, - "p" - ); - - postAggregationProjection.getPostAggregators().forEach( - postAggregator -> aggregations.add(Aggregation.create(postAggregator)) - ); - - // Remove literal dimensions that did not appear in the projection. This is useful for queries - // like "SELECT COUNT(*) FROM tbl GROUP BY 'dummy'" which some tools can generate, and for which we don't - // actually want to include a dimension 'dummy'. - final ImmutableBitSet aggregateProjectBits = RelOptUtil.InputFinder.bits(aggregateProject.getChildExps(), null); - for (int i = dimensions.size() - 1; i >= 0; i--) { - final DimensionExpression dimension = dimensions.get(i); - if (Parser.parse(dimension.getDruidExpression().getExpression(), plannerContext.getExprMacroTable()) - .isLiteral() && !aggregateProjectBits.get(i)) { - dimensions.remove(i); - } - } - - return Grouping.create(dimensions, aggregations, havingFilter, postAggregationProjection.getOutputRowSignature()); + return grouping.applyProject(plannerContext, aggregateProject); } } @@ -435,6 +419,38 @@ private static List computeDimensions( return dimensions; } + /** + * Builds a {@link Subtotals} object based on {@link Aggregate#getGroupSets()}. + */ + private static Subtotals computeSubtotals( + final PartialDruidQuery partialQuery, + final RowSignature rowSignature + ) + { + final Aggregate aggregate = partialQuery.getAggregate(); + + // dimBitMapping maps from input field position to group set position (dimension number). + final int[] dimBitMapping = new int[rowSignature.getRowOrder().size()]; + int i = 0; + for (int dimBit : aggregate.getGroupSet()) { + dimBitMapping[dimBit] = i++; + } + + // Use dimBitMapping to remap groupSets (which is input-field-position based) into subtotals (which is + // dimension-list-position based). 
+ final List subtotals = new ArrayList<>(); + for (ImmutableBitSet groupSet : aggregate.getGroupSets()) { + final IntList subtotal = new IntArrayList(); + for (int dimBit : groupSet) { + subtotal.add(dimBitMapping[dimBit]); + } + + subtotals.add(subtotal); + } + + return new Subtotals(subtotals); + } + /** * Returns aggregations corresponding to {@code aggregate.getAggCallList()}, in the same order. * @@ -706,7 +722,9 @@ private Query computeQuery() @Nullable public TimeseriesQuery toTimeseriesQuery() { - if (grouping == null || grouping.getHavingFilter() != null) { + if (grouping == null + || grouping.getSubtotals().hasEffect(grouping.getDimensionSpecs()) + || grouping.getHavingFilter() != null) { return null; } @@ -787,9 +805,10 @@ public TimeseriesQuery toTimeseriesQuery() @Nullable public TopNQuery toTopNQuery() { - // Must have GROUP BY one column, ORDER BY zero or one column, limit less than maxTopNLimit, and no HAVING. + // Must have GROUP BY one column, no GROUPING SETS, ORDER BY ≤ 1 column, limit less than maxTopNLimit, no HAVING. final boolean topNOk = grouping != null && grouping.getDimensions().size() == 1 + && !grouping.getSubtotals().hasEffect(grouping.getDimensionSpecs()) && sorting != null && (sorting.getOrderBys().size() <= 1 && sorting.isLimited() && sorting.getLimit() <= plannerContext.getPlannerConfig() @@ -895,9 +914,12 @@ public GroupByQuery toGroupByQuery() postAggregators, havingSpec, sorting != null - ? new DefaultLimitSpec(sorting.getOrderBys(), sorting.isLimited() ? Ints.checkedCast(sorting.getLimit()) : null) + ? new DefaultLimitSpec( + sorting.getOrderBys(), + sorting.isLimited() ? 
Ints.checkedCast(sorting.getLimit()) : null + ) : NoopLimitSpec.instance(), - null, + grouping.getSubtotals().toSubtotalsSpec(grouping.getDimensionSpecs()), ImmutableSortedMap.copyOf(plannerContext.getQueryContext()) ); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java index 8ce4cd8e443a..6e1b03ba0ead 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java @@ -19,17 +19,26 @@ package org.apache.druid.sql.calcite.rel; +import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; +import it.unimi.dsi.fastutil.ints.IntArrayList; +import it.unimi.dsi.fastutil.ints.IntList; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.util.ImmutableBitSet; import org.apache.druid.java.util.common.ISE; +import org.apache.druid.math.expr.Parser; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.PostAggregator; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.filter.DimFilter; import org.apache.druid.sql.calcite.aggregation.Aggregation; import org.apache.druid.sql.calcite.aggregation.DimensionExpression; +import org.apache.druid.sql.calcite.planner.PlannerContext; import org.apache.druid.sql.calcite.table.RowSignature; import javax.annotation.Nullable; +import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Objects; @@ -48,23 +57,27 @@ public class Grouping { private final List dimensions; + private final Subtotals subtotals; private final List aggregations; + @Nullable private final DimFilter havingFilter; private final RowSignature outputRowSignature; private Grouping( final List dimensions, + final Subtotals subtotals, final List aggregations, - final DimFilter 
havingFilter, + @Nullable final DimFilter havingFilter, final RowSignature outputRowSignature ) { this.dimensions = ImmutableList.copyOf(dimensions); + this.subtotals = Preconditions.checkNotNull(subtotals, "subtotals"); this.aggregations = ImmutableList.copyOf(aggregations); this.havingFilter = havingFilter; - this.outputRowSignature = outputRowSignature; + this.outputRowSignature = Preconditions.checkNotNull(outputRowSignature, "outputRowSignature"); - // Verify no collisions. + // Verify no collisions between dimensions, aggregations, post-aggregations. final Set seen = new HashSet<>(); for (DimensionExpression dimensionExpression : dimensions) { if (!seen.add(dimensionExpression.getOutputName())) { @@ -92,12 +105,13 @@ private Grouping( public static Grouping create( final List dimensions, + final Subtotals subtotals, final List aggregations, - final DimFilter havingFilter, + @Nullable final DimFilter havingFilter, final RowSignature outputRowSignature ) { - return new Grouping(dimensions, aggregations, havingFilter, outputRowSignature); + return new Grouping(dimensions, subtotals, aggregations, havingFilter, outputRowSignature); } public List getDimensions() @@ -105,6 +119,11 @@ public List getDimensions() return dimensions; } + public Subtotals getSubtotals() + { + return subtotals; + } + public List getAggregations() { return aggregations; @@ -141,8 +160,77 @@ public RowSignature getOutputRowSignature() return outputRowSignature; } + /** + * Applies a post-grouping projection. 
+ * + * @see DruidQuery#computeGrouping which uses this + */ + public Grouping applyProject(final PlannerContext plannerContext, final Project project) + { + final List newDimensions = new ArrayList<>(); + final List newAggregations = new ArrayList<>(aggregations); + final Subtotals newSubtotals; + + final Projection postAggregationProjection = Projection.postAggregation( + project, + plannerContext, + outputRowSignature, + "p" + ); + + postAggregationProjection.getPostAggregators().forEach( + postAggregator -> newAggregations.add(Aggregation.create(postAggregator)) + ); + + // Remove literal dimensions that did not appear in the projection. This is useful for queries + // like "SELECT COUNT(*) FROM tbl GROUP BY 'dummy'" which some tools can generate, and for which we don't + // actually want to include a dimension 'dummy'. + final ImmutableBitSet aggregateProjectBits = RelOptUtil.InputFinder.bits(project.getChildExps(), null); + final int[] newDimIndexes = new int[dimensions.size()]; + + for (int i = 0; i < dimensions.size(); i++) { + final DimensionExpression dimension = dimensions.get(i); + if (Parser.parse(dimension.getDruidExpression().getExpression(), plannerContext.getExprMacroTable()) + .isLiteral() && !aggregateProjectBits.get(i)) { + newDimIndexes[i] = -1; + } else { + newDimIndexes[i] = newDimensions.size(); + newDimensions.add(dimension); + } + } + + // Renumber subtotals, if needed, to account for removed dummy dimensions. 
+ if (newDimensions.size() != dimensions.size()) { + final List newSubtotalsList = new ArrayList<>(); + + for (IntList subtotal : subtotals.getSubtotals()) { + final IntList newSubtotal = new IntArrayList(); + for (int dimIndex : subtotal) { + final int newDimIndex = newDimIndexes[dimIndex]; + if (newDimIndex >= 0) { + newSubtotal.add(newDimIndex); + } + } + + newSubtotalsList.add(newSubtotal); + } + + newSubtotals = new Subtotals(newSubtotalsList); + } else { + newSubtotals = subtotals; + } + + return Grouping.create( + newDimensions, + newSubtotals, + newAggregations, + havingFilter, + postAggregationProjection.getOutputRowSignature() + ); + } + @Override - public boolean equals(final Object o) + public boolean equals(Object o) { if (this == o) { return true; @@ -150,27 +238,17 @@ public boolean equals(final Object o) if (o == null || getClass() != o.getClass()) { return false; } - final Grouping grouping = (Grouping) o; - return Objects.equals(dimensions, grouping.dimensions) && - Objects.equals(aggregations, grouping.aggregations) && + Grouping grouping = (Grouping) o; + return dimensions.equals(grouping.dimensions) && + subtotals.equals(grouping.subtotals) && + aggregations.equals(grouping.aggregations) && Objects.equals(havingFilter, grouping.havingFilter) && - Objects.equals(outputRowSignature, grouping.outputRowSignature); + outputRowSignature.equals(grouping.outputRowSignature); } @Override public int hashCode() { - return Objects.hash(dimensions, aggregations, havingFilter, outputRowSignature); - } - - @Override - public String toString() - { - return "Grouping{" + - "dimensions=" + dimensions + - ", aggregations=" + aggregations + - ", havingFilter=" + havingFilter + - ", outputRowSignature=" + outputRowSignature + - '}'; + return Objects.hash(dimensions, subtotals, aggregations, havingFilter, outputRowSignature); } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/Subtotals.java 
b/sql/src/main/java/org/apache/druid/sql/calcite/rel/Subtotals.java new file mode 100644 index 000000000000..64f4fb9e9c43 --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/Subtotals.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.rel; + + +import it.unimi.dsi.fastutil.ints.IntList; +import org.apache.druid.query.dimension.DimensionSpec; + +import javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; + +/** + * Represents the Druid groupBy query concept of subtotals, which is similar to GROUPING SETS. + */ +public class Subtotals +{ + /** + * List of subtotals: each one is a list of dimension indexes. (i.e. [0, 1] means use the first and second + * dimensions). 
+ */ + private final List<IntList> subtotals; + + Subtotals(List<IntList> subtotals) + { + this.subtotals = subtotals; + } + + public List<IntList> getSubtotals() + { + return subtotals; + } + + @Nullable + public List<List<String>> toSubtotalsSpec(final List<DimensionSpec> dimensions) + { + if (hasEffect(dimensions)) { + return subtotals.stream() + .map( + subtotalInts -> { + final List<String> subtotalDimensionNames = new ArrayList<>(); + for (int dimIndex : subtotalInts) { + subtotalDimensionNames.add(dimensions.get(dimIndex).getOutputName()); + } + return subtotalDimensionNames; + } + ) + .collect(Collectors.toList()); + } else { + return null; + } + } + + /** + * Returns whether this subtotals spec has an effect, and cannot be ignored. + */ + public boolean hasEffect(final List<DimensionSpec> dimensionSpecs) + { + if (subtotals.isEmpty() || (subtotals.size() == 1 && subtotals.get(0).size() == dimensionSpecs.size())) { + return false; + } else { + return true; + } + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + Subtotals subtotals1 = (Subtotals) o; + return subtotals.equals(subtotals1.subtotals); + } + + @Override + public int hashCode() + { + return Objects.hash(subtotals); + } + + @Override + public String toString() + { + return "Subtotals{" + + "subtotals=" + subtotals + + '}'; + } +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/ProjectAggregatePruneUnusedCallRule.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/ProjectAggregatePruneUnusedCallRule.java index 8241592d3315..4c79e80b39bf 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/ProjectAggregatePruneUnusedCallRule.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/ProjectAggregatePruneUnusedCallRule.java @@ -50,13 +50,6 @@ public static ProjectAggregatePruneUnusedCallRule instance() return INSTANCE; } - @Override - public boolean matches(final RelOptRuleCall call) - { - final Aggregate aggregate =
call.rel(1); - return !aggregate.indicator && aggregate.getGroupSets().size() == 1; - } - @Override public void onMatch(final RelOptRuleCall call) { @@ -90,7 +83,6 @@ public void onMatch(final RelOptRuleCall call) final Aggregate newAggregate = aggregate.copy( aggregate.getTraitSet(), aggregate.getInput(), - aggregate.indicator, aggregate.getGroupSet(), aggregate.getGroupSets(), newAggregateCalls @@ -100,14 +92,13 @@ public void onMatch(final RelOptRuleCall call) final List fixUpProjects = new ArrayList<>(); final RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder(); - // Project the group unchanged. + // Project the group set unchanged. for (int i = 0; i < aggregate.getGroupCount(); i++) { fixUpProjects.add(rexBuilder.makeInputRef(newAggregate, i)); } // Replace pruned-out aggregators with NULLs. - int j = aggregate.getGroupCount(); - for (int i = aggregate.getGroupCount(); i < fieldCount; i++) { + for (int i = aggregate.getGroupCount(), j = aggregate.getGroupCount(); i < fieldCount; i++) { if (callsToKeep.get(i)) { fixUpProjects.add(rexBuilder.makeInputRef(newAggregate, j++)); } else { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 35d21553551c..e230635476fc 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -3753,6 +3753,27 @@ public void testGroupByWithFilterMatchingNothing() throws Exception ); } + @Test + public void testGroupByWithGroupByEmpty() throws Exception + { + testQuery( + "SELECT COUNT(*), SUM(cnt) FROM druid.foo GROUP BY ()", + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .aggregators(aggregators( + new CountAggregatorFactory("a0"), + new LongSumAggregatorFactory("a1", "cnt") + )) + 
.context(TIMESERIES_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of(new Object[]{6L, 6L}) + ); + } + @Test public void testGroupByWithFilterMatchingNothingWithGroupByLiteral() throws Exception { @@ -9579,6 +9600,596 @@ public void testGroupByTimeAndOtherDimension() throws Exception ); } + @Test + public void testGroupingSets() throws Exception + { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + + testQuery( + "SELECT dim2, gran, SUM(cnt)\n" + + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" + + "GROUP BY GROUPING SETS ( (dim2, gran), (dim2), (gran), () )", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "case_searched(notnull(\"dim2\"),\"dim2\",'')", + ValueType.STRING + ), + expressionVirtualColumn( + "v1", + "timestamp_floor(\"__time\",'P1M',null,'UTC')", + ValueType.LONG + ) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "v0"), + new DefaultDimensionSpec("v1", "v1", ValueType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("v0", "v1"), + ImmutableList.of("v0"), + ImmutableList.of("v1"), + ImmutableList.of() + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"", timestamp("2000-01-01"), 2L}, + new Object[]{"", timestamp("2001-01-01"), 1L}, + new Object[]{"a", timestamp("2000-01-01"), 1L}, + new Object[]{"a", timestamp("2001-01-01"), 1L}, + new Object[]{"abc", timestamp("2001-01-01"), 1L}, + new Object[]{"", null, 3L}, + new Object[]{"a", null, 2L}, + new Object[]{"abc", null, 1L}, + new Object[]{NULL_STRING, timestamp("2000-01-01"), 3L}, + new Object[]{NULL_STRING, timestamp("2001-01-01"), 3L}, + new Object[]{NULL_STRING, null, 6L} 
+ ) + ); + } + + @Test + public void testGroupingSetsWithNumericDimension() throws Exception + { + testQuery( + "SELECT cnt, COUNT(*)\n" + + "FROM foo\n" + + "GROUP BY GROUPING SETS ( (cnt), () )", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions(dimensions(new DefaultDimensionSpec("cnt", "d0", ValueType.LONG))) + .setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("d0"), + ImmutableList.of() + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{1L, 6L}, + new Object[]{null, 6L} + ) + ); + } + + @Test + public void testGroupByRollup() throws Exception + { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + + testQuery( + "SELECT dim2, gran, SUM(cnt)\n" + + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" + + "GROUP BY ROLLUP (dim2, gran)", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "case_searched(notnull(\"dim2\"),\"dim2\",'')", + ValueType.STRING + ), + expressionVirtualColumn( + "v1", + "timestamp_floor(\"__time\",'P1M',null,'UTC')", + ValueType.LONG + ) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "v0"), + new DefaultDimensionSpec("v1", "v1", ValueType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("v0", "v1"), + ImmutableList.of("v0"), + ImmutableList.of() + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"", timestamp("2000-01-01"), 2L}, + new Object[]{"", 
timestamp("2001-01-01"), 1L}, + new Object[]{"a", timestamp("2000-01-01"), 1L}, + new Object[]{"a", timestamp("2001-01-01"), 1L}, + new Object[]{"abc", timestamp("2001-01-01"), 1L}, + new Object[]{"", null, 3L}, + new Object[]{"a", null, 2L}, + new Object[]{"abc", null, 1L}, + new Object[]{NULL_STRING, null, 6L} + ) + ); + } + + @Test + public void testGroupByRollupDifferentOrder() throws Exception + { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + + // Like "testGroupByRollup", but the ROLLUP exprs are in the reverse order. + testQuery( + "SELECT dim2, gran, SUM(cnt)\n" + + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" + + "GROUP BY ROLLUP (gran, dim2)", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "timestamp_floor(\"__time\",'P1M',null,'UTC')", + ValueType.LONG + ), + expressionVirtualColumn( + "v1", + "case_searched(notnull(\"dim2\"),\"dim2\",'')", + ValueType.STRING + ) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "v0", ValueType.LONG), + new DefaultDimensionSpec("v1", "v1") + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("v0", "v1"), + ImmutableList.of("v0"), + ImmutableList.of() + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"", timestamp("2000-01-01"), 2L}, + new Object[]{"a", timestamp("2000-01-01"), 1L}, + new Object[]{"", timestamp("2001-01-01"), 1L}, + new Object[]{"a", timestamp("2001-01-01"), 1L}, + new Object[]{"abc", timestamp("2001-01-01"), 1L}, + new Object[]{NULL_STRING, timestamp("2000-01-01"), 3L}, + new Object[]{NULL_STRING, timestamp("2001-01-01"), 3L}, + new Object[]{NULL_STRING, null, 6L} + ) + ); + } + + 
@Test + public void testGroupByCube() throws Exception + { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + + testQuery( + "SELECT dim2, gran, SUM(cnt)\n" + + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" + + "GROUP BY CUBE (dim2, gran)", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "case_searched(notnull(\"dim2\"),\"dim2\",'')", + ValueType.STRING + ), + expressionVirtualColumn( + "v1", + "timestamp_floor(\"__time\",'P1M',null,'UTC')", + ValueType.LONG + ) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "v0"), + new DefaultDimensionSpec("v1", "v1", ValueType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("v0", "v1"), + ImmutableList.of("v0"), + ImmutableList.of("v1"), + ImmutableList.of() + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"", timestamp("2000-01-01"), 2L}, + new Object[]{"", timestamp("2001-01-01"), 1L}, + new Object[]{"a", timestamp("2000-01-01"), 1L}, + new Object[]{"a", timestamp("2001-01-01"), 1L}, + new Object[]{"abc", timestamp("2001-01-01"), 1L}, + new Object[]{"", null, 3L}, + new Object[]{"a", null, 2L}, + new Object[]{"abc", null, 1L}, + new Object[]{NULL_STRING, timestamp("2000-01-01"), 3L}, + new Object[]{NULL_STRING, timestamp("2001-01-01"), 3L}, + new Object[]{NULL_STRING, null, 6L} + ) + ); + } + + @Test + public void testGroupingSetsWithDummyDimension() throws Exception + { + // Cannot vectorize due to virtual columns. 
+ cannotVectorize(); + + testQuery( + "SELECT dim2, gran, SUM(cnt)\n" + + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" + + "GROUP BY GROUPING SETS ( (dim2, 'dummy', gran), (dim2), (gran), ('dummy') )", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "case_searched(notnull(\"dim2\"),\"dim2\",'')", + ValueType.STRING + ), + expressionVirtualColumn( + "v2", + "timestamp_floor(\"__time\",'P1M',null,'UTC')", + ValueType.LONG + ) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "v0"), + new DefaultDimensionSpec("v2", "v2", ValueType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("v0", "v2"), + ImmutableList.of("v0"), + ImmutableList.of(), + ImmutableList.of("v2") + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"", timestamp("2000-01-01"), 2L}, + new Object[]{"", timestamp("2001-01-01"), 1L}, + new Object[]{"a", timestamp("2000-01-01"), 1L}, + new Object[]{"a", timestamp("2001-01-01"), 1L}, + new Object[]{"abc", timestamp("2001-01-01"), 1L}, + new Object[]{"", null, 3L}, + new Object[]{"a", null, 2L}, + new Object[]{"abc", null, 1L}, + new Object[]{NULL_STRING, null, 6L}, + new Object[]{NULL_STRING, timestamp("2000-01-01"), 3L}, + new Object[]{NULL_STRING, timestamp("2001-01-01"), 3L} + ) + ); + } + + @Test + public void testGroupingSetsNoSuperset() throws Exception + { + // Cannot vectorize due to virtual columns. + cannotVectorize(); + + // Note: the grouping sets are reordered in the output of this query, but this is allowed. 
+ testQuery( + "SELECT dim2, gran, SUM(cnt)\n" + + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" + + "GROUP BY GROUPING SETS ( (), (dim2), (gran) )", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "case_searched(notnull(\"dim2\"),\"dim2\",'')", + ValueType.STRING + ), + expressionVirtualColumn( + "v1", + "timestamp_floor(\"__time\",'P1M',null,'UTC')", + ValueType.LONG + ) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "v0"), + new DefaultDimensionSpec("v1", "v1", ValueType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("v0"), + ImmutableList.of("v1"), + ImmutableList.of() + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"", null, 3L}, + new Object[]{"a", null, 2L}, + new Object[]{"abc", null, 1L}, + new Object[]{NULL_STRING, timestamp("2000-01-01"), 3L}, + new Object[]{NULL_STRING, timestamp("2001-01-01"), 3L}, + new Object[]{NULL_STRING, null, 6L} + ) + ); + } + + @Test + public void testGroupingSetsWithOrderByDimension() throws Exception + { + // Cannot vectorize due to virtual columns. 
+ cannotVectorize(); + + testQuery( + "SELECT dim2, gran, SUM(cnt)\n" + + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" + + "GROUP BY GROUPING SETS ( (), (dim2), (gran) )\n" + + "ORDER BY gran, dim2 DESC", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "case_searched(notnull(\"dim2\"),\"dim2\",'')", + ValueType.STRING + ), + expressionVirtualColumn( + "v1", + "timestamp_floor(\"__time\",'P1M',null,'UTC')", + ValueType.LONG + ) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "v0"), + new DefaultDimensionSpec("v1", "v1", ValueType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("v0"), + ImmutableList.of("v1"), + ImmutableList.of() + ) + ) + .setLimitSpec( + new DefaultLimitSpec( + ImmutableList.of( + new OrderByColumnSpec( + "v1", + Direction.ASCENDING, + StringComparators.NUMERIC + ), + new OrderByColumnSpec( + "v0", + Direction.DESCENDING, + StringComparators.LEXICOGRAPHIC + ) + ), + Integer.MAX_VALUE + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"abc", null, 1L}, + new Object[]{"a", null, 2L}, + new Object[]{"", null, 3L}, + new Object[]{NULL_STRING, null, 6L}, + new Object[]{NULL_STRING, timestamp("2000-01-01"), 3L}, + new Object[]{NULL_STRING, timestamp("2001-01-01"), 3L} + ) + ); + } + + @Test + public void testGroupingSetsWithOrderByAggregator() throws Exception + { + // Cannot vectorize due to virtual columns. 
+ cannotVectorize(); + + testQuery( + "SELECT dim2, gran, SUM(cnt)\n" + + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" + + "GROUP BY GROUPING SETS ( (), (dim2), (gran) )\n" + + "ORDER BY SUM(cnt)\n", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "case_searched(notnull(\"dim2\"),\"dim2\",'')", + ValueType.STRING + ), + expressionVirtualColumn( + "v1", + "timestamp_floor(\"__time\",'P1M',null,'UTC')", + ValueType.LONG + ) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "v0"), + new DefaultDimensionSpec("v1", "v1", ValueType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("v0"), + ImmutableList.of("v1"), + ImmutableList.of() + ) + ) + .setLimitSpec( + new DefaultLimitSpec( + ImmutableList.of( + new OrderByColumnSpec( + "a0", + Direction.ASCENDING, + StringComparators.NUMERIC + ) + ), + Integer.MAX_VALUE + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"abc", null, 1L}, + new Object[]{"a", null, 2L}, + new Object[]{"", null, 3L}, + new Object[]{NULL_STRING, timestamp("2000-01-01"), 3L}, + new Object[]{NULL_STRING, timestamp("2001-01-01"), 3L}, + new Object[]{NULL_STRING, null, 6L} + ) + ); + } + + @Test + public void testGroupingSetsWithOrderByAggregatorWithLimit() throws Exception + { + // Cannot vectorize due to virtual columns. 
+ cannotVectorize(); + + testQuery( + "SELECT dim2, gran, SUM(cnt)\n" + + "FROM (SELECT FLOOR(__time TO MONTH) AS gran, COALESCE(dim2, '') dim2, cnt FROM druid.foo) AS x\n" + + "GROUP BY GROUPING SETS ( (), (dim2), (gran) )\n" + + "ORDER BY SUM(cnt)\n" + + "LIMIT 1", + ImmutableList.of( + GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE1) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setVirtualColumns( + expressionVirtualColumn( + "v0", + "case_searched(notnull(\"dim2\"),\"dim2\",'')", + ValueType.STRING + ), + expressionVirtualColumn( + "v1", + "timestamp_floor(\"__time\",'P1M',null,'UTC')", + ValueType.LONG + ) + ) + .setDimensions( + dimensions( + new DefaultDimensionSpec("v0", "v0"), + new DefaultDimensionSpec("v1", "v1", ValueType.LONG) + ) + ) + .setAggregatorSpecs(aggregators(new LongSumAggregatorFactory("a0", "cnt"))) + .setSubtotalsSpec( + ImmutableList.of( + ImmutableList.of("v0"), + ImmutableList.of("v1"), + ImmutableList.of() + ) + ) + .setLimitSpec( + new DefaultLimitSpec( + ImmutableList.of( + new OrderByColumnSpec( + "a0", + Direction.ASCENDING, + StringComparators.NUMERIC + ) + ), + 1 + ) + ) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{"abc", null, 1L} + ) + ); + } + @Test public void testUsingSubqueryAsPartOfAndFilter() throws Exception { @@ -11761,8 +12372,6 @@ public void testLeftRightStringOperators() throws Exception ); } - - @Test public void testQueryContextOuterLimit() throws Exception { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/rel/GroupingTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/rel/GroupingTest.java new file mode 100644 index 000000000000..47ed88158774 --- /dev/null +++ b/sql/src/test/java/org/apache/druid/sql/calcite/rel/GroupingTest.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.rel; + +import nl.jqno.equalsverifier.EqualsVerifier; +import org.junit.Test; + +public class GroupingTest +{ + @Test + public void testEquals() + { + EqualsVerifier.forClass(Grouping.class) + .usingGetClass() + .withNonnullFields("dimensions", "subtotals", "aggregations", "outputRowSignature") + .verify(); + } +}