Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -856,7 +856,9 @@ private TimeseriesQuery toTimeseriesQuery(final QueryFeatureInspector queryFeatu
// An aggregation query should return one row per group, with no grouping (e.g. ALL granularity), the entire table
// is the group, so we should not skip empty buckets. When there are no results, this means we return the
// initialized state for given aggregators instead of nothing.
if (!Granularities.ALL.equals(queryGranularity)) {
// Alternatively, the timeseries query should return empty buckets, even with ALL granularity when timeseries query
// was originally a groupBy query, but with the grouping dimensions removed away in Grouping#applyProject
if (!Granularities.ALL.equals(queryGranularity) || grouping.hasGroupingDimensionsDropped()) {
theContext.put(TimeseriesQuery.SKIP_EMPTY_BUCKETS, true);
}
theContext.putAll(plannerContext.getQueryContext());
Expand Down
61 changes: 58 additions & 3 deletions sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,19 +63,37 @@ public class Grouping
private final DimFilter havingFilter;
private final RowSignature outputRowSignature;

// Denotes whether the original Grouping had more dimensions which were dropped while applying projection to optimize
// the grouping. Used for returning result which is consistent with most SQL implementations, by correspondingly
// setting/unsetting the SKIP_EMPTY_BUCKETS flag, if the GroupBy query can be reduced to a timeseries query.
private final boolean groupingDimensionsDropped;

private Grouping(
final List<DimensionExpression> dimensions,
final Subtotals subtotals,
final List<Aggregation> aggregations,
@Nullable final DimFilter havingFilter,
final RowSignature outputRowSignature
)
{
this(dimensions, subtotals, aggregations, havingFilter, outputRowSignature, false);
}

private Grouping(
final List<DimensionExpression> dimensions,
final Subtotals subtotals,
final List<Aggregation> aggregations,
@Nullable final DimFilter havingFilter,
final RowSignature outputRowSignature,
final boolean groupingDimensionsDropped
)
{
this.dimensions = ImmutableList.copyOf(dimensions);
this.subtotals = Preconditions.checkNotNull(subtotals, "subtotals");
this.aggregations = ImmutableList.copyOf(aggregations);
this.havingFilter = havingFilter;
this.outputRowSignature = Preconditions.checkNotNull(outputRowSignature, "outputRowSignature");
this.groupingDimensionsDropped = groupingDimensionsDropped;

// Verify no collisions between dimensions, aggregations, post-aggregations.
final Set<String> seen = new HashSet<>();
Expand Down Expand Up @@ -103,6 +121,27 @@ private Grouping(
}
}

// This method is private since groupingDimensionsDropped should only be deviated from default in
// applyProject
private static Grouping create(
final List<DimensionExpression> dimensions,
final Subtotals subtotals,
final List<Aggregation> aggregations,
@Nullable final DimFilter havingFilter,
final RowSignature outputRowSignature,
final boolean groupingDimensionsDropped
)
{
return new Grouping(
dimensions,
subtotals,
aggregations,
havingFilter,
outputRowSignature,
groupingDimensionsDropped
);
}

public static Grouping create(
final List<DimensionExpression> dimensions,
final Subtotals subtotals,
Expand Down Expand Up @@ -160,6 +199,11 @@ public RowSignature getOutputRowSignature()
return outputRowSignature;
}

public boolean hasGroupingDimensionsDropped()
{
return groupingDimensionsDropped;
}

/**
* Applies a post-grouping projection.
*
Expand Down Expand Up @@ -187,11 +231,13 @@ public Grouping applyProject(final PlannerContext plannerContext, final Project
// actually want to include a dimension 'dummy'.
final ImmutableBitSet aggregateProjectBits = RelOptUtil.InputFinder.bits(project.getChildExps(), null);
final int[] newDimIndexes = new int[dimensions.size()];
boolean droppedDimensions = false;

for (int i = 0; i < dimensions.size(); i++) {
final DimensionExpression dimension = dimensions.get(i);
if (Parser.parse(dimension.getDruidExpression().getExpression(), plannerContext.getExprMacroTable())
.isLiteral() && !aggregateProjectBits.get(i)) {
droppedDimensions = true;
newDimIndexes[i] = -1;
} else {
newDimIndexes[i] = newDimensions.size();
Expand Down Expand Up @@ -225,7 +271,8 @@ public Grouping applyProject(final PlannerContext plannerContext, final Project
newSubtotals,
newAggregations,
havingFilter,
postAggregationProjection.getOutputRowSignature()
postAggregationProjection.getOutputRowSignature(),
droppedDimensions
);
}

Expand All @@ -243,12 +290,20 @@ public boolean equals(Object o)
subtotals.equals(grouping.subtotals) &&
aggregations.equals(grouping.aggregations) &&
Objects.equals(havingFilter, grouping.havingFilter) &&
outputRowSignature.equals(grouping.outputRowSignature);
outputRowSignature.equals(grouping.outputRowSignature) &&
groupingDimensionsDropped == grouping.groupingDimensionsDropped;
}

@Override
public int hashCode()
{
return Objects.hash(dimensions, subtotals, aggregations, havingFilter, outputRowSignature);
return Objects.hash(
dimensions,
subtotals,
aggregations,
havingFilter,
outputRowSignature,
groupingDimensionsDropped
);
}
}
113 changes: 109 additions & 4 deletions sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -3911,12 +3911,10 @@ public void testGroupByWithFilterMatchingNothingWithGroupByLiteral() throws Exce
new CountAggregatorFactory("a0"),
new LongMaxAggregatorFactory("a1", "cnt")
))
.context(QUERY_CONTEXT_DEFAULT)
.context(QUERY_CONTEXT_DO_SKIP_EMPTY_BUCKETS)
.build()
),
ImmutableList.of(
new Object[]{0L, NullHandling.sqlCompatible() ? null : Long.MIN_VALUE}
)
ImmutableList.of()
);
}

Expand Down Expand Up @@ -13342,4 +13340,111 @@ public void testCommonVirtualExpressionWithDifferentValueType() throws Exception
ImmutableList.of()
);
}

// When optimization in Grouping#applyProject is applied, and it reduces a Group By query to a timeseries, we
// want it to return empty bucket if no row matches
@Test
public void testReturnEmptyRowWhenGroupByIsConvertedToTimeseriesWithSingleConstantDimension() throws Exception
{
skipVectorize();
testQuery(
"SELECT 'A' from foo WHERE m1 = 50 AND dim1 = 'wat' GROUP BY 'foobar'",
ImmutableList.of(
Druids.newTimeseriesQueryBuilder()
.dataSource(CalciteTests.DATASOURCE1)
.intervals(querySegmentSpec(Filtration.eternity()))
.filters(
and(
selector("m1", "50", null),
selector("dim1", "wat", null)
)
)
.granularity(Granularities.ALL)
.postAggregators(
new ExpressionPostAggregator("p0", "'A'", null, ExprMacroTable.nil())
)
.context(QUERY_CONTEXT_DO_SKIP_EMPTY_BUCKETS)
.build()
),
ImmutableList.of()
);


// dim1 is not getting reduced to 'wat' in this case in Calcite (ProjectMergeRule is not getting applied),
// therefore the query is not optimized to a timeseries query
testQuery(
"SELECT 'A' from foo WHERE dim1 = 'wat' GROUP BY dim1",
ImmutableList.of(
GroupByQuery.builder()
.setDataSource(
"foo"
)
.setInterval(querySegmentSpec(Intervals.ETERNITY))
.setGranularity(Granularities.ALL)
.addDimension(new DefaultDimensionSpec("dim1", "d0", ColumnType.STRING))
.setDimFilter(selector("dim1", "wat", null))
.setPostAggregatorSpecs(
ImmutableList.of(
new ExpressionPostAggregator("p0", "'A'", null, ExprMacroTable.nil())
)
)

.build()
),
ImmutableList.of()
);
}

@Test
public void testReturnEmptyRowWhenGroupByIsConvertedToTimeseriesWithMutlipleConstantDimensions() throws Exception
{
skipVectorize();
testQuery(
"SELECT 'A', dim1 from foo WHERE m1 = 50 AND dim1 = 'wat' GROUP BY dim1",
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

btw I noticed that it runs as a group-by query if we remove the m1 = 50 clause. do you know why?

Copy link
Copy Markdown
Contributor Author

@LakshSingla LakshSingla Jan 3, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On removing the m1=50 clause, the dim1 is not getting reduced to wat literal in the Calcite planner phase, so the optimization in Grouping.java to eliminate the literals is not getting applied.
I checked the place where this reduction is happening, and it's in the ProjectMergeRule of Calcite. When there's a single project, then ProjectMergeRule is not getting invoked.

ImmutableList.of(
Druids.newTimeseriesQueryBuilder()
.dataSource(CalciteTests.DATASOURCE1)
.intervals(querySegmentSpec(Filtration.eternity()))
.filters(
and(
selector("m1", "50", null),
selector("dim1", "wat", null)
)
)
.granularity(Granularities.ALL)
.postAggregators(
new ExpressionPostAggregator("p0", "'A'", null, ExprMacroTable.nil()),
new ExpressionPostAggregator("p1", "'wat'", null, ExprMacroTable.nil())
)
.context(QUERY_CONTEXT_DO_SKIP_EMPTY_BUCKETS)
.build()
),
ImmutableList.of()
);

// Sanity test, that even when dimensions are reduced, but should produce a valid result (i.e. when filters are
// correct, then they should
testQuery(
"SELECT 'A', dim1 from foo WHERE m1 = 2.0 AND dim1 = '10.1' GROUP BY dim1",
ImmutableList.of(
Druids.newTimeseriesQueryBuilder()
.dataSource(CalciteTests.DATASOURCE1)
.intervals(querySegmentSpec(Filtration.eternity()))
.filters(
and(
selector("m1", "2.0", null),
selector("dim1", "10.1", null)
)
)
.granularity(Granularities.ALL)
.postAggregators(
new ExpressionPostAggregator("p0", "'A'", null, ExprMacroTable.nil()),
new ExpressionPostAggregator("p1", "'10.1'", null, ExprMacroTable.nil())
)
.context(QUERY_CONTEXT_DO_SKIP_EMPTY_BUCKETS)
.build()
),
ImmutableList.of(new Object[]{"A", "10.1"})
);
}
}