From 35727712547f514ed1cf4a305164df4dea119464 Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Mon, 23 Oct 2023 16:39:53 -0700 Subject: [PATCH 1/2] Fixing nested group by query with order by in outer query --- .../calcite/planner/CalciteRulesManager.java | 6 +- .../sql/calcite/CalciteArraysQueryTest.java | 81 ++++++++++--------- .../sql/calcite/CalciteJoinQueryTest.java | 37 ++++++--- .../druid/sql/calcite/CalciteQueryTest.java | 29 +++++-- .../sql/calcite/DrillWindowQueryTest.java | 1 - .../window/queries/nestedAggs/multiWin_5.e | 8 +- 6 files changed, 102 insertions(+), 60 deletions(-) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java index 9a4abf5b226a..b944f3cd5351 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java @@ -177,7 +177,11 @@ public class CalciteRulesManager private static final List ABSTRACT_RELATIONAL_RULES = ImmutableList.of( AbstractConverter.ExpandConversionRule.INSTANCE, - CoreRules.AGGREGATE_REMOVE, + // Removing CoreRules.AGGREGATE_REMOVE rule here + // as after the Calcite upgrade, it would plan queries to a scan over a group by + // with ordering on a non-time column + // which is not allowed in Druid. We should add that rule back + // once Druid starts to support non-time ordering over scan queries CoreRules.UNION_TO_DISTINCT, CoreRules.PROJECT_REMOVE, CoreRules.AGGREGATE_JOIN_TRANSPOSE, diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 3a5da7d325ff..ec4dda0dc696 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -50,7 +50,6 @@ import org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory; import org.apache.druid.query.aggregation.FilteredAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; -import org.apache.druid.query.aggregation.post.ExpressionPostAggregator; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.expression.TestExprMacroTable; import org.apache.druid.query.extraction.SubstringDimExtractionFn; @@ -3172,45 +3171,55 @@ public void testArrayAggAsArrayFromJoin() public void testArrayAggGroupByArrayAggFromSubquery() { cannotVectorize(); - + skipVectorize(); testQuery( "SELECT dim2, arr, COUNT(*) FROM (SELECT dim2, ARRAY_AGG(DISTINCT dim1) as arr FROM foo WHERE dim1 is not null GROUP BY 1 LIMIT 5) GROUP BY 1,2", QUERY_CONTEXT_NO_STRINGIFY_ARRAY, ImmutableList.of( - new TopNQueryBuilder() - .dataSource(CalciteTests.DATASOURCE1) - .dimension(new DefaultDimensionSpec( - "dim2", - "d0", - ColumnType.STRING - )) - .metric(new DimensionTopNMetricSpec( - null, - StringComparators.LEXICOGRAPHIC - )) - .filters(notNull("dim1")) - .threshold(5) - .aggregators(new ExpressionLambdaAggregatorFactory( - "a0", - ImmutableSet.of("dim1"), - "__acc", - "ARRAY[]", - "ARRAY[]", - true, - true, - false, - "array_set_add(\"__acc\", \"dim1\")", - "array_set_add_all(\"__acc\", \"a0\")", - null, - null, - new HumanReadableBytes(1024), - ExprMacroTable.nil() - )) - .intervals(querySegmentSpec(Filtration.eternity())) - .granularity(Granularities.ALL) - .context(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) - .postAggregators(new ExpressionPostAggregator("s0", "1", null, ExprMacroTable.nil())) - .build() + GroupByQuery.builder() + .setDataSource(new TopNQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .dimension(new DefaultDimensionSpec( + "dim2", + "d0", + ColumnType.STRING + )) + .metric(new DimensionTopNMetricSpec( + null, + StringComparators.LEXICOGRAPHIC + )) + .filters(notNull("dim1")) + .threshold(5) + .aggregators(new ExpressionLambdaAggregatorFactory( + "a0", + ImmutableSet.of("dim1"), + "__acc", + "ARRAY[]", + "ARRAY[]", + true, + true, + false, + "array_set_add(\"__acc\", \"dim1\")", + "array_set_add_all(\"__acc\", \"a0\")", + null, + null, + new HumanReadableBytes(1024), + ExprMacroTable.nil() + )) + .intervals(querySegmentSpec(Filtration.eternity())) + .granularity(Granularities.ALL) + .context(QUERY_CONTEXT_NO_STRINGIFY_ARRAY) + .build() + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + new DefaultDimensionSpec("d0", "_d0", ColumnType.STRING), + new DefaultDimensionSpec("a0", "_d1", ColumnType.STRING_ARRAY) + ) + .setAggregatorSpecs(new CountAggregatorFactory("_a0")) + .setContext(QUERY_CONTEXT_DEFAULT) + .build() ), useDefault ? ImmutableList.of( diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java index 0882f3c9cb12..ba55f7b4e4a7 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java @@ -5381,6 +5381,8 @@ public void testInnerJoinWithFilterPushdownAndManyFiltersNonEmptyResults(Map queryContext = new HashMap<>(QUERY_CONTEXT_DEFAULT); @@ -5397,21 +5399,32 @@ public void testPlanWithInFilterMoreThanInSubQueryThreshold() .dataSource( JoinDataSource.create( new TableDataSource(CalciteTests.DATASOURCE3), - InlineDataSource.fromIterable( - ImmutableList.of( - new Object[]{4842L}, - new Object[]{4844L}, - new Object[]{4845L}, - new Object[]{14905L}, - new Object[]{4853L}, - new Object[]{29064L} - ), - RowSignature.builder() - .add("ROW_VALUE", ColumnType.LONG) + new QueryDataSource( + GroupByQuery.builder() + .setDataSource(InlineDataSource.fromIterable( + ImmutableList.of( + new Object[]{4842L}, + new Object[]{4844L}, + new Object[]{4845L}, + new Object[]{14905L}, + new Object[]{4853L}, + new Object[]{29064L} + ), + RowSignature.builder() + .add("ROW_VALUE", ColumnType.LONG) + .build() + ) + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setDimensions( + new DefaultDimensionSpec("ROW_VALUE", "d0", ColumnType.LONG) + ) + .setGranularity(Granularities.ALL) + .setLimitSpec(NoopLimitSpec.instance()) .build() ), "j0.", - "(\"l1\" == \"j0.ROW_VALUE\")", + "(\"l1\" == \"j0.d0\")", JoinType.INNER, null, ExprMacroTable.nil(), diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 811227162ac7..7a39da2677f4 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -13949,15 +13949,32 @@ public void testSubqueryTypeMismatchWithLiterals() + "group by 1", ImmutableList.of( GroupByQuery.builder() - .setDataSource(CalciteTests.DATASOURCE3) + .setDataSource(GroupByQuery.builder() + .setDataSource(CalciteTests.DATASOURCE3) + .setInterval(querySegmentSpec(Intervals.ETERNITY)) + .setGranularity(Granularities.ALL) + .addDimension(new DefaultDimensionSpec( + "dim1", + "_d0", + ColumnType.STRING + )) + .addAggregator(new LongSumAggregatorFactory("a0", "l1")) + .build() + ) .setInterval(querySegmentSpec(Intervals.ETERNITY)) - .setGranularity(Granularities.ALL) - .addDimension(new DefaultDimensionSpec("dim1", "_d0", ColumnType.STRING)) - .addAggregator(new LongSumAggregatorFactory("a0", "l1")) - .setPostAggregatorSpecs(ImmutableList.of( - expressionPostAgg("p0", "case_searched((\"a0\" == 0),1,0)") + .setDimensions(new DefaultDimensionSpec("_d0", "d0", ColumnType.STRING)) + .setAggregatorSpecs(aggregators( + new FilteredAggregatorFactory( + new CountAggregatorFactory("_a0"), + useDefault ? + selector("a0", "0") : + equality("a0", 0, ColumnType.LONG) + ) )) + .setGranularity(Granularities.ALL) + .setContext(QUERY_CONTEXT_DEFAULT) .build() + ), useDefault ? ImmutableList.of( new Object[]{"", 0L}, diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java index bf13b88e2917..58138ffd1ee6 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java @@ -4364,7 +4364,6 @@ public void test_aggregates_winFnQry_9() windowQueryTest(); } - @NotYetSupported(Modes.CANNOT_APPLY_VIRTUAL_COL) @DrillTest("nestedAggs/multiWin_5") @Test public void test_nestedAggs_multiWin_5() diff --git a/sql/src/test/resources/drill/window/queries/nestedAggs/multiWin_5.e b/sql/src/test/resources/drill/window/queries/nestedAggs/multiWin_5.e index 03d3e377f3d1..d0d59256426b 100644 --- a/sql/src/test/resources/drill/window/queries/nestedAggs/multiWin_5.e +++ b/sql/src/test/resources/drill/window/queries/nestedAggs/multiWin_5.e @@ -8,15 +8,15 @@ false 280487665.00748299428571428571 false 295757331.64717262000000000000 false 302662813.16785714333333333333 false 304608131.82107142900000000000 -false 303537640.51502361272727272727 +false 303537640.515023651272727272727 true 4.0000000000000000 true 4.5000000000000000 true 5.2222222222222222 true 4101.1041666666666667 true 5907.4433333333333333 true 284524.6472222222222778 -true 449300.4527210884353810 +true 449300.4527210885000000 true 550414.3805059523809583 -true 613525.0542768959435185 +true 613525.0542768961000000 true 652829.5088492063491667 -true 676669.0989538239537879 +true 676669.0989538240000000 From 9dca21728dff07d9071073b619bcf10845966cca Mon Sep 17 00:00:00 2001 From: Soumyava Das Date: Mon, 23 Oct 2023 16:46:15 -0700 Subject: [PATCH 2/2] Adding examples --- .../druid/sql/calcite/CalciteQueryTest.java | 134 ++++++++++++++++++ 1 file changed, 134 insertions(+) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 7a39da2677f4..482b02f61ab2 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -14292,4 +14292,138 @@ public void testUnSupportedNullsLast() .run()); assertThat(e, invalidSqlIs("ASCENDING ordering with NULLS LAST is not supported! (line [1], column [41])")); } + + @Test + public void testInGroupByLimitOutGroupByOrderBy() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "with t AS (SELECT m2, COUNT(m1) as trend_score\n" + + "FROM \"foo\"\n" + + "GROUP BY 1 \n" + + "LIMIT 10\n" + + ")\n" + + "select m2, (MAX(trend_score)) from t\n" + + "where m2 > 2\n" + + "GROUP BY 1 \n" + + "ORDER BY 2 DESC", + QUERY_CONTEXT_DEFAULT, + ImmutableList.of( + new GroupByQuery.Builder() + .setDataSource( + new TopNQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .dimension(new DefaultDimensionSpec("m2", "d0", ColumnType.DOUBLE)) + .threshold(10) + .aggregators(aggregators( + useDefault + ? new CountAggregatorFactory("a0") + : new FilteredAggregatorFactory( + new CountAggregatorFactory("a0"), + notNull("m1") + ) + )) + .metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC)) + .context(OUTER_LIMIT_CONTEXT) + .build() + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + new DefaultDimensionSpec("d0", "_d0", ColumnType.DOUBLE) + ) + .setDimFilter( + useDefault ? + bound("d0", "2", null, true, false, null, StringComparators.NUMERIC) : + new RangeFilter("d0", ColumnType.LONG, 2L, null, true, false, null) + ) + .setAggregatorSpecs(aggregators( + new LongMaxAggregatorFactory("_a0", "a0") + )) + .setLimitSpec( + DefaultLimitSpec + .builder() + .orderBy(new OrderByColumnSpec("_a0", Direction.DESCENDING, StringComparators.NUMERIC)) + .build() + ) + .setContext(OUTER_LIMIT_CONTEXT) + .build() + ), + ImmutableList.of( + new Object[]{3.0D, 1L}, + new Object[]{4.0D, 1L}, + new Object[]{5.0D, 1L}, + new Object[]{6.0D, 1L} + ) + ); + } + + @Test + public void testInGroupByOrderByLimitOutGroupByOrderByLimit() + { + skipVectorize(); + cannotVectorize(); + testQuery( + "with t AS (SELECT m2 as mo, COUNT(m1) as trend_score\n" + + "FROM \"foo\"\n" + + "GROUP BY 1\n" + + "ORDER BY trend_score DESC\n" + + "LIMIT 10)\n" + + "select mo, (MAX(trend_score)) from t\n" + + "where mo > 2\n" + + "GROUP BY 1 \n" + + "ORDER BY 2 DESC LIMIT 2\n", + QUERY_CONTEXT_DEFAULT, + ImmutableList.of( + new GroupByQuery.Builder() + .setDataSource( + new TopNQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(querySegmentSpec(Filtration.eternity())) + .dimension(new DefaultDimensionSpec("m2", "d0", ColumnType.DOUBLE)) + .threshold(10) + .aggregators(aggregators( + useDefault + ? new CountAggregatorFactory("a0") + : new FilteredAggregatorFactory( + new CountAggregatorFactory("a0"), + notNull("m1") + ) + )) + .metric(new NumericTopNMetricSpec("a0")) + .context(OUTER_LIMIT_CONTEXT) + .build() + ) + .setInterval(querySegmentSpec(Filtration.eternity())) + .setGranularity(Granularities.ALL) + .setDimensions( + new DefaultDimensionSpec("d0", "_d0", ColumnType.DOUBLE) + ) + .setDimFilter( + useDefault ? + bound("d0", "2", null, true, false, null, StringComparators.NUMERIC) : + new RangeFilter("d0", ColumnType.LONG, 2L, null, true, false, null) + ) + .setAggregatorSpecs(aggregators( + new LongMaxAggregatorFactory("_a0", "a0") + )) + .setLimitSpec( + DefaultLimitSpec + .builder() + .orderBy(new OrderByColumnSpec("_a0", Direction.DESCENDING, StringComparators.NUMERIC)) + .limit(2) + .build() + ) + .setContext(OUTER_LIMIT_CONTEXT) + .build() + ), + ImmutableList.of( + new Object[]{3.0D, 1L}, + new Object[]{4.0D, 1L} + ) + ); + } + }