From aa78eeaf35d93e1b4db57177798a2e79c48c9c17 Mon Sep 17 00:00:00 2001 From: feiniaofeiafei Date: Thu, 11 Sep 2025 14:23:12 +0800 Subject: [PATCH 1/2] fix --- .../rewrite/DistinctAggStrategySelector.java | 20 ++++++++++++++++++- .../distinct_agg_strategy_selector.groovy | 12 +++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggStrategySelector.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggStrategySelector.java index 3b9caa40908fec..7e644a3b394120 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggStrategySelector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggStrategySelector.java @@ -19,6 +19,7 @@ import org.apache.doris.nereids.CascadesContext; import org.apache.doris.nereids.StatementContext; +import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.jobs.JobContext; import org.apache.doris.nereids.rules.rewrite.DistinctAggStrategySelector.DistinctSelectorContext; import org.apache.doris.nereids.rules.rewrite.StatsDerive.DeriveContext; @@ -117,9 +118,26 @@ public Plan visitLogicalAggregate(LogicalAggregate agg, Distinct } private boolean shouldUseMultiDistinct(LogicalAggregate agg) { - if (AggregateUtils.containsCountDistinctMultiExpr(agg)) { + boolean mustUseCte = AggregateUtils.containsCountDistinctMultiExpr(agg); + boolean mustUseMulti = agg.getSourceRepeat().isPresent(); + if (mustUseCte && mustUseMulti) { + throw new AnalysisException( + "Unsupported query: GROUPING SETS/ROLLUP/CUBE cannot be used with a combination of " + + "multi-column COUNT(DISTINCT) and other COUNT(DISTINCT) expressions.\n\n" + + "Unsupported scenarios:\n" + + "• COUNT(DISTINCT a, b) with COUNT(DISTINCT a) + GROUPING\n" + + "• COUNT(DISTINCT a, b) with COUNT(DISTINCT a, c) + GROUPING\n\n" + + "Supported scenarios:\n" + + "• Single COUNT(DISTINCT 
a, b) + GROUPING\n" + + "• Multiple COUNT(DISTINCT single_column) + " + + "GROUPING (e.g., COUNT(DISTINCT a), COUNT(DISTINCT b))"); + } + if (mustUseCte) { return false; } + if (mustUseMulti) { + return true; + } ConnectContext ctx = ConnectContext.get(); if (ctx == null) { return true; diff --git a/regression-test/suites/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.groovy b/regression-test/suites/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.groovy index 406a409390b6fc..a565f55a0bfedd 100644 --- a/regression-test/suites/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.groovy +++ b/regression-test/suites/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.groovy @@ -40,4 +40,16 @@ suite("distinct_agg_strategy_selector") { select count(distinct a_1) , count(distinct b_5) from t1000;""" qt_no_stats_should_use_multi_distinct """explain shape plan select count(distinct d_20) , count(distinct b_5) from t1000 group by a_1;""" + + test { + sql "select count(distinct d_20,b_5) , count(distinct b_5) from t1000 group by grouping sets ((d_20, b_5),())" + exception "Unsupported query" + } + // multi_distinct_strategy = 2 means use cte, but it will be ignored because agg with source repeat should not use cte split + sql "set multi_distinct_strategy=2" + explain { + sql "logical plan select count(distinct d_20) , count(distinct b_5) from t1000 group by grouping sets ((d_20, b_5),())" + contains "multi_distinct_count" + } + sql "set multi_distinct_strategy=0 " } \ No newline at end of file From 5bf7f11536fccdee31474bb3beb06b99bc03bca0 Mon Sep 17 00:00:00 2001 From: feiniaofeiafei Date: Thu, 11 Sep 2025 14:31:22 +0800 Subject: [PATCH 2/2] fix --- .../nereids/rules/rewrite/DistinctAggStrategySelector.java | 1 + .../nereids/rules/rewrite/SplitMultiDistinctStrategy.java | 6 ------ 2 files changed, 1 insertion(+), 6 deletions(-) diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggStrategySelector.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggStrategySelector.java index 7e644a3b394120..de83915c843d0f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggStrategySelector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggStrategySelector.java @@ -135,6 +135,7 @@ private boolean shouldUseMultiDistinct(LogicalAggregate agg) { if (mustUseCte) { return false; } + // TODO: support CTE split for aggregates with a source repeat in the future if (mustUseMulti) { return true; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctStrategy.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctStrategy.java index 950149657ef3a4..8fc150c705590a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctStrategy.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctStrategy.java @@ -166,12 +166,6 @@ private static boolean isDistinctMultiColumns(AggregateFunction func) { private static void collectDistinctAndNonDistinctFunctions(LogicalAggregate agg, List> aliases, List otherAggFuncs) { - // TODO with source repeat aggregate need to be supported in future - // 这个可能也没有关系,可以先注释掉,之后加一下关于grouping的测试 - // if (agg.getSourceRepeat().isPresent()) { - // return false; - // } - // boolean distinctMultiColumns = false; Map, List> distinctArgToAliasList = new LinkedHashMap<>(); for (NamedExpression namedExpression : agg.getOutputExpressions()) { if (!(namedExpression instanceof Alias) || !(namedExpression.child(0) instanceof AggregateFunction)) {