From a96602eb725d8d1eed1ddcb4cf67e5dfc4251164 Mon Sep 17 00:00:00 2001 From: feiniaofeiafei Date: Mon, 2 Dec 2024 20:04:14 +0800 Subject: [PATCH] [fix](nereids) fix merge_percentile_to_array when has same agg function (#44783) Related PR: #34313 Problem Summary The original PR did not handle the following scenario: ```sql SELECT SUM(a), PERCENTILE(pk, 0.1) AS c1, PERCENTILE(pk, 0.1) AS c2, PERCENTILE(pk, 0.4) AS c3 FROM test_merge_percentile; ``` In this case, the aggregate outputs include two identical functions (PERCENTILE(pk, 0.1)). When constructing the LogicalProject, a map was used where the key is the child of an Alias and the value is the Alias itself. However, this approach loses information when two Aliases share the same child. This PR modifies the map structure to use the child of an Alias as the key and a list of Alias objects as the value. This ensures that all Alias instances with the same child are preserved, resolving the issue of lost information in such cases. --- .../rules/rewrite/MergePercentileToArray.java | 26 +++++++++---------- .../merge_percentile_to_array.out | 12 +++++++++ .../merge_percentile_to_array.groovy | 4 +++ 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergePercentileToArray.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergePercentileToArray.java index f92ad84bde8525..fe81adf13bf29d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergePercentileToArray.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/MergePercentileToArray.java @@ -152,10 +152,10 @@ private Plan doMerge(LogicalAggregate aggregate) { (List) (List) newPercentileArrays); ImmutableList.Builder newProjectOutputExpressions = ImmutableList.builder(); newProjectOutputExpressions.addAll((List) (List) notChangeForProject); - Map existsAliasMap = Maps.newHashMap(); + Map> existsAliasMap = Maps.newHashMap(); // existsAliasMap is used to keep upper plan refer the same expr for (Alias alias : existsAliases) { - existsAliasMap.put(alias.child(), alias); + existsAliasMap.computeIfAbsent(alias.child(), k -> new ArrayList<>()).add(alias); } Map slotMap = Maps.newHashMap(); // slotMap is used to find the correspondence @@ -169,20 +169,22 @@ private Plan doMerge(LogicalAggregate aggregate) { for (Map.Entry> entry : funcMap.entrySet()) { for (int i = 0; i < entry.getValue().size(); i++) { AggregateFunction aggFunc = entry.getValue().get(i); - Alias originAlias = existsAliasMap.get(aggFunc); - DistinctAndExpr distinctAndExpr = new DistinctAndExpr(aggFunc.child(0), aggFunc.isDistinct()); - Alias newAlias = new Alias(originAlias.getExprId(), new ElementAt(slotMap.get(distinctAndExpr), - new IntegerLiteral(i + 1)), originAlias.getName()); - newProjectOutputExpressions.add(newAlias); + List originAliases = existsAliasMap.get(aggFunc); + for (Alias originAlias : originAliases) { + DistinctAndExpr distinctAndExpr = new DistinctAndExpr(aggFunc.child(0), aggFunc.isDistinct()); + Alias newAlias = new Alias(originAlias.getExprId(), new ElementAt(slotMap.get(distinctAndExpr), + new IntegerLiteral(i + 1)), originAlias.getName()); + newProjectOutputExpressions.add(newAlias); + } } } newProjectOutputExpressions.addAll(groupBySlots); - return new LogicalProject(newProjectOutputExpressions.build(), newAggregate); + return new LogicalProject<>(newProjectOutputExpressions.build(), newAggregate); } private static class DistinctAndExpr { - private Expression expression; - private boolean isDistinct; + private final Expression expression; + private final boolean isDistinct; public DistinctAndExpr(Expression expression, boolean isDistinct) { this.expression = expression; @@ -193,10 +195,6 @@ public Expression getExpression() { return expression; } - public boolean isDistinct() { - return isDistinct; - } - @Override public boolean equals(Object o) { if (this == o) { diff --git a/regression-test/data/nereids_rules_p0/merge_percentile_to_array/merge_percentile_to_array.out b/regression-test/data/nereids_rules_p0/merge_percentile_to_array/merge_percentile_to_array.out index b495302e80d3c8..1b2f876cfba50a 100644 --- a/regression-test/data/nereids_rules_p0/merge_percentile_to_array/merge_percentile_to_array.out +++ b/regression-test/data/nereids_rules_p0/merge_percentile_to_array/merge_percentile_to_array.out @@ -41,3 +41,15 @@ 7.0 \N \N 7.0 7.0 7 +-- !same_percentile -- +52 1.0 1.0 2.0 + +-- !same_percentile_group_by -- +\N 6.0 6.0 6.0 +2 3.0 3.0 3.0 +25 3.0 3.0 3.0 +4 2.0 2.0 2.0 +5 1.0 1.0 1.6 +7 6.0 6.0 6.0 +9 1.2 1.2 1.8 + diff --git a/regression-test/suites/nereids_rules_p0/merge_percentile_to_array/merge_percentile_to_array.groovy b/regression-test/suites/nereids_rules_p0/merge_percentile_to_array/merge_percentile_to_array.groovy index 2071d75ae85d4e..5bb13c6336c264 100644 --- a/regression-test/suites/nereids_rules_p0/merge_percentile_to_array/merge_percentile_to_array.groovy +++ b/regression-test/suites/nereids_rules_p0/merge_percentile_to_array/merge_percentile_to_array.groovy @@ -57,4 +57,8 @@ suite("merge_percentile_to_array") { percentile(abs(a), 0.55) as c2 from test_merge_percentile group by a) t; """ + order_qt_same_percentile """select sum(a),percentile(pk, 0.1) as c1 , percentile(pk, 0.1) as c2 , + percentile(pk, 0.4) as c2 from test_merge_percentile;""" + order_qt_same_percentile_group_by """select sum(a),percentile(pk, 0.1) as c1 , percentile(pk, 0.1) as c2 , + percentile(pk, 0.4) as c2 from test_merge_percentile group by a;""" } \ No newline at end of file