From b712a5da9efdb9e19392a784464b0f332d73e3d5 Mon Sep 17 00:00:00 2001 From: 924060929 Date: Thu, 13 Mar 2025 11:46:50 +0800 Subject: [PATCH 1/4] cherry pick from #48987 fix distinct window compute wrong result, introduced by #14397 ```sql select distinct sum(value) over(partition by id) from ( select 100 value, 1 id union all select 100, 2 )a; +----------------------------------+ | sum(value) over(partition by id) | +----------------------------------+ | 100 | | 100 | +----------------------------------+ ``` --- .../ProjectWithDistinctToAggregate.java | 7 --- .../ReplaceExpressionByChildOutput.java | 13 +++++ .../aggregate/select_distinct.groovy | 51 +++++++++++++++++++ 3 files changed, 64 insertions(+), 7 deletions(-) create mode 100644 regression-test/suites/nereids_p0/aggregate/select_distinct.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java index f858820d612ca4..05a9048bb9cb52 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java @@ -19,8 +19,6 @@ import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; -import org.apache.doris.nereids.trees.expressions.Expression; -import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; @@ -46,12 +44,7 @@ public Rule build() { return RuleType.PROJECT_WITH_DISTINCT_TO_AGGREGATE.build( logicalProject() .when(LogicalProject::isDistinct) - .whenNot(project -> project.getProjects().stream().anyMatch(this::hasAggregateFunction)) .then(project -> new LogicalAggregate<>(project.getProjects(), false, project.child())) ); } - - private boolean hasAggregateFunction(Expression expression) { - return expression.anyMatch(AggregateFunction.class::isInstance); - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ReplaceExpressionByChildOutput.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ReplaceExpressionByChildOutput.java index cd53086f96625d..2afe254e208188 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ReplaceExpressionByChildOutput.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ReplaceExpressionByChildOutput.java @@ -51,6 +51,19 @@ public List buildRules() { return replaceSortExpression(sort, sMap); }) )) + .add(RuleType.REPLACE_SORT_EXPRESSION_BY_CHILD_OUTPUT.build( + // select distinct + logicalSort( + logicalAggregate(logicalProject()).when(agg -> + agg.getGroupByExpressions().equals(agg.getOutputExpressions()) + && agg.getGroupByExpressions().equals(agg.child().getOutput()) + ) + ).then(sort -> { + LogicalProject project = sort.child().child(); + Map sMap = buildOutputAliasMap(project.getProjects()); + return replaceSortExpression(sort, sMap); + }) + )) .add(RuleType.REPLACE_SORT_EXPRESSION_BY_CHILD_OUTPUT.build( logicalSort(logicalAggregate()).then(sort -> { LogicalAggregate aggregate = sort.child(); diff --git a/regression-test/suites/nereids_p0/aggregate/select_distinct.groovy b/regression-test/suites/nereids_p0/aggregate/select_distinct.groovy new file mode 100644 index 00000000000000..1ae944be3c6d1a --- /dev/null +++ b/regression-test/suites/nereids_p0/aggregate/select_distinct.groovy @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("select_distinct") { + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + + multi_sql """ + SET enable_nereids_planner=true; + SET enable_fallback_to_original_planner=false; + drop table if exists test_distinct_window; + create table test_distinct_window(id int) distributed by hash(id) properties('replication_num'='1'); + insert into test_distinct_window values(1), (2), (3); + """ + + test { + sql "select distinct sum(value) over(partition by id) from (select 100 value, 1 id union all select 100, 2)a" + result([[100L]]) + } + + test { + sql "select distinct value+1 from (select 100 value, 1 id union all select 100, 2)a order by value+1" + result([[101]]) + } + + test { + sql "select distinct 1, 2, 3 from test_distinct_window" + result([[1, 2, 3]]) + } + + test { + sql "select distinct sum(id) from test_distinct_window" + result([[6L]]) + } +} From 5fbeed478705fe82a13f62c3ac5c73b790148094 Mon Sep 17 00:00:00 2001 From: 924060929 Date: Thu, 13 Mar 2025 17:11:21 +0800 Subject: [PATCH 2/4] fix --- .../doris/nereids/jobs/executor/Analyzer.java | 9 -- .../analysis/EliminateDistinctConstant.java | 48 ------- .../analysis/ProjectToGlobalAggregate.java | 126 ++++++++++++++++-- .../ProjectWithDistinctToAggregate.java | 50 ------- 4 files changed, 115 insertions(+), 118 deletions(-) delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/EliminateDistinctConstant.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java index 3a111a7f4d776f..855c28a7a55523 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java @@ -30,7 +30,6 @@ import org.apache.doris.nereids.rules.analysis.CheckPolicy; import org.apache.doris.nereids.rules.analysis.CollectJoinConstraint; import org.apache.doris.nereids.rules.analysis.CollectSubQueryAlias; -import org.apache.doris.nereids.rules.analysis.EliminateDistinctConstant; import org.apache.doris.nereids.rules.analysis.EliminateGroupByConstant; import org.apache.doris.nereids.rules.analysis.EliminateLogicalSelectHint; import org.apache.doris.nereids.rules.analysis.FillUpMissingSlots; @@ -40,7 +39,6 @@ import org.apache.doris.nereids.rules.analysis.NormalizeRepeat; import org.apache.doris.nereids.rules.analysis.OneRowRelationExtractAggregate; import org.apache.doris.nereids.rules.analysis.ProjectToGlobalAggregate; -import org.apache.doris.nereids.rules.analysis.ProjectWithDistinctToAggregate; import org.apache.doris.nereids.rules.analysis.ReplaceExpressionByChildOutput; import org.apache.doris.nereids.rules.analysis.SubqueryToApply; import org.apache.doris.nereids.rules.analysis.VariableToLiteral; @@ -105,13 +103,6 @@ private static List buildAnalyzerJobs() { bottomUp(new AddInitMaterializationHook()), bottomUp( new ProjectToGlobalAggregate(), - // this rule check's the logicalProject node's isDistinct property - // and replace the logicalProject node with a LogicalAggregate node - // so any rule before this, if create a new logicalProject node - // should make sure isDistinct property is correctly passed around. - // please see rule BindSlotReference or BindFunction for example - new EliminateDistinctConstant(), - new ProjectWithDistinctToAggregate(), new ReplaceExpressionByChildOutput(), new OneRowRelationExtractAggregate() ), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/EliminateDistinctConstant.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/EliminateDistinctConstant.java deleted file mode 100644 index 0d051ee8c87ed4..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/EliminateDistinctConstant.java +++ /dev/null @@ -1,48 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.nereids.rules.analysis; - -import org.apache.doris.nereids.rules.Rule; -import org.apache.doris.nereids.rules.RuleType; -import org.apache.doris.nereids.trees.expressions.Expression; -import org.apache.doris.nereids.trees.plans.LimitPhase; -import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; -import org.apache.doris.nereids.trees.plans.logical.LogicalProject; - -/** - * EliminateDistinctConstant. - *

- * example sql: - *

- * select distinct 1,2,3 from tbl
- *          =>
- * select 1,2,3 from (select 1, 2, 3 from tbl limit 1) as tmp
- *  
- */ -public class EliminateDistinctConstant extends OneAnalysisRuleFactory { - @Override - public Rule build() { - return RuleType.ELIMINATE_DISTINCT_CONSTANT.build( - logicalProject() - .when(LogicalProject::isDistinct) - .when(project -> project.getProjects().stream().allMatch(Expression::isConstant)) - .then(project -> new LogicalProject(project.getProjects(), new LogicalLimit<>(1, 0, - LimitPhase.ORIGIN, project.child()))) - ); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectToGlobalAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectToGlobalAggregate.java index da642e76610dc1..a6916756c10184 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectToGlobalAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectToGlobalAggregate.java @@ -17,13 +17,24 @@ package org.apache.doris.nereids.rules.analysis; +import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitors; +import org.apache.doris.nereids.trees.plans.LimitPhase; +import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import com.google.common.collect.ImmutableList; +import java.util.List; + /** * ProjectToGlobalAggregate. *

@@ -43,17 +54,110 @@ public class ProjectToGlobalAggregate extends OneAnalysisRuleFactory { @Override public Rule build() { return RuleType.PROJECT_TO_GLOBAL_AGGREGATE.build( - logicalProject().then(project -> { - boolean needGlobalAggregate = project.getProjects() - .stream() - .anyMatch(p -> p.accept(ExpressionVisitors.CONTAINS_AGGREGATE_CHECKER, null)); - - if (needGlobalAggregate) { - return new LogicalAggregate<>(ImmutableList.of(), project.getProjects(), project.child()); - } else { - return project; - } - }) + logicalProject().then(project -> { + project = distinctConstantsToLimit1(project); + Plan result = projectToAggregate(project); + return distinctToAggregate(result, project); + }) ); } + + // select distinct 1,2,3 from tbl + // ↓ + // select 1,2,3 from (select 1, 2, 3 from tbl limit 1) as tmp + private static LogicalProject distinctConstantsToLimit1(LogicalProject project) { + if (!project.isDistinct()) { + return project; + } + + boolean allSelectItemAreConstants = true; + for (NamedExpression selectItem : project.getProjects()) { + if (!selectItem.isConstant()) { + allSelectItemAreConstants = false; + break; + } + } + + if (allSelectItemAreConstants) { + return new LogicalProject<>( + project.getProjects(), + new LogicalLimit<>(1, 0, LimitPhase.ORIGIN, project.child()) + ); + } + return project; + } + + // select avg(xxx) from tbl + // ↓ + // LogicalAggregate(groupBy=[], output=[avg(xxx)]) + private static Plan projectToAggregate(LogicalProject project) { + // contains aggregate functions, like sum, avg ? + for (NamedExpression selectItem : project.getProjects()) { + if (selectItem.accept(ExpressionVisitors.CONTAINS_AGGREGATE_CHECKER, null)) { + return new LogicalAggregate<>(ImmutableList.of(), project.getProjects(), project.child()); + } + } + return project; + } + + private static Plan distinctToAggregate(Plan result, LogicalProject originProject) { + if (!originProject.isDistinct()) { + return result; + } + if (result instanceof LogicalProject) { + // remove distinct: select distinct fun(xxx) as c1 from tbl + // + // LogicalProject(distinct=true, output=[fun(xxx) as c1]) + // ↓ + // LogicalAggregate(groupBy=[c1], output=[c1]) + // | + // LogicalProject(output=[fun(xxx) as c1]) + LogicalProject project = (LogicalProject) result; + + ImmutableList.Builder bottomProjectOutput + = ImmutableList.builderWithExpectedSize(project.getProjects().size()); + ImmutableList.Builder topAggOutput + = ImmutableList.builderWithExpectedSize(project.getProjects().size()); + + boolean hasComplexExpr = false; + for (NamedExpression selectItem : project.getProjects()) { + if (selectItem.isSlot()) { + topAggOutput.add(selectItem); + bottomProjectOutput.add(selectItem); + } else if (isAliasLiteral(selectItem)) { + // stay in agg, and eliminate by `ELIMINATE_GROUP_BY_CONSTANT` + topAggOutput.add(selectItem); + } else { + // `FillUpMissingSlots` not support find complex expr in aggregate, + // so we should push down into the bottom project + hasComplexExpr = true; + topAggOutput.add(selectItem.toSlot()); + bottomProjectOutput.add(selectItem); + } + } + + if (!hasComplexExpr) { + List projects = (List) project.getProjects(); + return new LogicalAggregate(projects, projects, project.child()); + } + + LogicalProject removeDistinct = new LogicalProject<>(bottomProjectOutput.build(), project.child()); + ImmutableList aggOutput = topAggOutput.build(); + return new LogicalAggregate(aggOutput, aggOutput, removeDistinct); + } else if (result instanceof LogicalAggregate) { + // remove distinct: select distinct avg(xxx) as c1 from tbl + // + // LogicalProject(distinct=true, output=[avg(xxx) as c1]) + // ↓ + // LogicalAggregate(output=[avg(xxx) as c1]) + return result; + } else { + // never reach + throw new AnalysisException("Unsupported"); + } + } + + private static boolean isAliasLiteral(NamedExpression selectItem) { + return selectItem instanceof Alias && selectItem.child(0) instanceof Literal; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java deleted file mode 100644 index 05a9048bb9cb52..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java +++ /dev/null @@ -1,50 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.nereids.rules.analysis; - -import org.apache.doris.nereids.rules.Rule; -import org.apache.doris.nereids.rules.RuleType; -import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; -import org.apache.doris.nereids.trees.plans.logical.LogicalProject; - -/** - * ProjectWithDistinctToAggregate. - *

- * example sql: - *

- * select distinct value from tbl
- *
- * LogicalProject(projects=[distinct value])
- *            |
- * LogicalOlapScan(table=tbl)
- *          =>
- * LogicalAggregate(groupBy=[value], output=[value])
- *           |
- * LogicalOlapScan(table=tbl)
- *  
- */ -public class ProjectWithDistinctToAggregate extends OneAnalysisRuleFactory { - @Override - public Rule build() { - return RuleType.PROJECT_WITH_DISTINCT_TO_AGGREGATE.build( - logicalProject() - .when(LogicalProject::isDistinct) - .then(project -> new LogicalAggregate<>(project.getProjects(), false, project.child())) - ); - } -} From b936204ae8515db31d455ec1b287929b6089ceb7 Mon Sep 17 00:00:00 2001 From: 924060929 Date: Thu, 13 Mar 2025 17:17:23 +0800 Subject: [PATCH 3/4] fix --- .../ReplaceExpressionByChildOutput.java | 47 ++++++++++++------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ReplaceExpressionByChildOutput.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ReplaceExpressionByChildOutput.java index 2afe254e208188..5dc85811d502ca 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ReplaceExpressionByChildOutput.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ReplaceExpressionByChildOutput.java @@ -51,36 +51,29 @@ public List buildRules() { return replaceSortExpression(sort, sMap); }) )) - .add(RuleType.REPLACE_SORT_EXPRESSION_BY_CHILD_OUTPUT.build( - // select distinct - logicalSort( - logicalAggregate(logicalProject()).when(agg -> - agg.getGroupByExpressions().equals(agg.getOutputExpressions()) - && agg.getGroupByExpressions().equals(agg.child().getOutput()) - ) - ).then(sort -> { - LogicalProject project = sort.child().child(); - Map sMap = buildOutputAliasMap(project.getProjects()); - return replaceSortExpression(sort, sMap); - }) - )) .add(RuleType.REPLACE_SORT_EXPRESSION_BY_CHILD_OUTPUT.build( logicalSort(logicalAggregate()).then(sort -> { - LogicalAggregate aggregate = sort.child(); - Map sMap = buildOutputAliasMap(aggregate.getOutputExpressions()); + LogicalAggregate agg = sort.child(); + Map sMap = buildOutputAliasMap(agg.getOutputExpressions()); + if (sMap.isEmpty() && isSelectDistinct(agg)) { + sMap = getSelectDistinctExpressions(agg); + } return replaceSortExpression(sort, sMap); }) )).add(RuleType.REPLACE_SORT_EXPRESSION_BY_CHILD_OUTPUT.build( logicalSort(logicalHaving(logicalAggregate())).then(sort -> { - LogicalAggregate aggregate = sort.child().child(); - Map sMap = buildOutputAliasMap(aggregate.getOutputExpressions()); + LogicalAggregate agg = sort.child().child(); + Map sMap = buildOutputAliasMap(agg.getOutputExpressions()); + if (sMap.isEmpty() && isSelectDistinct(agg)) { + sMap = getSelectDistinctExpressions(agg); + } return replaceSortExpression(sort, sMap); }) )) .build(); } - private Map buildOutputAliasMap(List output) { + private static Map buildOutputAliasMap(List output) { Map sMap = Maps.newHashMapWithExpectedSize(output.size()); for (NamedExpression expr : output) { if (expr instanceof Alias) { @@ -106,4 +99,22 @@ private LogicalPlan replaceSortExpression(LogicalSort sor return changed ? new LogicalSort<>(newKeys.build(), sort.child()) : sort; } + + private static boolean isSelectDistinct(LogicalAggregate agg) { + return agg.getGroupByExpressions().equals(agg.getOutputExpressions()) + && agg.getGroupByExpressions().equals(agg.child().getOutput()); + } + + private static Map getSelectDistinctExpressions(LogicalAggregate agg) { + Plan child = agg.child(); + List selectItems; + if (child instanceof LogicalProject) { + selectItems = ((LogicalProject) child).getProjects(); + } else if (child instanceof LogicalAggregate) { + selectItems = ((LogicalAggregate) child).getOutputExpressions(); + } else { + selectItems = ImmutableList.of(); + } + return buildOutputAliasMap(selectItems); + } } From e6446a73db4d6378119760ba0011ef3ebf8a6645 Mon Sep 17 00:00:00 2001 From: 924060929 Date: Thu, 13 Mar 2025 17:39:25 +0800 Subject: [PATCH 4/4] ~/github/doris_2.1/regression-test/suites/nereids_p0/aggregate --- .../suites/nereids_p0/aggregate/select_distinct.groovy | 3 --- 1 file changed, 3 deletions(-) diff --git a/regression-test/suites/nereids_p0/aggregate/select_distinct.groovy b/regression-test/suites/nereids_p0/aggregate/select_distinct.groovy index 1ae944be3c6d1a..ddcd2d47e8256b 100644 --- a/regression-test/suites/nereids_p0/aggregate/select_distinct.groovy +++ b/regression-test/suites/nereids_p0/aggregate/select_distinct.groovy @@ -18,9 +18,6 @@ */ suite("select_distinct") { - sql "SET enable_nereids_planner=true" - sql "SET enable_fallback_to_original_planner=false" - multi_sql """ SET enable_nereids_planner=true; SET enable_fallback_to_original_planner=false;