From 3f217933cfa382dcfc6ac8e260deea54564fbdcc Mon Sep 17 00:00:00 2001 From: 924060929 Date: Fri, 14 Mar 2025 15:14:44 +0800 Subject: [PATCH] Revert "branch-2.1: [fix](nereids) fix distinct window compute wrong result (#48987) (#49010)" This reverts commit 7123bbfecb5c95836c564593ace544a9d5c22ed5. --- .../doris/nereids/jobs/executor/Analyzer.java | 9 ++ .../analysis/EliminateDistinctConstant.java | 48 +++++++ .../analysis/ProjectToGlobalAggregate.java | 126 ++---------------- .../ProjectWithDistinctToAggregate.java | 57 ++++++++ .../ReplaceExpressionByChildOutput.java | 34 +---- .../aggregate/select_distinct.groovy | 48 ------- 6 files changed, 130 insertions(+), 192 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/EliminateDistinctConstant.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java delete mode 100644 regression-test/suites/nereids_p0/aggregate/select_distinct.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java index 855c28a7a55523..3a111a7f4d776f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java @@ -30,6 +30,7 @@ import org.apache.doris.nereids.rules.analysis.CheckPolicy; import org.apache.doris.nereids.rules.analysis.CollectJoinConstraint; import org.apache.doris.nereids.rules.analysis.CollectSubQueryAlias; +import org.apache.doris.nereids.rules.analysis.EliminateDistinctConstant; import org.apache.doris.nereids.rules.analysis.EliminateGroupByConstant; import org.apache.doris.nereids.rules.analysis.EliminateLogicalSelectHint; import org.apache.doris.nereids.rules.analysis.FillUpMissingSlots; @@ -39,6 +40,7 @@ import org.apache.doris.nereids.rules.analysis.NormalizeRepeat; import org.apache.doris.nereids.rules.analysis.OneRowRelationExtractAggregate; import org.apache.doris.nereids.rules.analysis.ProjectToGlobalAggregate; +import org.apache.doris.nereids.rules.analysis.ProjectWithDistinctToAggregate; import org.apache.doris.nereids.rules.analysis.ReplaceExpressionByChildOutput; import org.apache.doris.nereids.rules.analysis.SubqueryToApply; import org.apache.doris.nereids.rules.analysis.VariableToLiteral; @@ -103,6 +105,13 @@ private static List buildAnalyzerJobs() { bottomUp(new AddInitMaterializationHook()), bottomUp( new ProjectToGlobalAggregate(), + // this rule check's the logicalProject node's isDistinct property + // and replace the logicalProject node with a LogicalAggregate node + // so any rule before this, if create a new logicalProject node + // should make sure isDistinct property is correctly passed around. + // please see rule BindSlotReference or BindFunction for example + new EliminateDistinctConstant(), + new ProjectWithDistinctToAggregate(), new ReplaceExpressionByChildOutput(), new OneRowRelationExtractAggregate() ), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/EliminateDistinctConstant.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/EliminateDistinctConstant.java new file mode 100644 index 00000000000000..0d051ee8c87ed4 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/EliminateDistinctConstant.java @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.analysis; + +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.plans.LimitPhase; +import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; + +/** + * EliminateDistinctConstant. + *

+ * example sql: + *

+ * select distinct 1,2,3 from tbl
+ *          =>
+ * select 1,2,3 from (select 1, 2, 3 from tbl limit 1) as tmp
+ *  
+ */ +public class EliminateDistinctConstant extends OneAnalysisRuleFactory { + @Override + public Rule build() { + return RuleType.ELIMINATE_DISTINCT_CONSTANT.build( + logicalProject() + .when(LogicalProject::isDistinct) + .when(project -> project.getProjects().stream().allMatch(Expression::isConstant)) + .then(project -> new LogicalProject(project.getProjects(), new LogicalLimit<>(1, 0, + LimitPhase.ORIGIN, project.child()))) + ); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectToGlobalAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectToGlobalAggregate.java index a6916756c10184..da642e76610dc1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectToGlobalAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectToGlobalAggregate.java @@ -17,24 +17,13 @@ package org.apache.doris.nereids.rules.analysis; -import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; -import org.apache.doris.nereids.trees.expressions.Alias; -import org.apache.doris.nereids.trees.expressions.NamedExpression; -import org.apache.doris.nereids.trees.expressions.Slot; -import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitors; -import org.apache.doris.nereids.trees.plans.LimitPhase; -import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; -import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; -import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import com.google.common.collect.ImmutableList; -import java.util.List; - /** * ProjectToGlobalAggregate. *

@@ -54,110 +43,17 @@ public class ProjectToGlobalAggregate extends OneAnalysisRuleFactory { @Override public Rule build() { return RuleType.PROJECT_TO_GLOBAL_AGGREGATE.build( - logicalProject().then(project -> { - project = distinctConstantsToLimit1(project); - Plan result = projectToAggregate(project); - return distinctToAggregate(result, project); - }) + logicalProject().then(project -> { + boolean needGlobalAggregate = project.getProjects() + .stream() + .anyMatch(p -> p.accept(ExpressionVisitors.CONTAINS_AGGREGATE_CHECKER, null)); + + if (needGlobalAggregate) { + return new LogicalAggregate<>(ImmutableList.of(), project.getProjects(), project.child()); + } else { + return project; + } + }) ); } - - // select distinct 1,2,3 from tbl - // ↓ - // select 1,2,3 from (select 1, 2, 3 from tbl limit 1) as tmp - private static LogicalProject distinctConstantsToLimit1(LogicalProject project) { - if (!project.isDistinct()) { - return project; - } - - boolean allSelectItemAreConstants = true; - for (NamedExpression selectItem : project.getProjects()) { - if (!selectItem.isConstant()) { - allSelectItemAreConstants = false; - break; - } - } - - if (allSelectItemAreConstants) { - return new LogicalProject<>( - project.getProjects(), - new LogicalLimit<>(1, 0, LimitPhase.ORIGIN, project.child()) - ); - } - return project; - } - - // select avg(xxx) from tbl - // ↓ - // LogicalAggregate(groupBy=[], output=[avg(xxx)]) - private static Plan projectToAggregate(LogicalProject project) { - // contains aggregate functions, like sum, avg ? - for (NamedExpression selectItem : project.getProjects()) { - if (selectItem.accept(ExpressionVisitors.CONTAINS_AGGREGATE_CHECKER, null)) { - return new LogicalAggregate<>(ImmutableList.of(), project.getProjects(), project.child()); - } - } - return project; - } - - private static Plan distinctToAggregate(Plan result, LogicalProject originProject) { - if (!originProject.isDistinct()) { - return result; - } - if (result instanceof LogicalProject) { - // remove distinct: select distinct fun(xxx) as c1 from tbl - // - // LogicalProject(distinct=true, output=[fun(xxx) as c1]) - // ↓ - // LogicalAggregate(groupBy=[c1], output=[c1]) - // | - // LogicalProject(output=[fun(xxx) as c1]) - LogicalProject project = (LogicalProject) result; - - ImmutableList.Builder bottomProjectOutput - = ImmutableList.builderWithExpectedSize(project.getProjects().size()); - ImmutableList.Builder topAggOutput - = ImmutableList.builderWithExpectedSize(project.getProjects().size()); - - boolean hasComplexExpr = false; - for (NamedExpression selectItem : project.getProjects()) { - if (selectItem.isSlot()) { - topAggOutput.add(selectItem); - bottomProjectOutput.add(selectItem); - } else if (isAliasLiteral(selectItem)) { - // stay in agg, and eliminate by `ELIMINATE_GROUP_BY_CONSTANT` - topAggOutput.add(selectItem); - } else { - // `FillUpMissingSlots` not support find complex expr in aggregate, - // so we should push down into the bottom project - hasComplexExpr = true; - topAggOutput.add(selectItem.toSlot()); - bottomProjectOutput.add(selectItem); - } - } - - if (!hasComplexExpr) { - List projects = (List) project.getProjects(); - return new LogicalAggregate(projects, projects, project.child()); - } - - LogicalProject removeDistinct = new LogicalProject<>(bottomProjectOutput.build(), project.child()); - ImmutableList aggOutput = topAggOutput.build(); - return new LogicalAggregate(aggOutput, aggOutput, removeDistinct); - } else if (result instanceof LogicalAggregate) { - // remove distinct: select distinct avg(xxx) as c1 from tbl - // - // LogicalProject(distinct=true, output=[avg(xxx) as c1]) - // ↓ - // LogicalAggregate(output=[avg(xxx) as c1]) - return result; - } else { - // never reach - throw new AnalysisException("Unsupported"); - } - } - - private static boolean isAliasLiteral(NamedExpression selectItem) { - return selectItem instanceof Alias && selectItem.child(0) instanceof Literal; - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java new file mode 100644 index 00000000000000..f858820d612ca4 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ProjectWithDistinctToAggregate.java @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.analysis; + +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; + +/** + * ProjectWithDistinctToAggregate. + *

+ * example sql: + *

+ * select distinct value from tbl
+ *
+ * LogicalProject(projects=[distinct value])
+ *            |
+ * LogicalOlapScan(table=tbl)
+ *          =>
+ * LogicalAggregate(groupBy=[value], output=[value])
+ *           |
+ * LogicalOlapScan(table=tbl)
+ *  
+ */ +public class ProjectWithDistinctToAggregate extends OneAnalysisRuleFactory { + @Override + public Rule build() { + return RuleType.PROJECT_WITH_DISTINCT_TO_AGGREGATE.build( + logicalProject() + .when(LogicalProject::isDistinct) + .whenNot(project -> project.getProjects().stream().anyMatch(this::hasAggregateFunction)) + .then(project -> new LogicalAggregate<>(project.getProjects(), false, project.child())) + ); + } + + private boolean hasAggregateFunction(Expression expression) { + return expression.anyMatch(AggregateFunction.class::isInstance); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ReplaceExpressionByChildOutput.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ReplaceExpressionByChildOutput.java index 5dc85811d502ca..cd53086f96625d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ReplaceExpressionByChildOutput.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ReplaceExpressionByChildOutput.java @@ -53,27 +53,21 @@ public List buildRules() { )) .add(RuleType.REPLACE_SORT_EXPRESSION_BY_CHILD_OUTPUT.build( logicalSort(logicalAggregate()).then(sort -> { - LogicalAggregate agg = sort.child(); - Map sMap = buildOutputAliasMap(agg.getOutputExpressions()); - if (sMap.isEmpty() && isSelectDistinct(agg)) { - sMap = getSelectDistinctExpressions(agg); - } + LogicalAggregate aggregate = sort.child(); + Map sMap = buildOutputAliasMap(aggregate.getOutputExpressions()); return replaceSortExpression(sort, sMap); }) )).add(RuleType.REPLACE_SORT_EXPRESSION_BY_CHILD_OUTPUT.build( logicalSort(logicalHaving(logicalAggregate())).then(sort -> { - LogicalAggregate agg = sort.child().child(); - Map sMap = buildOutputAliasMap(agg.getOutputExpressions()); - if (sMap.isEmpty() && isSelectDistinct(agg)) { - sMap = getSelectDistinctExpressions(agg); - } + LogicalAggregate aggregate = sort.child().child(); + Map sMap = buildOutputAliasMap(aggregate.getOutputExpressions()); return replaceSortExpression(sort, sMap); }) )) .build(); } - private static Map buildOutputAliasMap(List output) { + private Map buildOutputAliasMap(List output) { Map sMap = Maps.newHashMapWithExpectedSize(output.size()); for (NamedExpression expr : output) { if (expr instanceof Alias) { @@ -99,22 +93,4 @@ private LogicalPlan replaceSortExpression(LogicalSort sor return changed ? new LogicalSort<>(newKeys.build(), sort.child()) : sort; } - - private static boolean isSelectDistinct(LogicalAggregate agg) { - return agg.getGroupByExpressions().equals(agg.getOutputExpressions()) - && agg.getGroupByExpressions().equals(agg.child().getOutput()); - } - - private static Map getSelectDistinctExpressions(LogicalAggregate agg) { - Plan child = agg.child(); - List selectItems; - if (child instanceof LogicalProject) { - selectItems = ((LogicalProject) child).getProjects(); - } else if (child instanceof LogicalAggregate) { - selectItems = ((LogicalAggregate) child).getOutputExpressions(); - } else { - selectItems = ImmutableList.of(); - } - return buildOutputAliasMap(selectItems); - } } diff --git a/regression-test/suites/nereids_p0/aggregate/select_distinct.groovy b/regression-test/suites/nereids_p0/aggregate/select_distinct.groovy deleted file mode 100644 index ddcd2d47e8256b..00000000000000 --- a/regression-test/suites/nereids_p0/aggregate/select_distinct.groovy +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -suite("select_distinct") { - multi_sql """ - SET enable_nereids_planner=true; - SET enable_fallback_to_original_planner=false; - drop table if exists test_distinct_window; - create table test_distinct_window(id int) distributed by hash(id) properties('replication_num'='1'); - insert into test_distinct_window values(1), (2), (3); - """ - - test { - sql "select distinct sum(value) over(partition by id) from (select 100 value, 1 id union all select 100, 2)a" - result([[100L]]) - } - - test { - sql "select distinct value+1 from (select 100 value, 1 id union all select 100, 2)a order by value+1" - result([[101]]) - } - - test { - sql "select distinct 1, 2, 3 from test_distinct_window" - result([[1, 2, 3]]) - } - - test { - sql "select distinct sum(id) from test_distinct_window" - result([[6L]]) - } -}