From d857e39a22fe6a27fc8f4a52f3ab389558eab868 Mon Sep 17 00:00:00 2001 From: jackwener Date: Sat, 7 Oct 2023 19:33:28 +0800 Subject: [PATCH 1/3] [feature](Nereids): Pushdown LimitDistinct Through Join --- .../doris/nereids/jobs/executor/Rewriter.java | 2 + .../apache/doris/nereids/rules/RuleType.java | 3 + .../PushdownLimitDistinctThroughJoin.java | 109 ++++++++++++ .../rewrite/PushdownTopNThroughJoin.java | 35 ++-- .../trees/plans/logical/LogicalAggregate.java | 3 +- .../trees/plans/logical/LogicalLimit.java | 6 + .../trees/plans/logical/LogicalTopN.java | 7 + .../PushdownLimitDistinctThroughJoinTest.java | 167 ++++++++++++++++++ .../data/nereids_p0/join/test_limit_join.out | 125 +++++++++++++ .../nereids_p0/join/test_limit_join.groovy | 161 +++++++++++++++++ 10 files changed, 601 insertions(+), 17 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushdownLimitDistinctThroughJoin.java create mode 100644 fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushdownLimitDistinctThroughJoinTest.java create mode 100644 regression-test/data/nereids_p0/join/test_limit_join.out create mode 100644 regression-test/suites/nereids_p0/join/test_limit_join.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java index 53b6980d0be542..fcdb3573836408 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java @@ -91,6 +91,7 @@ import org.apache.doris.nereids.rules.rewrite.PushProjectThroughUnion; import org.apache.doris.nereids.rules.rewrite.PushdownFilterThroughProject; import org.apache.doris.nereids.rules.rewrite.PushdownLimit; +import org.apache.doris.nereids.rules.rewrite.PushdownLimitDistinctThroughJoin; import org.apache.doris.nereids.rules.rewrite.PushdownTopNThroughJoin; import org.apache.doris.nereids.rules.rewrite.PushdownTopNThroughWindow; import org.apache.doris.nereids.rules.rewrite.ReorderJoin; @@ -280,6 +281,7 @@ public class Rewriter extends AbstractBatchJobExecutor { new SplitLimit(), new PushdownLimit(), new PushdownTopNThroughJoin(), + new PushdownLimitDistinctThroughJoin(), new PushdownTopNThroughWindow(), new CreatePartitionTopNFromWindow() ) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java index 3e1582b9d3eb97..7b00b524d77fdd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java @@ -252,6 +252,9 @@ public enum RuleType { PUSH_TOP_N_THROUGH_PROJECT_JOIN(RuleTypeClass.REWRITE), PUSH_TOP_N_THROUGH_PROJECT_WINDOW(RuleTypeClass.REWRITE), PUSH_TOP_N_THROUGH_WINDOW(RuleTypeClass.REWRITE), + // limit distinct push down + PUSH_LIMIT_DISTINCT_THROUGH_JOIN(RuleTypeClass.REWRITE), + PUSH_LIMIT_DISTINCT_THROUGH_PROJECT_JOIN(RuleTypeClass.REWRITE), // adjust nullable ADJUST_NULLABLE(RuleTypeClass.REWRITE), ADJUST_CONJUNCTS_RETURN_TYPE(RuleTypeClass.REWRITE), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushdownLimitDistinctThroughJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushdownLimitDistinctThroughJoin.java new file mode 100644 index 00000000000000..24737b6362528e --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushdownLimitDistinctThroughJoin.java @@ -0,0 +1,109 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite; + +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; +import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; + +import com.google.common.collect.ImmutableList; + +import java.util.List; +import java.util.stream.Collectors; + +/** + * Same with PushdownLimit + */ +public class PushdownLimitDistinctThroughJoin implements RewriteRuleFactory { + + @Override + public List buildRules() { + return ImmutableList.of( + // limit -> distinct -> join + logicalLimit(logicalAggregate(logicalJoin()) + .when(LogicalAggregate::isDistinct)) + .then(limit -> { + LogicalAggregate> agg = limit.child(); + LogicalJoin join = agg.child(); + + Plan newJoin = pushLimitThroughJoin(limit, join); + if (newJoin == null || join.children().equals(newJoin.children())) { + return null; + } + return limit.withChildren(agg.withChildren(newJoin)); + }) + .toRule(RuleType.PUSH_LIMIT_DISTINCT_THROUGH_JOIN), + + // limit -> distinct -> project -> join + logicalLimit(logicalAggregate(logicalProject(logicalJoin()).when(LogicalProject::isAllSlots)) + .when(LogicalAggregate::isDistinct)) + .then(limit -> { + LogicalAggregate>> agg = limit.child(); + LogicalProject> project = agg.child(); + LogicalJoin join = project.child(); + + Plan newJoin = pushLimitThroughJoin(limit, join); + if (newJoin == null || join.children().equals(newJoin.children())) { + return null; + } + return limit.withChildren(agg.withChildren(project.withChildren(newJoin))); + }).toRule(RuleType.PUSH_LIMIT_DISTINCT_THROUGH_JOIN) + ); + } + + private Plan pushLimitThroughJoin(LogicalLimit limit, LogicalJoin join) { + LogicalAggregate agg = (LogicalAggregate) limit.child(); + List groupBySlots = agg.getGroupByExpressions().stream() + .flatMap(e -> e.getInputSlots().stream()).collect(Collectors.toList()); + switch (join.getJoinType()) { + case LEFT_OUTER_JOIN: + if (join.left().getOutputSet().containsAll(groupBySlots) + && join.left().getOutputSet().equals(agg.getOutputSet())) { + return join.withChildren(limit.withLimitChild(limit.getLimit() + limit.getOffset(), 0, + agg.withChildren(join.left())), join.right()); + } + return null; + case RIGHT_OUTER_JOIN: + if (join.right().getOutputSet().containsAll(groupBySlots) + && join.right().getOutputSet().equals(agg.getOutputSet())) { + return join.withChildren(join.left(), limit.withLimitChild(limit.getLimit() + limit.getOffset(), 0, + agg.withChildren(join.right()))); + } + return null; + case CROSS_JOIN: + if (join.left().getOutputSet().containsAll(groupBySlots) + && join.left().getOutputSet().equals(agg.getOutputSet())) { + return join.withChildren(limit.withLimitChild(limit.getLimit() + limit.getOffset(), 0, + agg.withChildren(join.left())), join.right()); + } else if (join.right().getOutputSet().containsAll(groupBySlots) + && join.right().getOutputSet().equals(agg.getOutputSet())) { + return join.withChildren(join.left(), limit.withLimitChild(limit.getLimit() + limit.getOffset(), 0, + agg.withChildren(join.right()))); + } else { + return null; + } + default: + return null; + } + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushdownTopNThroughJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushdownTopNThroughJoin.java index 8980664a9e1a3f..e72c7a051e84e4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushdownTopNThroughJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushdownTopNThroughJoin.java @@ -25,12 +25,10 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.trees.plans.logical.LogicalTopN; -import org.apache.doris.nereids.util.Utils; import com.google.common.collect.ImmutableList; import java.util.List; -import java.util.Set; import java.util.stream.Collectors; /** @@ -75,28 +73,33 @@ public List buildRules() { } private Plan pushLimitThroughJoin(LogicalTopN topN, LogicalJoin join) { + List orderbySlots = topN.getOrderKeys().stream().map(OrderKey::getExpr) + .flatMap(e -> e.getInputSlots().stream()).collect(Collectors.toList()); switch (join.getJoinType()) { case LEFT_OUTER_JOIN: - Set rightOutputSet = join.right().getOutputSet(); - if (topN.getOrderKeys().stream().map(OrderKey::getExpr) - .anyMatch(e -> Utils.isIntersecting(rightOutputSet, e.getInputSlots()))) { - return null; + if (join.left().getOutputSet().containsAll(orderbySlots)) { + return join.withChildren( + topN.withLimitChild(topN.getLimit() + topN.getOffset(), 0, join.left()), + join.right()); } - return join.withChildren(topN.withChildren(join.left()), join.right()); + return null; case RIGHT_OUTER_JOIN: - Set leftOutputSet = join.left().getOutputSet(); - if (topN.getOrderKeys().stream().map(OrderKey::getExpr) - .anyMatch(e -> Utils.isIntersecting(leftOutputSet, e.getInputSlots()))) { - return null; + if (join.right().getOutputSet().containsAll(orderbySlots)) { + return join.withChildren( + join.left(), + topN.withLimitChild(topN.getLimit() + topN.getOffset(), 0, join.right())); } - return join.withChildren(join.left(), topN.withChildren(join.right())); + return null; case CROSS_JOIN: - List orderbySlots = topN.getOrderKeys().stream().map(OrderKey::getExpr) - .flatMap(e -> e.getInputSlots().stream()).collect(Collectors.toList()); + if (join.left().getOutputSet().containsAll(orderbySlots)) { - return join.withChildren(topN.withChildren(join.left()), join.right()); + return join.withChildren( + topN.withLimitChild(topN.getLimit() + topN.getOffset(), 0, join.left()), + join.right()); } else if (join.right().getOutputSet().containsAll(orderbySlots)) { - return join.withChildren(join.left(), topN.withChildren(join.right())); + return join.withChildren( + join.left(), + topN.withLimitChild(topN.getLimit() + topN.getOffset(), 0, join.right())); } else { return null; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java index 1068798f673007..072993f5f95f63 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalAggregate.java @@ -161,7 +161,8 @@ public Optional> getSourceRepeat() { } public boolean isDistinct() { - return outputExpressions.equals(groupByExpressions); + return outputExpressions.stream().allMatch(e -> e instanceof Slot) + && groupByExpressions.stream().allMatch(e -> e instanceof Slot); } public boolean isGenerated() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalLimit.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalLimit.java index ef6175808365ff..e896cf22402631 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalLimit.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalLimit.java @@ -117,6 +117,12 @@ public List getExpressions() { return ImmutableList.of(); } + public LogicalLimit withLimitChild(long limit, long offset, Plan child) { + Preconditions.checkArgument(children.size() == 1, + "LogicalTopN should have 1 child, but input is %s", children.size()); + return new LogicalLimit<>(limit, offset, phase, child); + } + @Override public Plan withGroupExpression(Optional groupExpression) { return new LogicalLimit<>(limit, offset, phase, groupExpression, Optional.of(getLogicalProperties()), child()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalTopN.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalTopN.java index 80de9d6215aa19..02b239f173559a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalTopN.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalTopN.java @@ -122,6 +122,13 @@ public LogicalTopN withOrderKeys(List orderKeys) { Optional.empty(), Optional.of(getLogicalProperties()), child()); } + public LogicalTopN withLimitChild(long limit, long offset, Plan child) { + Preconditions.checkArgument(children.size() == 1, + "LogicalTopN should have 1 child, but input is %s", children.size()); + return new LogicalTopN<>(orderKeys, limit, offset, + Optional.empty(), Optional.of(getLogicalProperties()), child); + } + @Override public LogicalTopN withChildren(List children) { Preconditions.checkArgument(children.size() == 1, diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushdownLimitDistinctThroughJoinTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushdownLimitDistinctThroughJoinTest.java new file mode 100644 index 00000000000000..73a9baac8e4d0f --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushdownLimitDistinctThroughJoinTest.java @@ -0,0 +1,167 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite; + +import org.apache.doris.common.Pair; +import org.apache.doris.nereids.trees.plans.JoinType; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.util.LogicalPlanBuilder; +import org.apache.doris.nereids.util.MemoPatternMatchSupported; +import org.apache.doris.nereids.util.PlanChecker; +import org.apache.doris.nereids.util.PlanConstructor; +import org.apache.doris.utframe.TestWithFeService; + +import com.google.common.collect.ImmutableList; +import org.junit.jupiter.api.Test; + +class PushdownLimitDistinctThroughJoinTest extends TestWithFeService implements MemoPatternMatchSupported { + private static final LogicalOlapScan scan1 = PlanConstructor.newLogicalOlapScan(0, "t1", 0); + private static final LogicalOlapScan scan2 = PlanConstructor.newLogicalOlapScan(1, "t2", 0); + + @Override + protected void runBeforeAll() throws Exception { + createDatabase("test"); + + connectContext.setDatabase("default_cluster:test"); + + createTable("CREATE TABLE `t1` (\n" + + " `k1` int(11) NULL,\n" + + " `k2` int(11) NULL\n" + + ") ENGINE=OLAP\n" + + "COMMENT 'OLAP'\n" + + "DISTRIBUTED BY HASH(`k1`) BUCKETS 3\n" + + "PROPERTIES (\n" + + "\"replication_allocation\" = \"tag.location.default: 1\",\n" + + "\"in_memory\" = \"false\",\n" + + "\"storage_format\" = \"V2\",\n" + + "\"disable_auto_compaction\" = \"false\"\n" + + ");"); + + createTable("CREATE TABLE `t2` (\n" + + " `k1` int(11) NULL,\n" + + " `k2` int(11) NULL\n" + + ") ENGINE=OLAP\n" + + "COMMENT 'OLAP'\n" + + "DISTRIBUTED BY HASH(`k1`) BUCKETS 3\n" + + "PROPERTIES (\n" + + "\"replication_allocation\" = \"tag.location.default: 1\",\n" + + "\"in_memory\" = \"false\",\n" + + "\"storage_format\" = \"V2\",\n" + + "\"disable_auto_compaction\" = \"false\"\n" + + ");"); + } + + @Test + void testJoin() { + LogicalPlan plan = new LogicalPlanBuilder(scan1) + .join(scan2, JoinType.LEFT_OUTER_JOIN, Pair.of(0, 0)) + .distinct(ImmutableList.of(0, 1)) + .limit(10) + .build(); + PlanChecker.from(connectContext, plan) + .applyTopDown(new PushdownLimitDistinctThroughJoin()) + .matches( + logicalJoin( + logicalLimit(logicalAggregate(logicalOlapScan())).when(l -> l.getLimit() == 10), + logicalOlapScan() + ) + ); + + plan = new LogicalPlanBuilder(scan1) + .join(scan2, JoinType.RIGHT_OUTER_JOIN, Pair.of(0, 0)) + .distinct(ImmutableList.of(2, 3)) + .limit(10) + .build(); + PlanChecker.from(connectContext, plan) + .applyTopDown(new PushdownLimitDistinctThroughJoin()) + .matches( + logicalJoin( + logicalOlapScan(), + logicalLimit(logicalAggregate(logicalOlapScan())).when(l -> l.getLimit() == 10) + ) + ); + + plan = new LogicalPlanBuilder(scan1) + .join(scan2, JoinType.CROSS_JOIN, Pair.of(0, 0)) + .distinct(ImmutableList.of(0, 1)) + .limit(10) + .build(); + PlanChecker.from(connectContext, plan) + .applyTopDown(new PushdownLimitDistinctThroughJoin()) + .matches( + logicalJoin( + logicalLimit(logicalAggregate(logicalOlapScan())).when(l -> l.getLimit() == 10), + logicalOlapScan() + ) + ); + + plan = new LogicalPlanBuilder(scan1) + .join(scan2, JoinType.CROSS_JOIN, Pair.of(0, 0)) + .distinct(ImmutableList.of(2, 3)) + .limit(10) + .build(); + PlanChecker.from(connectContext, plan) + .applyTopDown(new PushdownLimitDistinctThroughJoin()) + .matches( + logicalJoin( + logicalOlapScan(), + logicalLimit(logicalAggregate(logicalOlapScan())).when(l -> l.getLimit() == 10) + ) + ); + } + + @Test + void testJoinSql() { + PlanChecker.from(connectContext) + .analyze("select t1.k1 from t1 left join t2 on t1.k1 = t2.k1 group by t1.k1 limit 10") + .rewrite() + .matches( + logicalProject(logicalJoin( + logicalLimit(logicalAggregate(logicalProject(logicalOlapScan()))) + .when(l -> l.getLimit() == 10), + logicalProject(logicalOlapScan()) + )) + ); + } + + @Test + void badCaseJoinType() { + LogicalPlan plan = new LogicalPlanBuilder(scan1) + .join(scan2, JoinType.LEFT_OUTER_JOIN, Pair.of(0, 0)) + .distinct(ImmutableList.of(2)) + .limit(10) + .build(); + PlanChecker.from(connectContext, plan) + .applyTopDown(new PushdownLimitDistinctThroughJoin()) + .matches(logicalJoin(logicalOlapScan(), logicalOlapScan())); + } + + @Test + void badCaseOutput() { + // distinct agg don't output all group by columns of left child + LogicalPlan plan = new LogicalPlanBuilder(scan1) + .join(scan2, JoinType.LEFT_OUTER_JOIN, Pair.of(0, 0)) + .distinct(ImmutableList.of(0)) + .limit(10) + .build(); + PlanChecker.from(connectContext, plan) + .applyTopDown(new PushdownLimitDistinctThroughJoin()) + .matches(logicalJoin(logicalOlapScan(), logicalOlapScan())); + } +} diff --git a/regression-test/data/nereids_p0/join/test_limit_join.out b/regression-test/data/nereids_p0/join/test_limit_join.out new file mode 100644 index 00000000000000..d5582af21b9837 --- /dev/null +++ b/regression-test/data/nereids_p0/join/test_limit_join.out @@ -0,0 +1,125 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !join1 -- +0 + +-- !join1_tree -- +LogicalResultSink[138] ( outputExprs=[c1#0] ) ++--LogicalLimit ( limit=1, offset=0, phase=GLOBAL ) + +--LogicalLimit ( limit=1, offset=0, phase=LOCAL ) + +--LogicalAggregate[132] ( groupByExpr=[c1#0], outputExpr=[c1#0], hasRepeat=false ) + +--LogicalProject[130] ( distinct=false, projects=[c1#0], excepts=[] ) + +--LogicalJoin[128] ( type=LEFT_OUTER_JOIN, markJoinSlotReference=Optional.empty, hashJoinConjuncts=[(c1#0 = c1#3)], otherJoinConjuncts=[] ) + |--LogicalLimit ( limit=1, offset=0, phase=LOCAL ) + | +--LogicalAggregate[122] ( groupByExpr=[c1#0], outputExpr=[c1#0], hasRepeat=false ) + | +--LogicalProject[120] ( distinct=false, projects=[c1#0], excepts=[] ) + | +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) + +--LogicalProject[125] ( distinct=false, projects=[c1#3], excepts=[] ) + +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) + +-- !join2 -- +0 + +-- !join2_tree -- +LogicalResultSink[138] ( outputExprs=[c1#0] ) ++--LogicalLimit ( limit=1, offset=1, phase=GLOBAL ) + +--LogicalLimit ( limit=2, offset=0, phase=LOCAL ) + +--LogicalAggregate[132] ( groupByExpr=[c1#0], outputExpr=[c1#0], hasRepeat=false ) + +--LogicalProject[130] ( distinct=false, projects=[c1#0], excepts=[] ) + +--LogicalJoin[128] ( type=LEFT_OUTER_JOIN, markJoinSlotReference=Optional.empty, hashJoinConjuncts=[(c1#0 = c1#3)], otherJoinConjuncts=[] ) + |--LogicalLimit ( limit=2, offset=0, phase=LOCAL ) + | +--LogicalAggregate[122] ( groupByExpr=[c1#0], outputExpr=[c1#0], hasRepeat=false ) + | +--LogicalProject[120] ( distinct=false, projects=[c1#0], excepts=[] ) + | +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) + +--LogicalProject[125] ( distinct=false, projects=[c1#3], excepts=[] ) + +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) + +-- !join3 -- +0 + +-- !join3_tree -- +LogicalResultSink[144] ( outputExprs=[c1#3] ) ++--LogicalLimit ( limit=1, offset=0, phase=GLOBAL ) + +--LogicalLimit ( limit=1, offset=0, phase=LOCAL ) + +--LogicalAggregate[138] ( groupByExpr=[c1#3], outputExpr=[c1#3], hasRepeat=false ) + +--LogicalProject[136] ( distinct=false, projects=[c1#3], excepts=[] ) + +--LogicalJoin[134] ( type=LEFT_OUTER_JOIN, markJoinSlotReference=Optional.empty, hashJoinConjuncts=[(c1#0 = c1#3)], otherJoinConjuncts=[] ) + |--LogicalLimit ( limit=1, offset=0, phase=LOCAL ) + | +--LogicalAggregate[128] ( groupByExpr=[c1#3], outputExpr=[c1#3], hasRepeat=false ) + | +--LogicalProject[126] ( distinct=false, projects=[c1#3], excepts=[] ) + | +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) + +--LogicalProject[131] ( distinct=false, projects=[c1#0], excepts=[] ) + +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) + +-- !join4 -- +0 + +-- !join4_tree -- +LogicalResultSink[144] ( outputExprs=[c1#3] ) ++--LogicalLimit ( limit=1, offset=1, phase=GLOBAL ) + +--LogicalLimit ( limit=2, offset=0, phase=LOCAL ) + +--LogicalAggregate[138] ( groupByExpr=[c1#3], outputExpr=[c1#3], hasRepeat=false ) + +--LogicalProject[136] ( distinct=false, projects=[c1#3], excepts=[] ) + +--LogicalJoin[134] ( type=LEFT_OUTER_JOIN, markJoinSlotReference=Optional.empty, hashJoinConjuncts=[(c1#0 = c1#3)], otherJoinConjuncts=[] ) + |--LogicalLimit ( limit=2, offset=0, phase=LOCAL ) + | +--LogicalAggregate[128] ( groupByExpr=[c1#3], outputExpr=[c1#3], hasRepeat=false ) + | +--LogicalProject[126] ( distinct=false, projects=[c1#3], excepts=[] ) + | +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) + +--LogicalProject[131] ( distinct=false, projects=[c1#0], excepts=[] ) + +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) + +-- !join5 -- +0 + +-- !join5_tree -- +LogicalResultSink[112] ( outputExprs=[c1#0] ) ++--LogicalTopN ( limit=1, offset=0, orderKeys=[c1#0 asc null first] ) + +--LogicalProject[107] ( distinct=false, projects=[c1#0], excepts=[] ) + +--LogicalJoin[105] ( type=LEFT_OUTER_JOIN, markJoinSlotReference=Optional.empty, hashJoinConjuncts=[(c1#0 = c1#3)], otherJoinConjuncts=[] ) + |--LogicalTopN ( limit=1, offset=0, orderKeys=[c1#0 asc null first] ) + | +--LogicalProject[98] ( distinct=false, projects=[c1#0], excepts=[] ) + | +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) + +--LogicalProject[102] ( distinct=false, projects=[c1#3], excepts=[] ) + +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) + +-- !join6 -- +0 + +-- !join6_tree -- +LogicalResultSink[112] ( outputExprs=[c1#0] ) ++--LogicalTopN ( limit=1, offset=1, orderKeys=[c1#0 asc null first] ) + +--LogicalProject[107] ( distinct=false, projects=[c1#0], excepts=[] ) + +--LogicalJoin[105] ( type=LEFT_OUTER_JOIN, markJoinSlotReference=Optional.empty, hashJoinConjuncts=[(c1#0 = c1#3)], otherJoinConjuncts=[] ) + |--LogicalTopN ( limit=2, offset=0, orderKeys=[c1#0 asc null first] ) + | +--LogicalProject[98] ( distinct=false, projects=[c1#0], excepts=[] ) + | +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) + +--LogicalProject[102] ( distinct=false, projects=[c1#3], excepts=[] ) + +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) + +-- !join7 -- +0 + +-- !join7_tree -- +LogicalResultSink[117] ( outputExprs=[c1#3] ) ++--LogicalTopN ( limit=1, offset=0, orderKeys=[c1#3 asc null first] ) + +--LogicalProject[112] ( distinct=false, projects=[c1#3], excepts=[] ) + +--LogicalJoin[110] ( type=LEFT_OUTER_JOIN, markJoinSlotReference=Optional.empty, hashJoinConjuncts=[(c1#0 = c1#3)], otherJoinConjuncts=[] ) + |--LogicalTopN ( limit=1, offset=0, orderKeys=[c1#3 asc null first] ) + | +--LogicalProject[103] ( distinct=false, projects=[c1#3], excepts=[] ) + | +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) + +--LogicalProject[107] ( distinct=false, projects=[c1#0], excepts=[] ) + +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) + +-- !join8 -- +0 + +-- !join8_tree -- +LogicalResultSink[117] ( outputExprs=[c1#3] ) ++--LogicalTopN ( limit=1, offset=1, orderKeys=[c1#3 asc null first] ) + +--LogicalProject[112] ( distinct=false, projects=[c1#3], excepts=[] ) + +--LogicalJoin[110] ( type=LEFT_OUTER_JOIN, markJoinSlotReference=Optional.empty, hashJoinConjuncts=[(c1#0 = c1#3)], otherJoinConjuncts=[] ) + |--LogicalTopN ( limit=2, offset=0, orderKeys=[c1#3 asc null first] ) + | +--LogicalProject[103] ( distinct=false, projects=[c1#3], excepts=[] ) + | +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) + +--LogicalProject[107] ( distinct=false, projects=[c1#0], excepts=[] ) + +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) + diff --git a/regression-test/suites/nereids_p0/join/test_limit_join.groovy b/regression-test/suites/nereids_p0/join/test_limit_join.groovy new file mode 100644 index 00000000000000..aca30402100b79 --- /dev/null +++ b/regression-test/suites/nereids_p0/join/test_limit_join.groovy @@ -0,0 +1,161 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_limit_join", "nereids_p0") { + def DBname = "nereids_regression_test_limit_join" + sql "DROP DATABASE IF EXISTS ${DBname}" + sql "CREATE DATABASE IF NOT EXISTS ${DBname}" + sql "use ${DBname}" + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + + def tbName1 = "t1" + def tbName2 = "t2" + + sql "DROP TABLE IF EXISTS ${tbName1};" + sql "DROP TABLE IF EXISTS ${tbName2};" + + sql """create table if not exists ${tbName1} (c1 int, c2 int) DISTRIBUTED BY HASH(c1) properties("replication_num" = "1");""" + sql """create table if not exists ${tbName2} (c1 int, c2 int, c3 int) DISTRIBUTED BY HASH(c1) properties("replication_num" = "1");""" + + sql "insert into ${tbName1} values (1,1);" + sql "insert into ${tbName1} values (2,2);" + sql "insert into ${tbName1} values (1,null);" + sql "insert into ${tbName1} values (2,null);" + sql "insert into ${tbName2} values (0,1,9999);" + sql "insert into ${tbName2} values (1,1,9999);" + sql "insert into ${tbName2} values (0,null,9999);" + sql "insert into ${tbName2} values (1,null,9999);" + + + /* test push limit-distinct through join */ + qt_join1 """ + SELECT t1.c1 + FROM ${tbName2} t1 left join t2 on t1.c1 = t2.c1 + GROUP BY t1.c1 + limit 1; + """ + + qt_join1_tree """ + EXPLAIN REWRITTEN PLAN SELECT t1.c1 + FROM ${tbName2} t1 left join t2 on t1.c1 = t2.c1 + GROUP BY t1.c1 + limit 1; + """ + + qt_join2 """ + SELECT t1.c1 + FROM ${tbName2} t1 left join t2 on t1.c1 = t2.c1 + GROUP BY t1.c1 + LIMIT 1 OFFSET 1; + """ + + qt_join2_tree """ + EXPLAIN REWRITTEN PLAN SELECT t1.c1 + FROM ${tbName2} t1 left join t2 on t1.c1 = t2.c1 + GROUP BY t1.c1 + LIMIT 1 OFFSET 1; + """ + + qt_join3 """ + SELECT t2.c1 + FROM ${tbName2} t1 right join t2 on t1.c1 = t2.c1 + GROUP BY t2.c1 + limit 1; + """ + + qt_join3_tree """ + EXPLAIN REWRITTEN PLAN SELECT t2.c1 + FROM ${tbName2} t1 right join t2 on t1.c1 = t2.c1 + GROUP BY t2.c1 + limit 1; + """ + + qt_join4 """ + SELECT t2.c1 + FROM ${tbName2} t1 right join t2 on t1.c1 = t2.c1 + GROUP BY t2.c1 + LIMIT 1 OFFSET 1; + """ + + qt_join4_tree """ + EXPLAIN REWRITTEN PLAN SELECT t2.c1 + FROM ${tbName2} t1 right join t2 on t1.c1 = t2.c1 + GROUP BY t2.c1 + LIMIT 1 OFFSET 1; + """ + + /* test push topN through join */ + qt_join5 """ + SELECT t1.c1 + FROM ${tbName2} t1 left join t2 on t1.c1 = t2.c1 + ORDER BY t1.c1 + limit 1; + """ + + qt_join5_tree """ + EXPLAIN REWRITTEN PLAN SELECT t1.c1 + FROM ${tbName2} t1 left join t2 on t1.c1 = t2.c1 + ORDER BY t1.c1 + limit 1; + """ + + qt_join6 """ + SELECT t1.c1 + FROM ${tbName2} t1 left join t2 on t1.c1 = t2.c1 + ORDER BY t1.c1 + LIMIT 1 OFFSET 1; + """ + + qt_join6_tree """ + EXPLAIN REWRITTEN PLAN SELECT t1.c1 + FROM ${tbName2} t1 left join t2 on t1.c1 = t2.c1 + ORDER BY t1.c1 + LIMIT 1 OFFSET 1; + """ + + qt_join7 """ + SELECT t2.c1 + FROM ${tbName2} t1 right join t2 on t1.c1 = t2.c1 + ORDER BY t2.c1 + limit 1; + """ + + qt_join7_tree """ + EXPLAIN REWRITTEN PLAN SELECT t2.c1 + FROM ${tbName2} t1 right join t2 on t1.c1 = t2.c1 + ORDER BY t2.c1 + limit 1; + """ + + qt_join8 """ + SELECT t2.c1 + FROM ${tbName2} t1 right join t2 on t1.c1 = t2.c1 + ORDER BY t2.c1 + LIMIT 1 OFFSET 1; + """ + + qt_join8_tree """ + EXPLAIN REWRITTEN PLAN SELECT t2.c1 + FROM ${tbName2} t1 right join t2 on t1.c1 = t2.c1 + ORDER BY t2.c1 + LIMIT 1 OFFSET 1; + """ + + sql "DROP DATABASE IF EXISTS ${DBname};" +} + From debbd9e45838d7b7565967c39ee40852e3fb7f4b Mon Sep 17 00:00:00 2001 From: jackwener Date: Sun, 8 Oct 2023 18:21:35 +0800 Subject: [PATCH 2/3] [feature](Nereids): Pushdown LimitDistinct Through Join --- .../data/nereids_p0/join/test_limit_join.out | 188 ++++++++++-------- .../nereids_p0/join/test_limit_join.groovy | 16 +- 2 files changed, 112 insertions(+), 92 deletions(-) diff --git a/regression-test/data/nereids_p0/join/test_limit_join.out b/regression-test/data/nereids_p0/join/test_limit_join.out index d5582af21b9837..1aa973cfdc0a75 100644 --- a/regression-test/data/nereids_p0/join/test_limit_join.out +++ b/regression-test/data/nereids_p0/join/test_limit_join.out @@ -3,123 +3,143 @@ 0 -- !join1_tree -- -LogicalResultSink[138] ( outputExprs=[c1#0] ) -+--LogicalLimit ( limit=1, offset=0, phase=GLOBAL ) - +--LogicalLimit ( limit=1, offset=0, phase=LOCAL ) - +--LogicalAggregate[132] ( groupByExpr=[c1#0], outputExpr=[c1#0], hasRepeat=false ) - +--LogicalProject[130] ( distinct=false, projects=[c1#0], excepts=[] ) - +--LogicalJoin[128] ( type=LEFT_OUTER_JOIN, markJoinSlotReference=Optional.empty, hashJoinConjuncts=[(c1#0 = c1#3)], otherJoinConjuncts=[] ) - |--LogicalLimit ( limit=1, offset=0, phase=LOCAL ) - | +--LogicalAggregate[122] ( groupByExpr=[c1#0], outputExpr=[c1#0], hasRepeat=false ) - | +--LogicalProject[120] ( distinct=false, projects=[c1#0], excepts=[] ) - | +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) - +--LogicalProject[125] ( distinct=false, projects=[c1#3], excepts=[] ) - +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) +PhysicalResultSink +--PhysicalLimit +----PhysicalDistribute +------PhysicalLimit +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c1))otherCondition=() +--------------PhysicalLimit +----------------hashAgg[LOCAL] +------------------PhysicalProject +--------------------PhysicalOlapScan[t2] +--------------PhysicalProject +----------------PhysicalOlapScan[t2] -- !join2 -- 0 -- !join2_tree -- -LogicalResultSink[138] ( outputExprs=[c1#0] ) -+--LogicalLimit ( limit=1, offset=1, phase=GLOBAL ) - +--LogicalLimit ( limit=2, offset=0, phase=LOCAL ) - +--LogicalAggregate[132] ( groupByExpr=[c1#0], outputExpr=[c1#0], hasRepeat=false ) - +--LogicalProject[130] ( distinct=false, projects=[c1#0], excepts=[] ) - +--LogicalJoin[128] ( type=LEFT_OUTER_JOIN, markJoinSlotReference=Optional.empty, hashJoinConjuncts=[(c1#0 = c1#3)], otherJoinConjuncts=[] ) - |--LogicalLimit ( limit=2, offset=0, phase=LOCAL ) - | +--LogicalAggregate[122] ( groupByExpr=[c1#0], outputExpr=[c1#0], hasRepeat=false ) - | +--LogicalProject[120] ( distinct=false, projects=[c1#0], excepts=[] ) - | +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) - +--LogicalProject[125] ( distinct=false, projects=[c1#3], excepts=[] ) - +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) +PhysicalResultSink +--PhysicalLimit +----PhysicalDistribute +------PhysicalLimit +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c1))otherCondition=() +--------------PhysicalLimit +----------------hashAgg[LOCAL] +------------------PhysicalProject +--------------------PhysicalOlapScan[t2] +--------------PhysicalProject +----------------PhysicalOlapScan[t2] -- !join3 -- 0 -- !join3_tree -- -LogicalResultSink[144] ( outputExprs=[c1#3] ) -+--LogicalLimit ( limit=1, offset=0, phase=GLOBAL ) - +--LogicalLimit ( limit=1, offset=0, phase=LOCAL ) - +--LogicalAggregate[138] ( groupByExpr=[c1#3], outputExpr=[c1#3], hasRepeat=false ) - +--LogicalProject[136] ( distinct=false, projects=[c1#3], excepts=[] ) - +--LogicalJoin[134] ( type=LEFT_OUTER_JOIN, markJoinSlotReference=Optional.empty, hashJoinConjuncts=[(c1#0 = c1#3)], otherJoinConjuncts=[] ) - |--LogicalLimit ( limit=1, offset=0, phase=LOCAL ) - | +--LogicalAggregate[128] ( groupByExpr=[c1#3], outputExpr=[c1#3], hasRepeat=false ) - | +--LogicalProject[126] ( distinct=false, projects=[c1#3], excepts=[] ) - | +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) - +--LogicalProject[131] ( distinct=false, projects=[c1#0], excepts=[] ) - +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) +PhysicalResultSink +--PhysicalLimit +----PhysicalDistribute +------PhysicalLimit +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c1))otherCondition=() +--------------PhysicalLimit +----------------hashAgg[LOCAL] +------------------PhysicalProject +--------------------PhysicalOlapScan[t2] +--------------PhysicalProject +----------------PhysicalOlapScan[t2] -- !join4 -- 0 -- !join4_tree -- -LogicalResultSink[144] ( outputExprs=[c1#3] ) -+--LogicalLimit ( limit=1, offset=1, phase=GLOBAL ) - +--LogicalLimit ( limit=2, offset=0, phase=LOCAL ) - +--LogicalAggregate[138] ( groupByExpr=[c1#3], outputExpr=[c1#3], hasRepeat=false ) - +--LogicalProject[136] ( distinct=false, projects=[c1#3], excepts=[] ) - +--LogicalJoin[134] ( type=LEFT_OUTER_JOIN, markJoinSlotReference=Optional.empty, hashJoinConjuncts=[(c1#0 = c1#3)], otherJoinConjuncts=[] ) - |--LogicalLimit ( limit=2, offset=0, phase=LOCAL ) - | +--LogicalAggregate[128] ( groupByExpr=[c1#3], outputExpr=[c1#3], hasRepeat=false ) - | +--LogicalProject[126] ( distinct=false, projects=[c1#3], excepts=[] ) - | +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) - +--LogicalProject[131] ( distinct=false, projects=[c1#0], excepts=[] ) - +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) +PhysicalResultSink +--PhysicalLimit +----PhysicalDistribute +------PhysicalLimit +--------hashAgg[LOCAL] +----------PhysicalProject +------------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c1))otherCondition=() +--------------PhysicalLimit +----------------hashAgg[LOCAL] +------------------PhysicalProject +--------------------PhysicalOlapScan[t2] +--------------PhysicalProject +----------------PhysicalOlapScan[t2] -- !join5 -- 0 -- !join5_tree -- -LogicalResultSink[112] ( outputExprs=[c1#0] ) -+--LogicalTopN ( limit=1, offset=0, orderKeys=[c1#0 asc null first] ) - +--LogicalProject[107] ( distinct=false, projects=[c1#0], excepts=[] ) - +--LogicalJoin[105] ( type=LEFT_OUTER_JOIN, markJoinSlotReference=Optional.empty, hashJoinConjuncts=[(c1#0 = c1#3)], otherJoinConjuncts=[] ) - |--LogicalTopN ( limit=1, offset=0, orderKeys=[c1#0 asc null first] ) - | +--LogicalProject[98] ( distinct=false, projects=[c1#0], excepts=[] ) - | +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) - +--LogicalProject[102] ( distinct=false, projects=[c1#3], excepts=[] ) - +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) +PhysicalResultSink +--PhysicalTopN +----PhysicalTopN +------PhysicalProject +--------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c1))otherCondition=() +----------PhysicalTopN +------------PhysicalDistribute +--------------PhysicalTopN +----------------PhysicalProject +------------------PhysicalOlapScan[t2] +----------PhysicalDistribute +------------PhysicalProject +--------------PhysicalOlapScan[t2] -- !join6 -- 0 -- !join6_tree -- -LogicalResultSink[112] ( outputExprs=[c1#0] ) -+--LogicalTopN ( limit=1, offset=1, orderKeys=[c1#0 asc null first] ) - +--LogicalProject[107] ( distinct=false, projects=[c1#0], excepts=[] ) - +--LogicalJoin[105] ( type=LEFT_OUTER_JOIN, markJoinSlotReference=Optional.empty, hashJoinConjuncts=[(c1#0 = c1#3)], otherJoinConjuncts=[] ) - |--LogicalTopN ( limit=2, offset=0, orderKeys=[c1#0 asc null first] ) - | +--LogicalProject[98] ( distinct=false, projects=[c1#0], excepts=[] ) - | +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) - +--LogicalProject[102] ( distinct=false, projects=[c1#3], excepts=[] ) - +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) +PhysicalResultSink +--PhysicalTopN +----PhysicalTopN +------PhysicalProject +--------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c1))otherCondition=() +----------PhysicalTopN +------------PhysicalDistribute +--------------PhysicalTopN +----------------PhysicalProject +------------------PhysicalOlapScan[t2] +----------PhysicalDistribute +------------PhysicalProject +--------------PhysicalOlapScan[t2] -- !join7 -- 0 -- !join7_tree -- -LogicalResultSink[117] ( outputExprs=[c1#3] ) -+--LogicalTopN ( limit=1, offset=0, orderKeys=[c1#3 asc null first] ) - +--LogicalProject[112] ( distinct=false, projects=[c1#3], excepts=[] ) - +--LogicalJoin[110] ( type=LEFT_OUTER_JOIN, markJoinSlotReference=Optional.empty, hashJoinConjuncts=[(c1#0 = c1#3)], otherJoinConjuncts=[] ) - |--LogicalTopN ( limit=1, offset=0, orderKeys=[c1#3 asc null first] ) - | +--LogicalProject[103] ( distinct=false, projects=[c1#3], excepts=[] ) - | +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) - +--LogicalProject[107] ( distinct=false, projects=[c1#0], excepts=[] ) - +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) +PhysicalResultSink +--PhysicalTopN +----PhysicalTopN +------PhysicalProject +--------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c1))otherCondition=() +----------PhysicalTopN +------------PhysicalDistribute +--------------PhysicalTopN +----------------PhysicalProject +------------------PhysicalOlapScan[t2] +----------PhysicalDistribute +------------PhysicalProject +--------------PhysicalOlapScan[t2] -- !join8 -- 0 -- !join8_tree -- -LogicalResultSink[117] ( outputExprs=[c1#3] ) -+--LogicalTopN ( limit=1, offset=1, orderKeys=[c1#3 asc null first] ) - +--LogicalProject[112] ( distinct=false, projects=[c1#3], excepts=[] ) - +--LogicalJoin[110] ( type=LEFT_OUTER_JOIN, markJoinSlotReference=Optional.empty, hashJoinConjuncts=[(c1#0 = c1#3)], otherJoinConjuncts=[] ) - |--LogicalTopN ( limit=2, offset=0, orderKeys=[c1#3 asc null first] ) - | +--LogicalProject[103] ( distinct=false, projects=[c1#3], excepts=[] ) - | +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) - +--LogicalProject[107] ( distinct=false, projects=[c1#0], excepts=[] ) - +--LogicalOlapScan ( qualified=default_cluster:nereids_regression_test_limit_join.t2, indexName=t2, selectedIndexId=10306, preAgg=ON ) +PhysicalResultSink +--PhysicalTopN +----PhysicalTopN +------PhysicalProject +--------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c1))otherCondition=() +----------PhysicalTopN +------------PhysicalDistribute +--------------PhysicalTopN +----------------PhysicalProject +------------------PhysicalOlapScan[t2] +----------PhysicalDistribute +------------PhysicalProject +--------------PhysicalOlapScan[t2] diff --git a/regression-test/suites/nereids_p0/join/test_limit_join.groovy b/regression-test/suites/nereids_p0/join/test_limit_join.groovy index aca30402100b79..df690670b3e8a1 100644 --- a/regression-test/suites/nereids_p0/join/test_limit_join.groovy +++ b/regression-test/suites/nereids_p0/join/test_limit_join.groovy @@ -51,7 +51,7 @@ suite("test_limit_join", "nereids_p0") { """ qt_join1_tree """ - EXPLAIN REWRITTEN PLAN SELECT t1.c1 + EXPLAIN SHAPE PLAN SELECT t1.c1 FROM ${tbName2} t1 left join t2 on t1.c1 = t2.c1 GROUP BY t1.c1 limit 1; @@ -65,7 +65,7 @@ suite("test_limit_join", "nereids_p0") { """ qt_join2_tree """ - EXPLAIN REWRITTEN PLAN SELECT t1.c1 + EXPLAIN SHAPE PLAN SELECT t1.c1 FROM ${tbName2} t1 left join t2 on t1.c1 = t2.c1 GROUP BY t1.c1 LIMIT 1 OFFSET 1; @@ -79,7 +79,7 @@ suite("test_limit_join", "nereids_p0") { """ qt_join3_tree """ - EXPLAIN REWRITTEN PLAN SELECT t2.c1 + EXPLAIN SHAPE PLAN SELECT t2.c1 FROM ${tbName2} t1 right join t2 on t1.c1 = t2.c1 GROUP BY t2.c1 limit 1; @@ -93,7 +93,7 @@ suite("test_limit_join", "nereids_p0") { """ qt_join4_tree """ - EXPLAIN REWRITTEN PLAN SELECT t2.c1 + EXPLAIN SHAPE PLAN SELECT t2.c1 FROM ${tbName2} t1 right join t2 on t1.c1 = t2.c1 GROUP BY t2.c1 LIMIT 1 OFFSET 1; @@ -108,7 +108,7 @@ suite("test_limit_join", "nereids_p0") { """ qt_join5_tree """ - EXPLAIN REWRITTEN PLAN SELECT t1.c1 + EXPLAIN SHAPE PLAN SELECT t1.c1 FROM ${tbName2} t1 left join t2 on t1.c1 = t2.c1 ORDER BY t1.c1 limit 1; @@ -122,7 +122,7 @@ suite("test_limit_join", "nereids_p0") { """ qt_join6_tree """ - EXPLAIN REWRITTEN PLAN SELECT t1.c1 + EXPLAIN SHAPE PLAN SELECT t1.c1 FROM ${tbName2} t1 left join t2 on t1.c1 = t2.c1 ORDER BY t1.c1 LIMIT 1 OFFSET 1; @@ -136,7 +136,7 @@ suite("test_limit_join", "nereids_p0") { """ qt_join7_tree """ - EXPLAIN REWRITTEN PLAN SELECT t2.c1 + EXPLAIN SHAPE PLAN SELECT t2.c1 FROM ${tbName2} t1 right join t2 on t1.c1 = t2.c1 ORDER BY t2.c1 limit 1; @@ -150,7 +150,7 @@ suite("test_limit_join", "nereids_p0") { """ qt_join8_tree """ - EXPLAIN REWRITTEN PLAN SELECT t2.c1 + EXPLAIN SHAPE PLAN SELECT t2.c1 FROM ${tbName2} t1 right join t2 on t1.c1 = t2.c1 ORDER BY t2.c1 LIMIT 1 OFFSET 1; From 70580ab7ef12afe3a186478a208d10b03b3b5ff8 Mon Sep 17 00:00:00 2001 From: jackwener Date: Sun, 8 Oct 2023 21:25:49 +0800 Subject: [PATCH 3/3] fix regression test --- .../data/nereids_p0/join/test_limit_join.out | 136 +----------------- .../nereids_p0/join/test_limit_join.groovy | 88 +++--------- 2 files changed, 23 insertions(+), 201 deletions(-) diff --git a/regression-test/data/nereids_p0/join/test_limit_join.out b/regression-test/data/nereids_p0/join/test_limit_join.out index 1aa973cfdc0a75..311d110c2bb03f 100644 --- a/regression-test/data/nereids_p0/join/test_limit_join.out +++ b/regression-test/data/nereids_p0/join/test_limit_join.out @@ -1,145 +1,23 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !join1 -- -0 - --- !join1_tree -- -PhysicalResultSink ---PhysicalLimit -----PhysicalDistribute -------PhysicalLimit ---------hashAgg[LOCAL] -----------PhysicalProject -------------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c1))otherCondition=() ---------------PhysicalLimit -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------PhysicalOlapScan[t2] ---------------PhysicalProject -----------------PhysicalOlapScan[t2] - --- !join2 -- -0 - --- !join2_tree -- -PhysicalResultSink ---PhysicalLimit -----PhysicalDistribute -------PhysicalLimit ---------hashAgg[LOCAL] -----------PhysicalProject -------------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c1))otherCondition=() ---------------PhysicalLimit -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------PhysicalOlapScan[t2] ---------------PhysicalProject -----------------PhysicalOlapScan[t2] +1 +2 -- !join3 -- 0 - --- !join3_tree -- -PhysicalResultSink ---PhysicalLimit -----PhysicalDistribute -------PhysicalLimit ---------hashAgg[LOCAL] -----------PhysicalProject -------------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c1))otherCondition=() ---------------PhysicalLimit -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------PhysicalOlapScan[t2] ---------------PhysicalProject -----------------PhysicalOlapScan[t2] - --- !join4 -- -0 - --- !join4_tree -- -PhysicalResultSink ---PhysicalLimit -----PhysicalDistribute -------PhysicalLimit ---------hashAgg[LOCAL] -----------PhysicalProject -------------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c1))otherCondition=() ---------------PhysicalLimit -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------PhysicalOlapScan[t2] ---------------PhysicalProject -----------------PhysicalOlapScan[t2] +1 -- !join5 -- -0 - --- !join5_tree -- -PhysicalResultSink ---PhysicalTopN -----PhysicalTopN -------PhysicalProject ---------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c1))otherCondition=() -----------PhysicalTopN -------------PhysicalDistribute ---------------PhysicalTopN -----------------PhysicalProject -------------------PhysicalOlapScan[t2] -----------PhysicalDistribute -------------PhysicalProject ---------------PhysicalOlapScan[t2] +1 +1 -- !join6 -- -0 - --- !join6_tree -- -PhysicalResultSink ---PhysicalTopN -----PhysicalTopN -------PhysicalProject ---------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c1))otherCondition=() -----------PhysicalTopN -------------PhysicalDistribute ---------------PhysicalTopN -----------------PhysicalProject -------------------PhysicalOlapScan[t2] -----------PhysicalDistribute -------------PhysicalProject ---------------PhysicalOlapScan[t2] +1 -- !join7 -- 0 - --- !join7_tree -- -PhysicalResultSink ---PhysicalTopN -----PhysicalTopN -------PhysicalProject ---------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c1))otherCondition=() -----------PhysicalTopN -------------PhysicalDistribute ---------------PhysicalTopN -----------------PhysicalProject -------------------PhysicalOlapScan[t2] -----------PhysicalDistribute -------------PhysicalProject ---------------PhysicalOlapScan[t2] +0 -- !join8 -- 0 --- !join8_tree -- -PhysicalResultSink ---PhysicalTopN -----PhysicalTopN -------PhysicalProject ---------hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.c1 = t2.c1))otherCondition=() -----------PhysicalTopN -------------PhysicalDistribute ---------------PhysicalTopN -----------------PhysicalProject -------------------PhysicalOlapScan[t2] -----------PhysicalDistribute -------------PhysicalProject ---------------PhysicalOlapScan[t2] - diff --git a/regression-test/suites/nereids_p0/join/test_limit_join.groovy b/regression-test/suites/nereids_p0/join/test_limit_join.groovy index df690670b3e8a1..8f4cbf88f346bc 100644 --- a/regression-test/suites/nereids_p0/join/test_limit_join.groovy +++ b/regression-test/suites/nereids_p0/join/test_limit_join.groovy @@ -43,58 +43,30 @@ suite("test_limit_join", "nereids_p0") { /* test push limit-distinct through join */ - qt_join1 """ + order_qt_join1 """ SELECT t1.c1 - FROM ${tbName2} t1 left join t2 on t1.c1 = t2.c1 + FROM ${tbName1} t1 left join ${tbName2} t2 on t1.c1 = t2.c1 GROUP BY t1.c1 - limit 1; + limit 2; """ - qt_join1_tree """ - EXPLAIN SHAPE PLAN SELECT t1.c1 - FROM ${tbName2} t1 left join t2 on t1.c1 = t2.c1 - GROUP BY t1.c1 - limit 1; - """ - - qt_join2 """ + sql """ SELECT t1.c1 - FROM ${tbName2} t1 left join t2 on t1.c1 = t2.c1 - GROUP BY t1.c1 - LIMIT 1 OFFSET 1; - """ - - qt_join2_tree """ - EXPLAIN SHAPE PLAN SELECT t1.c1 - FROM ${tbName2} t1 left join t2 on t1.c1 = t2.c1 + FROM ${tbName1} t1 left join ${tbName2} t2 on t1.c1 = t2.c1 GROUP BY t1.c1 LIMIT 1 OFFSET 1; """ - qt_join3 """ + order_qt_join3 """ SELECT t2.c1 - FROM ${tbName2} t1 right join t2 on t1.c1 = t2.c1 + FROM ${tbName1} t1 right join ${tbName2} t2 on t1.c1 = t2.c1 GROUP BY t2.c1 - limit 1; - """ - - qt_join3_tree """ - EXPLAIN SHAPE PLAN SELECT t2.c1 - FROM ${tbName2} t1 right join t2 on t1.c1 = t2.c1 - GROUP BY t2.c1 - limit 1; + limit 2; """ - qt_join4 """ + sql """ SELECT t2.c1 - FROM ${tbName2} t1 right join t2 on t1.c1 = t2.c1 - GROUP BY t2.c1 - LIMIT 1 OFFSET 1; - """ - - qt_join4_tree """ - EXPLAIN SHAPE PLAN SELECT t2.c1 - FROM ${tbName2} t1 right join t2 on t1.c1 = t2.c1 + FROM ${tbName1} t1 right join ${tbName2} t2 on t1.c1 = t2.c1 GROUP BY t2.c1 LIMIT 1 OFFSET 1; """ @@ -102,56 +74,28 @@ suite("test_limit_join", "nereids_p0") { /* test push topN through join */ qt_join5 """ SELECT t1.c1 - FROM ${tbName2} t1 left join t2 on t1.c1 = t2.c1 + FROM ${tbName1} t1 left join ${tbName2} t2 on t1.c1 = t2.c1 ORDER BY t1.c1 - limit 1; - """ - - qt_join5_tree """ - EXPLAIN SHAPE PLAN SELECT t1.c1 - FROM ${tbName2} t1 left join t2 on t1.c1 = t2.c1 - ORDER BY t1.c1 - limit 1; + limit 2; """ qt_join6 """ SELECT t1.c1 - FROM ${tbName2} t1 left join t2 on t1.c1 = t2.c1 - ORDER BY t1.c1 - LIMIT 1 OFFSET 1; - """ - - qt_join6_tree """ - EXPLAIN SHAPE PLAN SELECT t1.c1 - FROM ${tbName2} t1 left join t2 on t1.c1 = t2.c1 + FROM ${tbName1} t1 left join ${tbName2} t2 on t1.c1 = t2.c1 ORDER BY t1.c1 LIMIT 1 OFFSET 1; """ qt_join7 """ SELECT t2.c1 - FROM ${tbName2} t1 right join t2 on t1.c1 = t2.c1 + FROM ${tbName1} t1 right join ${tbName2} t2 on t1.c1 = t2.c1 ORDER BY t2.c1 - limit 1; - """ - - qt_join7_tree """ - EXPLAIN SHAPE PLAN SELECT t2.c1 - FROM ${tbName2} t1 right join t2 on t1.c1 = t2.c1 - ORDER BY t2.c1 - limit 1; + limit 2; """ qt_join8 """ SELECT t2.c1 - FROM ${tbName2} t1 right join t2 on t1.c1 = t2.c1 - ORDER BY t2.c1 - LIMIT 1 OFFSET 1; - """ - - qt_join8_tree """ - EXPLAIN SHAPE PLAN SELECT t2.c1 - FROM ${tbName2} t1 right join t2 on t1.c1 = t2.c1 + FROM ${tbName1} t1 right join ${tbName2} t2 on t1.c1 = t2.c1 ORDER BY t2.c1 LIMIT 1 OFFSET 1; """