Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
import org.apache.doris.nereids.util.PlanUtils;

Expand All @@ -36,9 +37,10 @@
public class PullUpProjectUnderLimit extends OneRewriteRuleFactory {
@Override
public Rule build() {
return logicalLimit(logicalProject().whenNot(p -> p.isAllSlots()))
return logicalLimit(logicalProject(logicalJoin().when(j -> j.getJoinType().isLeftRightOuterOrCrossJoin()))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it reduce the scope of this rule if adding the join pattern?

.whenNot(p -> p.isAllSlots()))
.then(limit -> {
LogicalProject<Plan> project = limit.child();
LogicalProject<LogicalJoin<Plan, Plan>> project = limit.child();
Set<Slot> allUsedSlots = project.getProjects().stream().flatMap(ne -> ne.getInputSlots().stream())
.collect(Collectors.toSet());
Set<Slot> outputSet = project.child().getOutputSet();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,41 +20,61 @@
import org.apache.doris.nereids.properties.OrderKey;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalJoin;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
import org.apache.doris.nereids.trees.plans.logical.LogicalTopN;
import org.apache.doris.nereids.util.ExpressionUtils;
import org.apache.doris.nereids.util.PlanUtils;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
* Pull up Project under TopN.
* Pull up Project under TopN for PushDownTopNThroughJoin
*/
public class PullUpProjectUnderTopN extends OneRewriteRuleFactory {
@Override
public Rule build() {
return logicalTopN(logicalProject().whenNot(p -> p.isAllSlots()))
.whenNot(topN -> topN.child().hasPushedDownToProjectionFunctions())
return logicalTopN(
logicalProject(logicalJoin().when(j -> j.getJoinType().isLeftRightOuterOrCrossJoin()))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

similar question, will it reduce this rule's scope if adding the join pattern judge?

.whenNot(p -> p.isAllSlots()))
.then(topN -> {
LogicalProject<Plan> project = topN.child();
LogicalProject<LogicalJoin<Plan, Plan>> project = topN.child();
Set<Slot> outputSet = project.child().getOutputSet();
if (!topN.getOrderKeys().stream().map(OrderKey::getExpr).flatMap(e -> e.getInputSlots().stream())
.allMatch(outputSet::contains)) {
return null;

Map<Slot, Expression> slotMap = ExpressionUtils.generateReplaceMap(project.getProjects());
List<OrderKey> newOrderKeys = new ArrayList<>();
for (OrderKey orderKey : topN.getOrderKeys()) {
if (!(orderKey.getExpr() instanceof Slot)) {
return null;
}
Expression expression = slotMap.get((Slot) orderKey.getExpr());
if (expression instanceof Slot) {
newOrderKeys.add(orderKey.withExpression(expression));
} else {
return null;
}
}

Set<Slot> allUsedSlots = project.getProjects().stream().flatMap(ne -> ne.getInputSlots().stream())
.collect(Collectors.toSet());
LogicalTopN<Plan> newTopN = topN.withOrderKeys(newOrderKeys);
if (outputSet.size() == allUsedSlots.size()) {
Preconditions.checkState(outputSet.equals(allUsedSlots));
return project.withChildren(topN.withChildren(project.child()));
return project.withChildren(newTopN.withChildren(project.child()));
} else {
Plan columnProject = PlanUtils.projectOrSelf(ImmutableList.copyOf(allUsedSlots),
project.child());
return project.withChildren(topN.withChildren(columnProject));
return project.withChildren(newTopN.withChildren(columnProject));
}
}).toRule(RuleType.PULL_UP_PROJECT_UNDER_TOPN);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,10 @@ public final boolean isRightOuterJoin() {
return this == RIGHT_OUTER_JOIN;
}

public final boolean isLeftRightOuterOrCrossJoin() {
return this == LEFT_OUTER_JOIN || this == RIGHT_OUTER_JOIN || this == CROSS_JOIN;
}

public final boolean isLeftSemiOrAntiJoin() {
return this == LEFT_SEMI_JOIN || this == LEFT_ANTI_JOIN || this == NULL_AWARE_LEFT_ANTI_JOIN;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import org.apache.doris.nereids.trees.expressions.Cast;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.plans.JoinType;
import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
import org.apache.doris.nereids.types.VarcharType;
Expand All @@ -35,14 +36,16 @@

class PullUpProjectUnderLimitTest implements MemoPatternMatchSupported {
private final LogicalOlapScan scan1 = PlanConstructor.newLogicalOlapScan(0, "t1", 0);
private final LogicalOlapScan scan2 = PlanConstructor.newLogicalOlapScan(1, "t2", 0);

@Test
void test() {
List<NamedExpression> exprs = ImmutableList.of(
scan1.getOutput().get(0),
scan1.getOutput().get(0).alias("id"),
new Cast(scan1.getOutput().get(1), VarcharType.SYSTEM_DEFAULT).alias("cast")
);
LogicalPlan limit = new LogicalPlanBuilder(scan1)
.join(scan2, JoinType.LEFT_OUTER_JOIN, ImmutableList.of())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to cover right outer and cross? since the code handle these two kinds.

.projectExprs(exprs)
.limit(0, 0)
.build();
Expand All @@ -52,7 +55,7 @@ void test() {
.matches(
logicalProject(
logicalLimit(
logicalOlapScan()
logicalJoin()
)
)
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import org.apache.doris.nereids.trees.expressions.Cast;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.plans.JoinType;
import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
import org.apache.doris.nereids.types.VarcharType;
Expand All @@ -35,24 +36,27 @@

class PullUpProjectUnderTopNTest implements MemoPatternMatchSupported {
private final LogicalOlapScan scan1 = PlanConstructor.newLogicalOlapScan(0, "t1", 0);
private final LogicalOlapScan scan2 = PlanConstructor.newLogicalOlapScan(1, "t2", 0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not add a new ut to cover the join with select alias, avoid overwrite the original one?


@Test
void test() {
List<NamedExpression> exprs = ImmutableList.of(
scan1.getOutput().get(0),
scan1.getOutput().get(0).alias("id"),
new Cast(scan1.getOutput().get(1), VarcharType.SYSTEM_DEFAULT).alias("cast")
);
LogicalPlan limit = new LogicalPlanBuilder(scan1)
.join(scan2, JoinType.LEFT_OUTER_JOIN, ImmutableList.of())
.projectExprs(exprs)
.topN(0, 0, ImmutableList.of(0))
.build();

PlanChecker.from(MemoTestUtils.createConnectContext(), limit)
.applyTopDown(new PullUpProjectUnderTopN())
.printlnTree()
.matches(
logicalProject(
logicalTopN(
logicalOlapScan()
logicalJoin()
)
)
);
Expand Down
8 changes: 4 additions & 4 deletions regression-test/data/nereids_clickbench_shape_p0/query35.out
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !ckbench_shape_35 --
PhysicalResultSink
--PhysicalProject
----PhysicalTopN[MERGE_SORT]
------PhysicalDistribute[DistributionSpecGather]
--------PhysicalTopN[LOCAL_SORT]
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute[DistributionSpecGather]
------PhysicalTopN[LOCAL_SORT]
--------PhysicalProject
----------hashAgg[GLOBAL]
------------PhysicalDistribute[DistributionSpecHash]
--------------hashAgg[LOCAL]
Expand Down
8 changes: 4 additions & 4 deletions regression-test/data/nereids_clickbench_shape_p0/query36.out
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !ckbench_shape_36 --
PhysicalResultSink
--PhysicalProject
----PhysicalTopN[MERGE_SORT]
------PhysicalDistribute[DistributionSpecGather]
--------PhysicalTopN[LOCAL_SORT]
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute[DistributionSpecGather]
------PhysicalTopN[LOCAL_SORT]
--------PhysicalProject
----------hashAgg[GLOBAL]
------------PhysicalDistribute[DistributionSpecHash]
--------------hashAgg[LOCAL]
Expand Down
8 changes: 4 additions & 4 deletions regression-test/data/nereids_clickbench_shape_p0/query40.out
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !ckbench_shape_40 --
PhysicalResultSink
--PhysicalProject
----PhysicalTopN[MERGE_SORT]
------PhysicalDistribute[DistributionSpecGather]
--------PhysicalTopN[LOCAL_SORT]
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute[DistributionSpecGather]
------PhysicalTopN[LOCAL_SORT]
--------PhysicalProject
----------hashAgg[GLOBAL]
------------PhysicalDistribute[DistributionSpecHash]
--------------hashAgg[LOCAL]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !ds_shape_10 --
PhysicalResultSink
--PhysicalProject
----PhysicalTopN[MERGE_SORT]
------PhysicalDistribute[DistributionSpecGather]
--------PhysicalTopN[LOCAL_SORT]
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute[DistributionSpecGather]
------PhysicalTopN[LOCAL_SORT]
--------PhysicalProject
----------hashAgg[GLOBAL]
------------PhysicalDistribute[DistributionSpecHash]
--------------hashAgg[LOCAL]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !ds_shape_17 --
PhysicalResultSink
--PhysicalProject
----PhysicalTopN[MERGE_SORT]
------PhysicalDistribute[DistributionSpecGather]
--------PhysicalTopN[LOCAL_SORT]
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute[DistributionSpecGather]
------PhysicalTopN[LOCAL_SORT]
--------PhysicalProject
----------hashAgg[GLOBAL]
------------PhysicalDistribute[DistributionSpecHash]
--------------hashAgg[LOCAL]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,38 +1,37 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !ds_shape_27 --
PhysicalResultSink
--PhysicalProject
----PhysicalTopN[MERGE_SORT]
------PhysicalDistribute[DistributionSpecGather]
--------PhysicalTopN[LOCAL_SORT]
----------PhysicalProject
------------hashAgg[GLOBAL]
--------------PhysicalDistribute[DistributionSpecHash]
----------------hashAgg[LOCAL]
------------------PhysicalRepeat
--------------------PhysicalProject
----------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[ss_item_sk]
------------------------PhysicalDistribute[DistributionSpecHash]
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk]
------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk]
--------------------------------PhysicalProject
----------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk)) otherCondition=() build RFs:RF0 cd_demo_sk->[ss_cdemo_sk]
------------------------------------PhysicalProject
--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3
------------------------------------PhysicalDistribute[DistributionSpecReplicated]
--------------------------------------PhysicalProject
----------------------------------------filter((customer_demographics.cd_education_status = 'Secondary') and (customer_demographics.cd_gender = 'M') and (customer_demographics.cd_marital_status = 'W'))
------------------------------------------PhysicalOlapScan[customer_demographics]
--------------------------------PhysicalDistribute[DistributionSpecReplicated]
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute[DistributionSpecGather]
------PhysicalTopN[LOCAL_SORT]
--------PhysicalProject
----------hashAgg[GLOBAL]
------------PhysicalDistribute[DistributionSpecHash]
--------------hashAgg[LOCAL]
----------------PhysicalRepeat
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[ss_item_sk]
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------PhysicalProject
--------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk]
----------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk]
------------------------------PhysicalProject
--------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk)) otherCondition=() build RFs:RF0 cd_demo_sk->[ss_cdemo_sk]
----------------------------------PhysicalProject
------------------------------------filter((store.s_state = 'TN'))
--------------------------------------PhysicalOlapScan[store]
------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3
----------------------------------PhysicalDistribute[DistributionSpecReplicated]
------------------------------------PhysicalProject
--------------------------------------filter((customer_demographics.cd_education_status = 'Secondary') and (customer_demographics.cd_gender = 'M') and (customer_demographics.cd_marital_status = 'W'))
----------------------------------------PhysicalOlapScan[customer_demographics]
------------------------------PhysicalDistribute[DistributionSpecReplicated]
--------------------------------PhysicalProject
----------------------------------filter((date_dim.d_year = 1999))
------------------------------------PhysicalOlapScan[date_dim]
------------------------PhysicalDistribute[DistributionSpecHash]
--------------------------PhysicalProject
----------------------------PhysicalOlapScan[item]
----------------------------------filter((store.s_state = 'TN'))
------------------------------------PhysicalOlapScan[store]
----------------------------PhysicalDistribute[DistributionSpecReplicated]
------------------------------PhysicalProject
--------------------------------filter((date_dim.d_year = 1999))
----------------------------------PhysicalOlapScan[date_dim]
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------PhysicalProject
--------------------------PhysicalOlapScan[item]

Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !ds_shape_35 --
PhysicalResultSink
--PhysicalProject
----PhysicalTopN[MERGE_SORT]
------PhysicalDistribute[DistributionSpecGather]
--------PhysicalTopN[LOCAL_SORT]
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute[DistributionSpecGather]
------PhysicalTopN[LOCAL_SORT]
--------PhysicalProject
----------hashAgg[GLOBAL]
------------PhysicalDistribute[DistributionSpecHash]
--------------hashAgg[LOCAL]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !ds_shape_54 --
PhysicalResultSink
--PhysicalProject
----PhysicalTopN[MERGE_SORT]
------PhysicalDistribute[DistributionSpecGather]
--------PhysicalTopN[LOCAL_SORT]
--PhysicalTopN[MERGE_SORT]
----PhysicalDistribute[DistributionSpecGather]
------PhysicalTopN[LOCAL_SORT]
--------PhysicalProject
----------hashAgg[GLOBAL]
------------PhysicalDistribute[DistributionSpecHash]
--------------hashAgg[LOCAL]
Expand Down
Loading