From 0b27dd58ac129315a19d16a38464bb6678918cea Mon Sep 17 00:00:00 2001 From: morrySnow Date: Thu, 2 Nov 2023 19:25:40 +0800 Subject: [PATCH] [fix](Nereids) RewriteCteChildren not work with cost based rewritter we use a map to record rewrite cte children result to avoid rewrite twice in cost based rewritter. However, we record cte outer and inner in one map, and use null as outer result's key, use cte id as inner result's key. This is wrong, because every anchor has an outer, and we could only record one outer. So when we use the cache in cost based rewritter, we get wrong outer plan from the cache. Then the error will be thrown as below: ``` Caused by: java.lang.IllegalArgumentException: Stats for CTE: CTEId#1 not found at com.google.common.base.Preconditions.checkArgument(Preconditions.java:143) ~[guava-32.1.2-jre.jar:?] at org.apache.doris.nereids.stats.StatsCalculator.visitLogicalCTEConsumer(StatsCalculator.java:1049) ~[classes/:?] at org.apache.doris.nereids.stats.StatsCalculator.visitLogicalCTEConsumer(StatsCalculator.java:147) ~[classes/:?] at org.apache.doris.nereids.trees.plans.logical.LogicalCTEConsumer.accept(LogicalCTEConsumer.java:111) ~[classes/:?] at org.apache.doris.nereids.stats.StatsCalculator.estimate(StatsCalculator.java:222) ~[classes/:?] at org.apache.doris.nereids.stats.StatsCalculator.estimate(StatsCalculator.java:200) ~[classes/:?] at org.apache.doris.nereids.jobs.cascades.DeriveStatsJob.execute(DeriveStatsJob.java:108) ~[classes/:?] at org.apache.doris.nereids.jobs.scheduler.SimpleJobScheduler.executeJobPool(SimpleJobScheduler.java:39) ~[classes/:?] at org.apache.doris.nereids.jobs.executor.Optimizer.execute(Optimizer.java:51) ~[classes/:?] at org.apache.doris.nereids.jobs.rewrite.CostBasedRewriteJob.getCost(CostBasedRewriteJob.java:98) ~[classes/:?] at org.apache.doris.nereids.jobs.rewrite.CostBasedRewriteJob.execute(CostBasedRewriteJob.java:64) ~[classes/:?] at org.apache.doris.nereids.jobs.executor.AbstractBatchJobExecutor.execute(AbstractBatchJobExecutor.java:119) ~[classes/:?] at org.apache.doris.nereids.rules.rewrite.RewriteCteChildren.visit(RewriteCteChildren.java:72) ~[classes/:?] at org.apache.doris.nereids.rules.rewrite.RewriteCteChildren.visit(RewriteCteChildren.java:56) ~[classes/:?] at org.apache.doris.nereids.trees.plans.visitor.PlanVisitor.visitLogicalSink(PlanVisitor.java:118) ~[classes/:?] at org.apache.doris.nereids.trees.plans.visitor.SinkVisitor.visitLogicalResultSink(SinkVisitor.java:72) ~[classes/:?] at org.apache.doris.nereids.trees.plans.logical.LogicalResultSink.accept(LogicalResultSink.java:58) ~[classes/:?] at org.apache.doris.nereids.rules.rewrite.RewriteCteChildren.visitLogicalCTEAnchor(RewriteCteChildren.java:86) ~[classes/:?] at org.apache.doris.nereids.rules.rewrite.RewriteCteChildren.visitLogicalCTEAnchor(RewriteCteChildren.java:56) ~[classes/:?] at org.apache.doris.nereids.trees.plans.logical.LogicalCTEAnchor.accept(LogicalCTEAnchor.java:60) ~[classes/:?] at org.apache.doris.nereids.rules.rewrite.RewriteCteChildren.visitLogicalCTEAnchor(RewriteCteChildren.java:86) ~[classes/:?] at org.apache.doris.nereids.rules.rewrite.RewriteCteChildren.visitLogicalCTEAnchor(RewriteCteChildren.java:56) ~[classes/:?] at org.apache.doris.nereids.trees.plans.logical.LogicalCTEAnchor.accept(LogicalCTEAnchor.java:60) ~[classes/:?] at org.apache.doris.nereids.rules.rewrite.RewriteCteChildren.rewriteRoot(RewriteCteChildren.java:67) ~[classes/:?] at org.apache.doris.nereids.jobs.rewrite.CustomRewriteJob.execute(CustomRewriteJob.java:58) ~[classes/:?] at org.apache.doris.nereids.jobs.executor.AbstractBatchJobExecutor.execute(AbstractBatchJobExecutor.java:119) ~[classes/:?] at org.apache.doris.nereids.NereidsPlanner.rewrite(NereidsPlanner.java:275) ~[classes/:?] at org.apache.doris.nereids.NereidsPlanner.plan(NereidsPlanner.java:218) ~[classes/:?] at org.apache.doris.nereids.NereidsPlanner.plan(NereidsPlanner.java:118) ~[classes/:?] at org.apache.doris.nereids.trees.plans.commands.ExplainCommand.run(ExplainCommand.java:81) ~[classes/:?] at org.apache.doris.qe.StmtExecutor.executeByNereids(StmtExecutor.java:550) ~[classes/:?] ``` --- .../org/apache/doris/nereids/StatementContext.java | 11 ++++++++--- .../nereids/jobs/rewrite/CostBasedRewriteJob.java | 2 +- .../nereids/rules/rewrite/RewriteCteChildren.java | 12 ++++++------ regression-test/suites/nereids_syntax_p0/cte.groovy | 5 +++++ 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java index 13415c504f1ff4..16ad3bc0bd1597 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java @@ -85,7 +85,8 @@ public class StatementContext { private final Map> cteIdToConsumerUnderProjects = new HashMap<>(); // Used to update consumer's stats private final Map, Group>>> cteIdToConsumerGroup = new HashMap<>(); - private final Map rewrittenCtePlan = new HashMap<>(); + private final Map rewrittenCteProducer = new HashMap<>(); + private final Map rewrittenCteConsumer = new HashMap<>(); private final Map hintMap = Maps.newLinkedHashMap(); private final Set viewDdlSqlSet = Sets.newHashSet(); @@ -230,8 +231,12 @@ public Map, Group>>> getCteIdToConsumerGroup() return cteIdToConsumerGroup; } - public Map getRewrittenCtePlan() { - return rewrittenCtePlan; + public Map getRewrittenCteProducer() { + return rewrittenCteProducer; + } + + public Map getRewrittenCteConsumer() { + return rewrittenCteConsumer; } public void addViewDdlSql(String ddlSql) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/rewrite/CostBasedRewriteJob.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/rewrite/CostBasedRewriteJob.java index 2e5132f4ddd4ed..2a7f0903b2501c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/rewrite/CostBasedRewriteJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/rewrite/CostBasedRewriteJob.java @@ -89,7 +89,7 @@ private Optional> getCost(CascadesContext currentCtx CascadesContext rootCtx = currentCtx.getRoot(); if (rootCtx.getRewritePlan() instanceof LogicalCTEAnchor) { // set subtree rewrite cache - currentCtx.getStatementContext().getRewrittenCtePlan() + currentCtx.getStatementContext().getRewrittenCteProducer() .put(currentCtx.getCurrentTree().orElse(null), (LogicalPlan) cboCtx.getRewritePlan()); // Do Whole tree rewrite CascadesContext rootCtxCopy = CascadesContext.newCurrentTreeContext(rootCtx); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java index 5aa286e67f9c27..3318f9990d8b40 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java @@ -77,14 +77,14 @@ public Plan visit(Plan plan, CascadesContext context) { public Plan visitLogicalCTEAnchor(LogicalCTEAnchor cteAnchor, CascadesContext cascadesContext) { LogicalPlan outer; - if (cascadesContext.getStatementContext().getRewrittenCtePlan().containsKey(null)) { - outer = cascadesContext.getStatementContext().getRewrittenCtePlan().get(null); + if (cascadesContext.getStatementContext().getRewrittenCteConsumer().containsKey(cteAnchor.getCteId())) { + outer = cascadesContext.getStatementContext().getRewrittenCteProducer().get(cteAnchor.getCteId()); } else { CascadesContext outerCascadesCtx = CascadesContext.newSubtreeContext( Optional.empty(), cascadesContext, cteAnchor.child(1), cascadesContext.getCurrentJobContext().getRequiredProperties()); outer = (LogicalPlan) cteAnchor.child(1).accept(this, outerCascadesCtx); - cascadesContext.getStatementContext().getRewrittenCtePlan().put(null, outer); + cascadesContext.getStatementContext().getRewrittenCteConsumer().put(cteAnchor.getCteId(), outer); } boolean reserveAnchor = outer.anyMatch(p -> { if (p instanceof LogicalCTEConsumer) { @@ -104,8 +104,8 @@ public Plan visitLogicalCTEAnchor(LogicalCTEAnchor cteProducer, CascadesContext cascadesContext) { LogicalPlan child; - if (cascadesContext.getStatementContext().getRewrittenCtePlan().containsKey(cteProducer.getCteId())) { - child = cascadesContext.getStatementContext().getRewrittenCtePlan().get(cteProducer.getCteId()); + if (cascadesContext.getStatementContext().getRewrittenCteProducer().containsKey(cteProducer.getCteId())) { + child = cascadesContext.getStatementContext().getRewrittenCteProducer().get(cteProducer.getCteId()); } else { child = (LogicalPlan) cteProducer.child(); child = tryToConstructFilter(cascadesContext, cteProducer.getCteId(), child); @@ -118,7 +118,7 @@ public Plan visitLogicalCTEProducer(LogicalCTEProducer cteProduc CascadesContext rewrittenCtx = CascadesContext.newSubtreeContext( Optional.of(cteProducer.getCteId()), cascadesContext, child, PhysicalProperties.ANY); child = (LogicalPlan) child.accept(this, rewrittenCtx); - cascadesContext.getStatementContext().getRewrittenCtePlan().put(cteProducer.getCteId(), child); + cascadesContext.getStatementContext().getRewrittenCteProducer().put(cteProducer.getCteId(), child); } return cteProducer.withChildren(child); } diff --git a/regression-test/suites/nereids_syntax_p0/cte.groovy b/regression-test/suites/nereids_syntax_p0/cte.groovy index 2fffefd8067898..ba945569919394 100644 --- a/regression-test/suites/nereids_syntax_p0/cte.groovy +++ b/regression-test/suites/nereids_syntax_p0/cte.groovy @@ -324,5 +324,10 @@ suite("cte") { ) tab WHERE Id IN (1, 2) """ + + // rewrite cte children should work well with cost based rewrite rule. rely on rewrite rule: InferSetOperatorDistinct + sql """ + WITH cte_0 AS ( SELECT 1 AS a ), cte_1 AS ( SELECT 1 AS a ) select * from cte_0, cte_1 union select * from cte_0, cte_1 + """ }