From ffe593cdeb37f94b03798304789f24c96319c2e8 Mon Sep 17 00:00:00 2001 From: seawinde Date: Fri, 5 Dec 2025 18:01:27 +0800 Subject: [PATCH] [fix](mv) Fix sync mv could not be chosen by cbo stable because stats upload from be not in time (#58720) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR introduces two key modifications to improve the reliability of the optimizer (CBO) and the testing pipeline concerning materialized views: 1. Fixes unstable selection of synchronous materialized views in the pipeline: The root cause was that the statistical information of the base table was not reported promptly. This delay could lead the CBO to estimate selectedPartitionsRowCount as zero for the materialized view, causing it to be incorrectly bypassed during query planning. The fix involves manually injecting the statistical information for the synchronous materialized view when available, ensuring the CBO can make a stable and accurate assessment of its cost. 2. Removes the fuzzy value setting for preMaterializedViewRewriteStrategy in SessionVariable: The test cases for the TRY_IN_RBO strategy have already been extended to cover scenarios equivalent to both NOT_IN_RBO and FORCE_IN_RBO. Retaining the fuzzy option can introduce unnecessary instability into the pipeline. For instance, a PR might pass tests when the strategy is TRY_IN_RBO but fail if the pipeline randomly selects NOT_IN_RBO, leading to inconsistent results. 
Removing this fuzzy setting helps eliminate such non-deterministic failures and enhances pipeline stability --- .../doris/nereids/stats/StatsCalculator.java | 6 ++++++ .../java/org/apache/doris/qe/SessionVariable.java | 15 --------------- .../multiple_no_where/multiple_no_where.groovy | 2 ++ .../mv_p0/ssb/multiple_ssb/multiple_ssb.groovy | 4 ++++ .../multiple_ssb_between.groovy | 6 +++++- .../suites/mv_p0/ssb/q_1_1/q_1_1.groovy | 1 + .../suites/mv_p0/ssb/q_2_1/q_2_1.groovy | 1 + .../suites/mv_p0/ssb/q_3_1/q_3_1.groovy | 1 + .../suites/mv_p0/ssb/q_4_1/q_4_1.groovy | 1 + .../suites/mv_p0/ssb/q_4_1_r1/q_4_1_r1.groovy | 1 + 10 files changed, 22 insertions(+), 16 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index f0ca1f1e6bab88..c73ee7fc0e3d15 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -543,6 +543,12 @@ public Statistics computeOlapScan(OlapScan olapScan) { optStats.isPresent(), tableRowCount, olapTable.getQualifiedName()); if (optStats.isPresent()) { double selectedPartitionsRowCount = getSelectedPartitionRowCount(olapScan, tableRowCount); + if (isRegisteredRowCount(olapScan)) { + // If a row count is injected for the materialized view, use it to fix the issue where + // the materialized view cannot be selected by cbo stable due to selectedPartitionsRowCount being 0, + // which is caused by delayed statistics reporting. 
+ selectedPartitionsRowCount = tableRowCount; + } LOG.info("computeOlapScan optStats is {}, selectedPartitionsRowCount is {}", optStats.get(), selectedPartitionsRowCount); // if estimated mv rowCount is more than actual row count, fall back to base table stats diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index ab221d693af48f..9a7d86b53ecbcc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -32,7 +32,6 @@ import org.apache.doris.nereids.metrics.EventSwitchParser; import org.apache.doris.nereids.parser.Dialect; import org.apache.doris.nereids.rules.RuleType; -import org.apache.doris.nereids.rules.exploration.mv.PreMaterializedViewRewriter.PreRewriteStrategy; import org.apache.doris.nereids.rules.expression.ExpressionRuleType; import org.apache.doris.planner.GroupCommitBlockSink; import org.apache.doris.qe.VariableMgr.VarAttr; @@ -3216,20 +3215,6 @@ public void initFuzzyModeVariables() { this.enableReserveMemory = randomInt % 5 != 0; } - // random pre materialized view rewrite strategy - randomInt = random.nextInt(3); - switch (randomInt % 3) { - case 0: - this.preMaterializedViewRewriteStrategy = PreRewriteStrategy.NOT_IN_RBO.name(); - break; - case 1: - this.preMaterializedViewRewriteStrategy = PreRewriteStrategy.TRY_IN_RBO.name(); - break; - case 2: - default: - this.preMaterializedViewRewriteStrategy = PreRewriteStrategy.FORCE_IN_RBO.name(); - break; - } setFuzzyForCatalog(random); } diff --git a/regression-test/suites/mv_p0/ssb/multiple_no_where/multiple_no_where.groovy b/regression-test/suites/mv_p0/ssb/multiple_no_where/multiple_no_where.groovy index a7b3145dddc4fa..0213a993be062c 100644 --- a/regression-test/suites/mv_p0/ssb/multiple_no_where/multiple_no_where.groovy +++ b/regression-test/suites/mv_p0/ssb/multiple_no_where/multiple_no_where.groovy @@ -107,6 
+107,8 @@ suite ("multiple_no_where") { qt_select_star "select * from lineorder_flat order by 1,2, P_MFGR;" sql """analyze table lineorder_flat with sync;""" + sql """alter table lineorder_flat modify column C_CITY set stats ('row_count'='7');""" + sql """alter table lineorder_flat modify column a3 set stats ('row_count'='1');""" sql """set enable_stats=false;""" mv_rewrite_success("""SELECT SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue diff --git a/regression-test/suites/mv_p0/ssb/multiple_ssb/multiple_ssb.groovy b/regression-test/suites/mv_p0/ssb/multiple_ssb/multiple_ssb.groovy index 22e53a58a2db94..97f31ed3b58fed 100644 --- a/regression-test/suites/mv_p0/ssb/multiple_ssb/multiple_ssb.groovy +++ b/regression-test/suites/mv_p0/ssb/multiple_ssb/multiple_ssb.groovy @@ -154,6 +154,10 @@ suite ("multiple_ssb") { sql """analyze table lineorder_flat with sync;""" sql """alter table lineorder_flat modify column LO_ORDERDATE set stats ('row_count'='8');""" + sql """alter table lineorder_flat modify column a1 set stats ('row_count'='1');""" + sql """alter table lineorder_flat modify column a4 set stats ('row_count'='1');""" + sql """alter table lineorder_flat modify column a6 set stats ('row_count'='1');""" + sql """alter table lineorder_flat modify column x2 set stats ('row_count'='1');""" sql """set enable_stats=false;""" mv_rewrite_success("""SELECT SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue diff --git a/regression-test/suites/mv_p0/ssb/multiple_ssb_between/multiple_ssb_between.groovy b/regression-test/suites/mv_p0/ssb/multiple_ssb_between/multiple_ssb_between.groovy index 2eec2f8907aa00..6127b97dbf8c9e 100644 --- a/regression-test/suites/mv_p0/ssb/multiple_ssb_between/multiple_ssb_between.groovy +++ b/regression-test/suites/mv_p0/ssb/multiple_ssb_between/multiple_ssb_between.groovy @@ -153,7 +153,11 @@ suite ("multiple_ssb_between") { sql """set enable_stats=true;""" sql """alter table lineorder_flat modify column LO_ORDERDATE set stats ('row_count'='8');""" - + 
sql """alter table lineorder_flat modify column a1 set stats ('row_count'='1');""" + sql """alter table lineorder_flat modify column a4 set stats ('row_count'='1');""" + sql """alter table lineorder_flat modify column a6 set stats ('row_count'='1');""" + sql """alter table lineorder_flat modify column x2 set stats ('row_count'='1');""" + mv_rewrite_success("""SELECT SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat WHERE diff --git a/regression-test/suites/mv_p0/ssb/q_1_1/q_1_1.groovy b/regression-test/suites/mv_p0/ssb/q_1_1/q_1_1.groovy index d5abacca118fc6..2ba3eb6d9b4d3f 100644 --- a/regression-test/suites/mv_p0/ssb/q_1_1/q_1_1.groovy +++ b/regression-test/suites/mv_p0/ssb/q_1_1/q_1_1.groovy @@ -102,6 +102,7 @@ suite ("mv_ssb_q_1_1") { sql "analyze table lineorder_flat with sync;" sql """alter table lineorder_flat modify column C_CITY set stats ('row_count'='6');""" + sql """alter table lineorder_flat modify column a1 set stats ('row_count'='1');""" mv_rewrite_success("""SELECT SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue FROM lineorder_flat WHERE diff --git a/regression-test/suites/mv_p0/ssb/q_2_1/q_2_1.groovy b/regression-test/suites/mv_p0/ssb/q_2_1/q_2_1.groovy index c40d2842228336..e40c84f9b1f1e5 100644 --- a/regression-test/suites/mv_p0/ssb/q_2_1/q_2_1.groovy +++ b/regression-test/suites/mv_p0/ssb/q_2_1/q_2_1.groovy @@ -94,6 +94,7 @@ suite ("mv_ssb_q_2_1") { qt_select_star "select * from lineorder_flat order by 1,2,P_MFGR;" sql """alter table lineorder_flat modify column LO_ORDERDATE set stats ('row_count'='2');""" + sql """alter table lineorder_flat modify column a2 set stats ('row_count'='1');""" mv_rewrite_success("""SELECT SUM(LO_REVENUE), (LO_ORDERDATE DIV 10000) AS YEAR, diff --git a/regression-test/suites/mv_p0/ssb/q_3_1/q_3_1.groovy b/regression-test/suites/mv_p0/ssb/q_3_1/q_3_1.groovy index 564a5a1dbc7742..0497c3609fd762 100644 --- a/regression-test/suites/mv_p0/ssb/q_3_1/q_3_1.groovy +++ 
b/regression-test/suites/mv_p0/ssb/q_3_1/q_3_1.groovy @@ -99,6 +99,7 @@ suite ("mv_ssb_q_3_1") { sql """analyze table lineorder_flat with sync;""" sql """alter table lineorder_flat modify column LO_ORDERDATE set stats ('row_count'='2');""" + sql """alter table lineorder_flat modify column a1 set stats ('row_count'='1');""" mv_rewrite_success("""SELECT C_NATION, diff --git a/regression-test/suites/mv_p0/ssb/q_4_1/q_4_1.groovy b/regression-test/suites/mv_p0/ssb/q_4_1/q_4_1.groovy index b5fa2332a9f9fc..0f0bbc50a71f90 100644 --- a/regression-test/suites/mv_p0/ssb/q_4_1/q_4_1.groovy +++ b/regression-test/suites/mv_p0/ssb/q_4_1/q_4_1.groovy @@ -93,6 +93,7 @@ suite ("mv_ssb_q_4_1") { sql """analyze table lineorder_flat with sync;""" sql """alter table lineorder_flat modify column LO_ORDERDATE set stats ('row_count'='2');""" + sql """alter table lineorder_flat modify column a2 set stats ('row_count'='1');""" mv_rewrite_success("""SELECT (LO_ORDERDATE DIV 10000) AS YEAR, C_NATION, diff --git a/regression-test/suites/mv_p0/ssb/q_4_1_r1/q_4_1_r1.groovy b/regression-test/suites/mv_p0/ssb/q_4_1_r1/q_4_1_r1.groovy index e36cd614520721..1b9ed83d4ff03d 100644 --- a/regression-test/suites/mv_p0/ssb/q_4_1_r1/q_4_1_r1.groovy +++ b/regression-test/suites/mv_p0/ssb/q_4_1_r1/q_4_1_r1.groovy @@ -96,6 +96,7 @@ suite ("q_4_1_r1") { sql """analyze table lineorder_flat with sync;""" sql """alter table lineorder_flat modify column LO_ORDERDATE set stats ('row_count'='8');""" + sql """alter table lineorder_flat modify column a1 set stats ('row_count'='1');""" sql """set enable_stats=false;""" mv_rewrite_success("""SELECT (LO_ORDERDATE DIV 10000) AS YEAR,