diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggStrategySelector.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggStrategySelector.java index 5b4e4092e54953..3b9caa40908fec 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggStrategySelector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggStrategySelector.java @@ -151,9 +151,12 @@ private boolean shouldUseMultiDistinct(LogicalAggregate agg) { } } } else { - if (AggregateUtils.hasUnknownStatistics(agg.getGroupByExpressions(), childStats)) { + if (agg.hasSkewHint()) { return false; } + if (AggregateUtils.hasUnknownStatistics(agg.getGroupByExpressions(), childStats)) { + return true; + } // The joint ndv of Group by key is high, so multi_distinct is not selected; if (aggStats.getRowCount() >= row * AggregateUtils.LOW_CARDINALITY_THRESHOLD) { return false; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriter.java index 2b7804973682a1..e34d56d383c6da 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriter.java @@ -41,6 +41,7 @@ import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.Statistics; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; @@ -89,7 +90,8 @@ public List buildRules() { ); } - private boolean shouldUseMultiDistinct(LogicalAggregate aggregate) { + @VisibleForTesting + boolean shouldUseMultiDistinct(LogicalAggregate aggregate) { // count(distinct a,b) cannot use multi_distinct if (AggregateUtils.containsCountDistinctMultiExpr(aggregate)) { return false; @@ -111,7 +113,7 @@ private boolean shouldUseMultiDistinct(LogicalAggregate aggregat // has unknown statistics, split to bottom and top agg if (AggregateUtils.hasUnknownStatistics(aggregate.getGroupByExpressions(), aggChildStats) || AggregateUtils.hasUnknownStatistics(dstArgs, aggChildStats)) { - return false; + return true; } double gbyNdv = aggStats.getRowCount(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java index 2d3058e434a1a9..99ab10f982b115 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java @@ -167,4 +167,17 @@ default boolean canSkewRewrite() { && !getGroupByExpressions().isEmpty() && !(new HashSet<>(getGroupByExpressions()).containsAll(distinctArguments)); } + + /** + * hasSkewHint + * @return true if there is at least one skew hint + */ + default boolean hasSkewHint() { + for (AggregateFunction aggFunc : getAggregateFunctions()) { + if (aggFunc.isSkew()) { + return true; + } + } + return false; + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriterTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriterTest.java index 1664cc18dfcf98..fcab4e2ae8c2af 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriterTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriterTest.java @@ -19,13 +19,18 @@ import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; import org.apache.doris.nereids.trees.expressions.functions.agg.Count; +import org.apache.doris.nereids.trees.expressions.functions.agg.MultiDistinctCount; import org.apache.doris.nereids.trees.expressions.functions.agg.MultiDistinctGroupConcat; import org.apache.doris.nereids.trees.expressions.functions.agg.Sum0; import org.apache.doris.nereids.trees.expressions.functions.scalar.If; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; import org.apache.doris.nereids.util.MemoPatternMatchSupported; import org.apache.doris.nereids.util.PlanChecker; import org.apache.doris.utframe.TestWithFeService; +import mockit.Mock; +import mockit.MockUp; import org.junit.jupiter.api.Test; public class DistinctAggregateRewriterTest extends TestWithFeService implements MemoPatternMatchSupported { @@ -39,8 +44,18 @@ protected void runBeforeAll() throws Exception { connectContext.getSessionVariable().setDisableNereidsRules("PRUNE_EMPTY_PARTITION"); } + private void applyMock() { + new MockUp() { + @Mock + boolean shouldUseMultiDistinct(LogicalAggregate aggregate) { + return false; + } + }; + } + @Test void testSplitSingleDistinctAgg() { + applyMock(); PlanChecker.from(connectContext) .analyze("select b, count(distinct a) from test.distinct_agg_split_t group by b") .rewrite() @@ -59,6 +74,7 @@ void testSplitSingleDistinctAgg() { @Test void testSplitSingleDistinctAggOtherFunctionCount() { + applyMock(); PlanChecker.from(connectContext) .analyze("select b, count(distinct a), count(a) from test.distinct_agg_split_t group by b") .rewrite() @@ -77,6 +93,7 @@ void testSplitSingleDistinctAggOtherFunctionCount() { @Test void testSplitSingleDistinctWithOtherAgg() { + applyMock(); PlanChecker.from(connectContext) .analyze("select b, count(distinct a), sum(c) from test.distinct_agg_split_t group by b") .rewrite() @@ -93,6 +110,7 @@ void testSplitSingleDistinctWithOtherAgg() { @Test void testNotSplitWhenNoGroupBy() { + applyMock(); PlanChecker.from(connectContext) .analyze("select count(distinct a) from test.distinct_agg_split_t") .rewrite() @@ -102,6 +120,7 @@ void testNotSplitWhenNoGroupBy() { @Test void testSplitWhenNoGroupByHasGroupConcatDistinctOrderBy() { + applyMock(); PlanChecker.from(connectContext) .analyze("select group_concat(distinct a, '' order by b) from test.distinct_agg_split_t") .rewrite() @@ -113,6 +132,7 @@ void testSplitWhenNoGroupByHasGroupConcatDistinctOrderBy() { @Test void testSplitWhenNoGroupByHasGroupConcatDistinct() { + applyMock(); PlanChecker.from(connectContext) .analyze("select group_concat(distinct a, '') from test.distinct_agg_split_t") .rewrite() @@ -124,6 +144,7 @@ void testSplitWhenNoGroupByHasGroupConcatDistinct() { @Test void testMultiExprDistinct() { + applyMock(); PlanChecker.from(connectContext) .analyze("select b, sum(a), count(distinct a,c) from test.distinct_agg_split_t group by b") .rewrite() @@ -142,6 +163,7 @@ void testMultiExprDistinct() { @Test void testNotSplitWhenNoDistinct() { + applyMock(); PlanChecker.from(connectContext) .analyze("select b, sum(a), count(c) from test.distinct_agg_split_t group by b") .rewrite() @@ -151,6 +173,7 @@ void testNotSplitWhenNoDistinct() { @Test void testSplitWithComplexExpression() { + applyMock(); PlanChecker.from(connectContext) .analyze("select b, count(distinct a + 1) from test.distinct_agg_split_t group by b") .rewrite() @@ -161,4 +184,19 @@ void testSplitWithComplexExpression() { ).when(agg -> agg.getGroupByExpressions().size() == 1 && agg.getGroupByExpressions().get(0).toSql().equals("b"))); } + + @Test + void testMultiDistinct() { + connectContext.getSessionVariable().setAggPhase(2); + PlanChecker.from(connectContext) + .analyze("select b, count(distinct a), sum(c) from test.distinct_agg_split_t group by b") + .rewrite() + .printlnTree() + .matches( + logicalAggregate().when(agg -> agg.getGroupByExpressions().size() == 1 + && agg.getGroupByExpressions().get(0).toSql().equals("b") + && agg.getAggregateFunctions().stream().noneMatch(AggregateFunction::isDistinct) + && agg.getAggregateFunctions().stream().anyMatch(f -> f instanceof MultiDistinctCount) + )); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctTest.java index 870124d2a498a4..fe8af6fee86d68 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctTest.java @@ -199,4 +199,19 @@ void countMultiColumnsWithGby() { ); }); } + + @Test + void multiSumWithGby() { + String sql = "select sum(distinct b), sum(distinct a) from test_distinct_multi group by c"; + PlanChecker.from(connectContext).checkExplain(sql, planner -> { + Plan plan = planner.getOptimizedPlan(); + MatchingUtils.assertMatches(plan, + physicalResultSink( + physicalDistribute( + physicalProject( + physicalHashAggregate( + physicalDistribute( + physicalHashAggregate(any()))))))); + }); + } } diff --git a/regression-test/data/nereids_rules_p0/adjust_nullable/test_adjust_nullable.out b/regression-test/data/nereids_rules_p0/adjust_nullable/test_adjust_nullable.out index 09626347b110d7..05ca7f01e20dd5 100644 --- a/regression-test/data/nereids_rules_p0/adjust_nullable/test_adjust_nullable.out +++ b/regression-test/data/nereids_rules_p0/adjust_nullable/test_adjust_nullable.out @@ -1,19 +1,9 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !avg_shape -- -PhysicalCteAnchor ( cteId=CTEId#0 ) ---PhysicalCteProducer ( cteId=CTEId#0 ) -----PhysicalOlapScan[test_adjust_nullable_t] ---PhysicalResultSink -----PhysicalProject[(cast(sum(DISTINCT b) as DOUBLE) / cast(count(DISTINCT b) as DOUBLE)) AS `AVG(distinct b)`, non_nullable((cast(sum(DISTINCT a) as DOUBLE) / cast(count(DISTINCT a) as DOUBLE))) AS `AVG(distinct a)`] -------hashJoin[INNER_JOIN colocated] hashCondition=((c <=> .c)) otherCondition=() ---------PhysicalProject[c AS `c`, count(DISTINCT a) AS `count(DISTINCT a)`, sum(DISTINCT a) AS `sum(DISTINCT a)`] -----------hashAgg[DISTINCT_GLOBAL] -------------hashAgg[GLOBAL] ---------------PhysicalDistribute[DistributionSpecHash] -----------------PhysicalCteConsumer ( cteId=CTEId#0 ) ---------PhysicalProject[.c, count(DISTINCT b) AS `count(DISTINCT b)`, sum(DISTINCT b) AS `sum(DISTINCT b)`] -----------hashAgg[DISTINCT_GLOBAL] -------------hashAgg[GLOBAL] ---------------PhysicalDistribute[DistributionSpecHash] -----------------PhysicalCteConsumer ( cteId=CTEId#0 ) +PhysicalResultSink +--PhysicalProject[(cast(sum(DISTINCT b) as DOUBLE) / cast(count(DISTINCT b) as DOUBLE)) AS `AVG(distinct b)`, non_nullable((cast(sum(DISTINCT a) as DOUBLE) / cast(count(DISTINCT a) as DOUBLE))) AS `AVG(distinct a)`] +----hashAgg[GLOBAL] +------PhysicalDistribute[DistributionSpecHash] +--------hashAgg[LOCAL] +----------PhysicalOlapScan[test_adjust_nullable_t] diff --git a/regression-test/data/nereids_rules_p0/agg_skew_rewrite/agg_skew_rewrite.out b/regression-test/data/nereids_rules_p0/agg_skew_rewrite/agg_skew_rewrite.out index 360052e0f2a12a..813a08ce40d4e8 100644 --- a/regression-test/data/nereids_rules_p0/agg_skew_rewrite/agg_skew_rewrite.out +++ b/regression-test/data/nereids_rules_p0/agg_skew_rewrite/agg_skew_rewrite.out @@ -353,8 +353,7 @@ PhysicalResultSink -- !shape_hint_other_agg_func -- PhysicalResultSink --hashAgg[GLOBAL] -----hashAgg[GLOBAL] -------PhysicalOlapScan[test_skew_hint] +----PhysicalOlapScan[test_skew_hint] -- !shape_hint_other_agg_func_expr -- PhysicalResultSink @@ -367,9 +366,8 @@ PhysicalResultSink -- !shape_hint_same_column_with_group_by -- PhysicalResultSink --hashAgg[GLOBAL] -----hashAgg[GLOBAL] -------hashAgg[LOCAL] ---------PhysicalOlapScan[test_skew_hint] +----hashAgg[LOCAL] +------PhysicalOlapScan[test_skew_hint] -- !shape_hint_same_column_with_group_by_expr -- PhysicalResultSink @@ -391,10 +389,9 @@ PhysicalResultSink -- !shape_hint_other_agg_func_grouping -- PhysicalResultSink --hashAgg[GLOBAL] -----hashAgg[GLOBAL] -------hashAgg[LOCAL] ---------PhysicalRepeat -----------PhysicalOlapScan[test_skew_hint] +----hashAgg[LOCAL] +------PhysicalRepeat +--------PhysicalOlapScan[test_skew_hint] -- !shape_hint_other_agg_func_expr_grouping -- PhysicalResultSink @@ -567,12 +564,10 @@ PhysicalResultSink PhysicalResultSink --hashAgg[GLOBAL] ----hashAgg[LOCAL] -------hashAgg[GLOBAL] ---------PhysicalOlapScan[test_skew_hint] +------PhysicalOlapScan[test_skew_hint] -- !shape_not_rewrite -- PhysicalResultSink --hashAgg[GLOBAL] -----hashAgg[GLOBAL] -------PhysicalOlapScan[test_skew_hint] +----PhysicalOlapScan[test_skew_hint] diff --git a/regression-test/data/nereids_rules_p0/agg_strategy/agg_strategy.out b/regression-test/data/nereids_rules_p0/agg_strategy/agg_strategy.out index e63c5951f4be36..a405272b3d3ccf 100644 --- a/regression-test/data/nereids_rules_p0/agg_strategy/agg_strategy.out +++ b/regression-test/data/nereids_rules_p0/agg_strategy/agg_strategy.out @@ -133,10 +133,9 @@ PhysicalResultSink ----PhysicalDistribute[DistributionSpecGather] ------PhysicalQuickSort[LOCAL_SORT] --------hashAgg[GLOBAL] -----------hashAgg[GLOBAL] -------------PhysicalDistribute[DistributionSpecHash] ---------------hashAgg[LOCAL] -----------------PhysicalOlapScan[t_gbykey_10_dstkey_10_1000_id] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalOlapScan[t_gbykey_10_dstkey_10_1000_id] -- !agg_distinct_satisfy_gby_key -- PhysicalResultSink @@ -144,8 +143,7 @@ PhysicalResultSink ----PhysicalDistribute[DistributionSpecGather] ------PhysicalQuickSort[LOCAL_SORT] --------hashAgg[GLOBAL] -----------hashAgg[GLOBAL] -------------PhysicalOlapScan[t_gbykey_10_dstkey_10_1000_id] +----------PhysicalOlapScan[t_gbykey_10_dstkey_10_1000_id] -- !agg_distinct_satisfy_dst_key -- PhysicalResultSink @@ -155,39 +153,35 @@ PhysicalResultSink --------hashAgg[GLOBAL] ----------PhysicalDistribute[DistributionSpecHash] ------------hashAgg[LOCAL] ---------------hashAgg[GLOBAL] -----------------PhysicalOlapScan[t_gbykey_10_dstkey_10_1000_id] +--------------PhysicalOlapScan[t_gbykey_10_dstkey_10_1000_id] -- !agg_distinct_with_gby_key_with_other_func -- PhysicalResultSink --PhysicalQuickSort[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalQuickSort[LOCAL_SORT] ---------hashAgg[DISTINCT_GLOBAL] -----------hashAgg[GLOBAL] -------------PhysicalDistribute[DistributionSpecHash] ---------------hashAgg[LOCAL] -----------------PhysicalOlapScan[t_gbykey_10_dstkey_10_1000_id] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalOlapScan[t_gbykey_10_dstkey_10_1000_id] -- !agg_distinct_satisfy_gby_key_with_other_func -- PhysicalResultSink --PhysicalQuickSort[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalQuickSort[LOCAL_SORT] ---------hashAgg[DISTINCT_GLOBAL] -----------hashAgg[GLOBAL] -------------PhysicalOlapScan[t_gbykey_10_dstkey_10_1000_id] +--------hashAgg[GLOBAL] +----------PhysicalOlapScan[t_gbykey_10_dstkey_10_1000_id] -- !agg_distinct_satisfy_dst_key_with_other_func -- PhysicalResultSink --PhysicalQuickSort[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalQuickSort[LOCAL_SORT] ---------hashAgg[DISTINCT_GLOBAL] +--------hashAgg[GLOBAL] ----------PhysicalDistribute[DistributionSpecHash] -------------hashAgg[DISTINCT_LOCAL] ---------------hashAgg[GLOBAL] -----------------PhysicalOlapScan[t_gbykey_10_dstkey_10_1000_id] +------------hashAgg[LOCAL] +--------------PhysicalOlapScan[t_gbykey_10_dstkey_10_1000_id] -- !agg_distinct_without_gby_key -- PhysicalResultSink @@ -547,10 +541,9 @@ PhysicalResultSink ----PhysicalDistribute[DistributionSpecGather] ------PhysicalQuickSort[LOCAL_SORT] --------hashAgg[GLOBAL] -----------hashAgg[GLOBAL] -------------PhysicalDistribute[DistributionSpecHash] ---------------hashAgg[LOCAL] -----------------PhysicalOlapScan[t_gbykey_2_dstkey_10_30_id] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalOlapScan[t_gbykey_2_dstkey_10_30_id] -- !agg_distinct_satisfy_gby_key_low_ndv -- PhysicalResultSink @@ -558,8 +551,7 @@ PhysicalResultSink ----PhysicalDistribute[DistributionSpecGather] ------PhysicalQuickSort[LOCAL_SORT] --------hashAgg[GLOBAL] -----------hashAgg[GLOBAL] -------------PhysicalOlapScan[t_gbykey_2_dstkey_10_30_id] +----------PhysicalOlapScan[t_gbykey_2_dstkey_10_30_id] -- !agg_distinct_satisfy_dst_key_low_ndv -- PhysicalResultSink @@ -569,39 +561,35 @@ PhysicalResultSink --------hashAgg[GLOBAL] ----------PhysicalDistribute[DistributionSpecHash] ------------hashAgg[LOCAL] ---------------hashAgg[GLOBAL] -----------------PhysicalOlapScan[t_gbykey_2_dstkey_10_30_id] +--------------PhysicalOlapScan[t_gbykey_2_dstkey_10_30_id] -- !agg_distinct_with_gby_key_with_other_func_low_ndv -- PhysicalResultSink --PhysicalQuickSort[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalQuickSort[LOCAL_SORT] ---------hashAgg[DISTINCT_GLOBAL] -----------hashAgg[GLOBAL] -------------PhysicalDistribute[DistributionSpecHash] ---------------hashAgg[LOCAL] -----------------PhysicalOlapScan[t_gbykey_2_dstkey_10_30_id] +--------hashAgg[GLOBAL] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalOlapScan[t_gbykey_2_dstkey_10_30_id] -- !agg_distinct_satisfy_gby_key_with_other_func_low_ndv -- PhysicalResultSink --PhysicalQuickSort[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalQuickSort[LOCAL_SORT] ---------hashAgg[DISTINCT_GLOBAL] -----------hashAgg[GLOBAL] -------------PhysicalOlapScan[t_gbykey_2_dstkey_10_30_id] +--------hashAgg[GLOBAL] +----------PhysicalOlapScan[t_gbykey_2_dstkey_10_30_id] -- !agg_distinct_satisfy_dst_key_with_other_func_low_ndv -- PhysicalResultSink --PhysicalQuickSort[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalQuickSort[LOCAL_SORT] ---------hashAgg[DISTINCT_GLOBAL] +--------hashAgg[GLOBAL] ----------PhysicalDistribute[DistributionSpecHash] -------------hashAgg[DISTINCT_LOCAL] ---------------hashAgg[GLOBAL] -----------------PhysicalOlapScan[t_gbykey_2_dstkey_10_30_id] +------------hashAgg[LOCAL] +--------------PhysicalOlapScan[t_gbykey_2_dstkey_10_30_id] -- !agg_distinct_without_gby_key_low_ndv -- PhysicalResultSink diff --git a/regression-test/data/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.out b/regression-test/data/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.out index ea5361dc350ca8..05ef347e89bcbd 100644 --- a/regression-test/data/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.out +++ b/regression-test/data/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.out @@ -102,19 +102,11 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------------PhysicalDistribute[DistributionSpecExecutionAny] --------------------PhysicalCteConsumer ( cteId=CTEId#0 ) --- !no_stats_should_use_cte_with_group_by -- -PhysicalCteAnchor ( cteId=CTEId#0 ) ---PhysicalCteProducer ( cteId=CTEId#0 ) -----PhysicalOlapScan[t1000] ---PhysicalResultSink -----PhysicalDistribute[DistributionSpecGather] -------hashJoin[INNER_JOIN colocated] hashCondition=((a_1 <=> .a_1)) otherCondition=() ---------hashAgg[GLOBAL] -----------hashAgg[GLOBAL] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalCteConsumer ( cteId=CTEId#0 ) ---------hashAgg[GLOBAL] -----------hashAgg[GLOBAL] -------------PhysicalDistribute[DistributionSpecHash] ---------------PhysicalCteConsumer ( cteId=CTEId#0 ) +-- !no_stats_should_use_multi_distinct -- +PhysicalResultSink +--PhysicalDistribute[DistributionSpecGather] +----hashAgg[GLOBAL] +------PhysicalDistribute[DistributionSpecHash] +--------hashAgg[LOCAL] +----------PhysicalOlapScan[t1000] diff --git a/regression-test/data/nereids_rules_p0/agg_strategy/test_variables.out b/regression-test/data/nereids_rules_p0/agg_strategy/test_variables.out index 4c9866e9568385..0a009cc77df199 100644 --- a/regression-test/data/nereids_rules_p0/agg_strategy/test_variables.out +++ b/regression-test/data/nereids_rules_p0/agg_strategy/test_variables.out @@ -219,13 +219,11 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------hashJoin[INNER_JOIN colocated] hashCondition=((gby_key <=> .gby_key)) otherCondition=() --------hashJoin[INNER_JOIN colocated] hashCondition=((gby_key <=> .gby_key)) otherCondition=() ----------hashAgg[GLOBAL] -------------hashAgg[GLOBAL] ---------------PhysicalDistribute[DistributionSpecHash] -----------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------PhysicalDistribute[DistributionSpecHash] +--------------PhysicalCteConsumer ( cteId=CTEId#0 ) ----------hashAgg[GLOBAL] -------------hashAgg[GLOBAL] ---------------PhysicalDistribute[DistributionSpecHash] -----------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------PhysicalDistribute[DistributionSpecHash] +--------------PhysicalCteConsumer ( cteId=CTEId#0 ) --------hashAgg[GLOBAL] ----------PhysicalDistribute[DistributionSpecHash] ------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out b/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out index a8bdae2200e23d..735f924821a8e0 100644 --- a/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out +++ b/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out @@ -537,18 +537,11 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------------PhysicalCteConsumer ( cteId=CTEId#0 ) -- !has_grouping -- -PhysicalCteAnchor ( cteId=CTEId#0 ) ---PhysicalCteProducer ( cteId=CTEId#0 ) -----PhysicalRepeat -------PhysicalOlapScan[test_distinct_multi] ---PhysicalResultSink -----hashJoin[INNER_JOIN] hashCondition=((GROUPING_ID <=> .GROUPING_ID) and (a <=> .a) and (b <=> .b) and (c <=> .c)) otherCondition=() -------hashAgg[GLOBAL] ---------hashAgg[GLOBAL] -----------PhysicalCteConsumer ( cteId=CTEId#0 ) -------hashAgg[GLOBAL] ---------hashAgg[GLOBAL] -----------PhysicalCteConsumer ( cteId=CTEId#0 ) +PhysicalResultSink +--hashAgg[GLOBAL] +----hashAgg[LOCAL] +------PhysicalRepeat +--------PhysicalOlapScan[test_distinct_multi] -- !null_hash -- 1 \N 0 0.0 diff --git a/regression-test/data/nereids_rules_p0/merge_aggregate/merge_aggregate.out b/regression-test/data/nereids_rules_p0/merge_aggregate/merge_aggregate.out index 51002fa7feb92c..f66de4badaaa0c 100644 --- a/regression-test/data/nereids_rules_p0/merge_aggregate/merge_aggregate.out +++ b/regression-test/data/nereids_rules_p0/merge_aggregate/merge_aggregate.out @@ -126,8 +126,7 @@ PhysicalResultSink ------PhysicalQuickSort[LOCAL_SORT] --------PhysicalProject ----------hashAgg[GLOBAL] -------------hashAgg[GLOBAL] ---------------PhysicalOlapScan[mal_test1] +------------PhysicalOlapScan[mal_test1] -- !middle_project_has_expression_cannot_merge_shape1 -- PhysicalResultSink @@ -196,12 +195,11 @@ PhysicalResultSink ------PhysicalQuickSort[LOCAL_SORT] --------PhysicalProject ----------hashAgg[GLOBAL] -------------hashAgg[GLOBAL] ---------------PhysicalDistribute[DistributionSpecHash] -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalOlapScan[mal_test1] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalOlapScan[mal_test1] -- !inner_agg_has_distinct_cannot_merge_shape -- PhysicalResultSink @@ -214,8 +212,7 @@ PhysicalResultSink --------------hashAgg[LOCAL] ----------------PhysicalProject ------------------hashAgg[GLOBAL] ---------------------hashAgg[GLOBAL] -----------------------PhysicalOlapScan[mal_test1] +--------------------PhysicalOlapScan[mal_test1] -- !agg_with_expr_cannot_merge_shape1 -- PhysicalResultSink @@ -255,11 +252,10 @@ PhysicalResultSink ------hashAgg[LOCAL] --------PhysicalProject ----------hashAgg[GLOBAL] -------------hashAgg[GLOBAL] ---------------PhysicalDistribute[DistributionSpecHash] -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------PhysicalOlapScan[mal_test_merge_agg] +------------PhysicalDistribute[DistributionSpecHash] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------PhysicalOlapScan[mal_test_merge_agg] -- !test_distinct_expr_transform -- -1 @@ -282,11 +278,10 @@ PhysicalResultSink --PhysicalDistribute[DistributionSpecGather] ----PhysicalProject ------hashAgg[GLOBAL] ---------hashAgg[GLOBAL] -----------PhysicalDistribute[DistributionSpecHash] -------------hashAgg[LOCAL] ---------------PhysicalProject -----------------PhysicalOlapScan[mal_test_merge_agg] +--------PhysicalDistribute[DistributionSpecHash] +----------hashAgg[LOCAL] +------------PhysicalProject +--------------PhysicalOlapScan[mal_test_merge_agg] -- !test_sum_empty_table -- \N \N \N diff --git a/regression-test/data/shape_check/clickbench/query10.out b/regression-test/data/shape_check/clickbench/query10.out index ae9174ce1c1551..c784056436912a 100644 --- a/regression-test/data/shape_check/clickbench/query10.out +++ b/regression-test/data/shape_check/clickbench/query10.out @@ -4,10 +4,9 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------hashAgg[DISTINCT_GLOBAL] +--------hashAgg[GLOBAL] ----------PhysicalDistribute[DistributionSpecHash] -------------hashAgg[DISTINCT_LOCAL] ---------------hashAgg[GLOBAL] -----------------PhysicalProject -------------------PhysicalOlapScan[hits] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------PhysicalOlapScan[hits] diff --git a/regression-test/data/shape_check/clickbench/query11.out b/regression-test/data/shape_check/clickbench/query11.out index 856e55187f92a7..4b5e4486d3f4cc 100644 --- a/regression-test/data/shape_check/clickbench/query11.out +++ b/regression-test/data/shape_check/clickbench/query11.out @@ -7,8 +7,7 @@ PhysicalResultSink --------hashAgg[GLOBAL] ----------PhysicalDistribute[DistributionSpecHash] ------------hashAgg[LOCAL] ---------------hashAgg[GLOBAL] -----------------PhysicalProject -------------------filter(( not (MobilePhoneModel = ''))) ---------------------PhysicalOlapScan[hits] +--------------PhysicalProject +----------------filter(( not (MobilePhoneModel = ''))) +------------------PhysicalOlapScan[hits] diff --git a/regression-test/data/shape_check/clickbench/query12.out b/regression-test/data/shape_check/clickbench/query12.out index d47a7e129e39bd..10928363a83c02 100644 --- a/regression-test/data/shape_check/clickbench/query12.out +++ b/regression-test/data/shape_check/clickbench/query12.out @@ -7,8 +7,7 @@ PhysicalResultSink --------hashAgg[GLOBAL] ----------PhysicalDistribute[DistributionSpecHash] ------------hashAgg[LOCAL] ---------------hashAgg[GLOBAL] -----------------PhysicalProject -------------------filter(( not (MobilePhoneModel = ''))) ---------------------PhysicalOlapScan[hits] +--------------PhysicalProject +----------------filter(( not (MobilePhoneModel = ''))) +------------------PhysicalOlapScan[hits] diff --git a/regression-test/data/shape_check/clickbench/query14.out b/regression-test/data/shape_check/clickbench/query14.out index 54afcc6268ce39..35eedce41b927a 100644 --- a/regression-test/data/shape_check/clickbench/query14.out +++ b/regression-test/data/shape_check/clickbench/query14.out @@ -7,8 +7,7 @@ PhysicalResultSink --------hashAgg[GLOBAL] ----------PhysicalDistribute[DistributionSpecHash] ------------hashAgg[LOCAL] ---------------hashAgg[GLOBAL] -----------------PhysicalProject -------------------filter(( not (SearchPhrase = ''))) ---------------------PhysicalOlapScan[hits] +--------------PhysicalProject +----------------filter(( not (SearchPhrase = ''))) +------------------PhysicalOlapScan[hits] diff --git a/regression-test/data/shape_check/clickbench/query23.out b/regression-test/data/shape_check/clickbench/query23.out index 5c6ed877934180..76a91b3ad49968 100644 --- a/regression-test/data/shape_check/clickbench/query23.out +++ b/regression-test/data/shape_check/clickbench/query23.out @@ -7,8 +7,7 @@ PhysicalResultSink --------hashAgg[GLOBAL] ----------PhysicalDistribute[DistributionSpecHash] ------------hashAgg[LOCAL] ---------------hashAgg[GLOBAL] -----------------PhysicalProject -------------------filter(( not (SearchPhrase = '')) and ( not (URL like '%.google.%')) and (Title like '%Google%')) ---------------------PhysicalOlapScan[hits] +--------------PhysicalProject +----------------filter(( not (SearchPhrase = '')) and ( not (URL like '%.google.%')) and (Title like '%Google%')) +------------------PhysicalOlapScan[hits] diff --git a/regression-test/data/shape_check/clickbench/query9.out b/regression-test/data/shape_check/clickbench/query9.out index b35cb2e2a808e9..dcece9f0ce72d7 100644 --- a/regression-test/data/shape_check/clickbench/query9.out +++ b/regression-test/data/shape_check/clickbench/query9.out @@ -7,7 +7,6 @@ PhysicalResultSink --------hashAgg[GLOBAL] ----------PhysicalDistribute[DistributionSpecHash] ------------hashAgg[LOCAL] ---------------hashAgg[GLOBAL] -----------------PhysicalProject -------------------PhysicalOlapScan[hits] +--------------PhysicalProject +----------------PhysicalOlapScan[hits] diff --git a/regression-test/data/shape_check/tpch_sf1000/nostats_rf_prune/q16.out b/regression-test/data/shape_check/tpch_sf1000/nostats_rf_prune/q16.out index 1ed947eacabb04..7b04caaf3e087a 100644 --- a/regression-test/data/shape_check/tpch_sf1000/nostats_rf_prune/q16.out +++ b/regression-test/data/shape_check/tpch_sf1000/nostats_rf_prune/q16.out @@ -5,18 +5,17 @@ PhysicalResultSink ----PhysicalDistribute[DistributionSpecGather] ------PhysicalQuickSort[LOCAL_SORT] --------hashAgg[GLOBAL] -----------hashAgg[GLOBAL] -------------PhysicalDistribute[DistributionSpecHash] ---------------hashAgg[LOCAL] -----------------PhysicalProject -------------------hashJoin[INNER_JOIN colocated] hashCondition=((part.p_partkey = partsupp.ps_partkey)) otherCondition=() build RFs:RF0 p_partkey->[ps_partkey] ---------------------hashJoin[LEFT_ANTI_JOIN broadcast] hashCondition=((partsupp.ps_suppkey = supplier.s_suppkey)) otherCondition=() -----------------------PhysicalProject -------------------------PhysicalOlapScan[partsupp] apply RFs: RF0 -----------------------PhysicalProject -------------------------filter((s_comment like '%Customer%Complaints%')) ---------------------------PhysicalOlapScan[supplier] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN colocated] hashCondition=((part.p_partkey = partsupp.ps_partkey)) otherCondition=() build RFs:RF0 p_partkey->[ps_partkey] +------------------hashJoin[LEFT_ANTI_JOIN broadcast] hashCondition=((partsupp.ps_suppkey = supplier.s_suppkey)) otherCondition=() --------------------PhysicalProject -----------------------filter(( not (p_brand = 'Brand#45')) and ( not (p_type like 'MEDIUM POLISHED%')) and p_size IN (14, 19, 23, 3, 36, 45, 49, 9)) -------------------------PhysicalOlapScan[part] +----------------------PhysicalOlapScan[partsupp] apply RFs: RF0 +--------------------PhysicalProject +----------------------filter((s_comment like '%Customer%Complaints%')) +------------------------PhysicalOlapScan[supplier] +------------------PhysicalProject +--------------------filter(( not (p_brand = 'Brand#45')) and ( not (p_type like 'MEDIUM POLISHED%')) and p_size IN (14, 19, 23, 3, 36, 45, 49, 9)) +----------------------PhysicalOlapScan[part] diff --git a/regression-test/data/shape_check/tpch_sf1000/shape_no_stats/q16.out b/regression-test/data/shape_check/tpch_sf1000/shape_no_stats/q16.out index 1ed947eacabb04..7b04caaf3e087a 100644 --- a/regression-test/data/shape_check/tpch_sf1000/shape_no_stats/q16.out +++ b/regression-test/data/shape_check/tpch_sf1000/shape_no_stats/q16.out @@ -5,18 +5,17 @@ PhysicalResultSink ----PhysicalDistribute[DistributionSpecGather] ------PhysicalQuickSort[LOCAL_SORT] --------hashAgg[GLOBAL] -----------hashAgg[GLOBAL] -------------PhysicalDistribute[DistributionSpecHash] ---------------hashAgg[LOCAL] -----------------PhysicalProject -------------------hashJoin[INNER_JOIN colocated] hashCondition=((part.p_partkey = partsupp.ps_partkey)) otherCondition=() build RFs:RF0 p_partkey->[ps_partkey] ---------------------hashJoin[LEFT_ANTI_JOIN broadcast] hashCondition=((partsupp.ps_suppkey = supplier.s_suppkey)) otherCondition=() -----------------------PhysicalProject -------------------------PhysicalOlapScan[partsupp] apply RFs: RF0 -----------------------PhysicalProject -------------------------filter((s_comment like '%Customer%Complaints%')) ---------------------------PhysicalOlapScan[supplier] +----------PhysicalDistribute[DistributionSpecHash] +------------hashAgg[LOCAL] +--------------PhysicalProject +----------------hashJoin[INNER_JOIN colocated] hashCondition=((part.p_partkey = partsupp.ps_partkey)) otherCondition=() build RFs:RF0 p_partkey->[ps_partkey] +------------------hashJoin[LEFT_ANTI_JOIN broadcast] hashCondition=((partsupp.ps_suppkey = supplier.s_suppkey)) otherCondition=() --------------------PhysicalProject -----------------------filter(( not (p_brand = 'Brand#45')) and ( not (p_type like 'MEDIUM POLISHED%')) and p_size IN (14, 19, 23, 3, 36, 45, 49, 9)) -------------------------PhysicalOlapScan[part] +----------------------PhysicalOlapScan[partsupp] apply RFs: RF0 +--------------------PhysicalProject +----------------------filter((s_comment like '%Customer%Complaints%')) +------------------------PhysicalOlapScan[supplier] +------------------PhysicalProject +--------------------filter(( not (p_brand = 'Brand#45')) and ( not (p_type like 'MEDIUM POLISHED%')) and p_size IN (14, 19, 23, 3, 36, 45, 49, 9)) +----------------------PhysicalOlapScan[part] diff --git a/regression-test/suites/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.groovy b/regression-test/suites/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.groovy index bd317546768f32..406a409390b6fc 100644 --- a/regression-test/suites/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.groovy +++ b/regression-test/suites/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.groovy @@ -38,6 +38,6 @@ suite("distinct_agg_strategy_selector") { sql "drop stats t1000" qt_no_stats_should_use_cte """explain shape plan select count(distinct a_1) , count(distinct b_5) from t1000;""" - qt_no_stats_should_use_cte_with_group_by """explain shape plan + qt_no_stats_should_use_multi_distinct """explain shape plan select count(distinct d_20) , count(distinct b_5) from t1000 group by a_1;""" } \ No newline at end of file diff --git a/regression-test/suites/nereids_rules_p0/mv/agg_variety/agg_variety.groovy b/regression-test/suites/nereids_rules_p0/mv/agg_variety/agg_variety.groovy index 27a8b693dfe9ac..d12442a26f1716 100644 --- a/regression-test/suites/nereids_rules_p0/mv/agg_variety/agg_variety.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/agg_variety/agg_variety.groovy @@ -21,6 +21,7 @@ suite("agg_variety") { sql "use ${db}" sql "set runtime_filter_mode=OFF"; sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" + sql "set pre_materialized_view_rewrite_strategy = TRY_IN_RBO" sql """ drop table if exists orders