From 46ffe7b7a5e5cd550d3d49b4af48005cf353ae44 Mon Sep 17 00:00:00 2001 From: starocean999 <12095047@qq.com> Date: Tue, 19 Dec 2023 09:59:22 +0800 Subject: [PATCH 1/4] [refactor](nereids)make NormalizeAggregate rule more clear and readable --- .../rules/analysis/NormalizeAggregate.java | 163 +++++++++--------- .../aggregate/agg_distinct_case_when.groovy | 54 ++++++ 2 files changed, 135 insertions(+), 82 deletions(-) create mode 100644 regression-test/suites/nereids_p0/aggregate/agg_distinct_case_when.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java index d265c3d8d40830..577d4c6fbf53ba 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java @@ -20,6 +20,7 @@ import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; import org.apache.doris.nereids.rules.rewrite.NormalizeToSlot; +import org.apache.doris.nereids.rules.rewrite.NormalizeToSlot.NormalizeToSlotContext; import org.apache.doris.nereids.rules.rewrite.OneRewriteRuleFactory; import org.apache.doris.nereids.trees.expressions.Alias; import org.apache.doris.nereids.trees.expressions.Expression; @@ -40,11 +41,9 @@ import com.google.common.collect.ImmutableList.Builder; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; -import com.google.common.collect.Maps; import com.google.common.collect.Sets; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -100,22 +99,74 @@ public class NormalizeAggregate extends OneRewriteRuleFactory implements Normali @Override public Rule build() { return logicalAggregate().whenNot(LogicalAggregate::isNormalized).then(aggregate -> { + // The LogicalAggregate node may contain window agg functions and usual agg functions + // we call window agg functions as window-agg and usual agg functions as trival-agg for short + // This rule simplify LogicalAggregate node by: + // 1. Push down some exprs from old LogicalAggregate node to a new child LogicalProject Node, + // 2. create a new LogicalAggregate with normalized group by exprs and trival-aggs + // 3. Pull up normalized old LogicalAggregate's output exprs to a new parent LogicalProject Node + // Push down exprs: + // 1. all group by exprs + // 2. subquery expr in trival-agg + // 3. all input slots of trival-agg + // 4. expr(including subquery) in distinct trival-agg + // Normalize LogicalAggregate's output. + // 1. normalize group by exprs by outputs of bottom LogicalProject + // 2. normalize trival-aggs by outputs of bottom LogicalProject + // 3. build normalized agg outputs + // Pull up exprs: + // normalize all output exprs in old LogicalAggregate to build a parent project node, typically includes: + // 1. simple slots + // 2. aliases + // a. alias with no aggs child + // b. alias with trival-agg child + // c. alias with window-agg - List aggregateOutput = aggregate.getOutputExpressions(); - Set existsAlias = ExpressionUtils.mutableCollect(aggregateOutput, Alias.class::isInstance); + // Push down exprs: + // collect group by exprs + Set groupingByExprs = + ImmutableSet.copyOf(aggregate.getGroupByExpressions()); + // collect all trival-agg + List aggregateOutput = aggregate.getOutputExpressions(); List aggFuncs = Lists.newArrayList(); aggregateOutput.forEach(o -> o.accept(CollectNonWindowedAggFuncs.INSTANCE, aggFuncs)); - // we need push down subquery exprs inside non-window and non-distinct agg functions + // collect subquery expr in trival-agg Set subqueryExprs = ExpressionUtils.mutableCollect(aggFuncs.stream() .filter(aggFunc -> !aggFunc.isDistinct()).collect(Collectors.toList()), SubqueryExpr.class::isInstance); - Set groupingByExprs = ImmutableSet.copyOf(aggregate.getGroupByExpressions()); + + // collect all input slots of trival-agg + Set allAggFuncInputSlots = aggFuncs.stream() + .flatMap(agg -> agg.getInputSlots().stream()).collect(Collectors.toSet()); + + // collect expr in distinct trival-agg + Set distinctAggChildExprs = aggFuncs.stream() + .filter(agg -> agg.isDistinct()).flatMap(agg -> agg.children().stream()) + .filter(child -> !(child instanceof SlotReference && child instanceof Literal)) + .collect(Collectors.toSet()); + + Set existsAlias = + ExpressionUtils.mutableCollect(aggregateOutput, Alias.class::isInstance); + + // push down group by and subquery exprs to bottom project + Set allPushDownExprs = + Sets.union(Sets.union(groupingByExprs, subqueryExprs), distinctAggChildExprs); NormalizeToSlotContext bottomSlotContext = - NormalizeToSlotContext.buildContext(existsAlias, Sets.union(groupingByExprs, subqueryExprs)); - Set bottomOutputs = - bottomSlotContext.pushDownToNamedExpression(Sets.union(groupingByExprs, subqueryExprs)); + NormalizeToSlotContext.buildContext(existsAlias, allPushDownExprs); + Set bottomProjects = + bottomSlotContext.pushDownToNamedExpression(allPushDownExprs); + + // create bottom project + Plan bottomPlan; + if (!bottomProjects.isEmpty()) { + bottomPlan = new LogicalProject<>( + ImmutableList.copyOf(Sets.union(bottomProjects, allAggFuncInputSlots)), + aggregate.child()); + } else { + bottomPlan = aggregate.child(); + } // use group by context to normalize agg functions to process // sql like: select sum(a + 1) from t group by a + 1 @@ -127,89 +178,37 @@ public Rule build() { // after normalize: // agg(output: sum(alias(a + 1)[#1])[#2], group_by: alias(a + 1)[#1]) // +-- project((a[#0] + 1)[#1]) - List normalizedAggFuncs = bottomSlotContext.normalizeToUseSlotRef(aggFuncs); - Set bottomProjects = Sets.newHashSet(bottomOutputs); - // TODO: if we have distinct agg, we must push down its children, - // because need use it to generate distribution enforce - // step 1: split agg functions into 2 parts: distinct and not distinct - List distinctAggFuncs = Lists.newArrayList(); - List nonDistinctAggFuncs = Lists.newArrayList(); - for (AggregateFunction aggregateFunction : normalizedAggFuncs) { - if (aggregateFunction.isDistinct()) { - distinctAggFuncs.add(aggregateFunction); - } else { - nonDistinctAggFuncs.add(aggregateFunction); - } - } - // step 2: if we only have one distinct agg function, we do push down for it - if (!distinctAggFuncs.isEmpty()) { - // process distinct normalize and put it back to normalizedAggFuncs - List newDistinctAggFuncs = Lists.newArrayList(); - Map replaceMap = Maps.newHashMap(); - Map aliasCache = Maps.newHashMap(); - for (AggregateFunction distinctAggFunc : distinctAggFuncs) { - List newChildren = Lists.newArrayList(); - for (Expression child : distinctAggFunc.children()) { - if (child instanceof SlotReference || child instanceof Literal) { - newChildren.add(child); - } else { - NamedExpression alias; - if (aliasCache.containsKey(child)) { - alias = aliasCache.get(child); - } else { - alias = new Alias(child); - aliasCache.put(child, alias); - } - bottomProjects.add(alias); - newChildren.add(alias.toSlot()); - } - } - AggregateFunction newDistinctAggFunc = distinctAggFunc.withChildren(newChildren); - replaceMap.put(distinctAggFunc, newDistinctAggFunc); - newDistinctAggFuncs.add(newDistinctAggFunc); - } - aggregateOutput = aggregateOutput.stream() - .map(e -> ExpressionUtils.replace(e, replaceMap)) - .map(NamedExpression.class::cast) - .collect(Collectors.toList()); - distinctAggFuncs = newDistinctAggFuncs; - } - normalizedAggFuncs = Lists.newArrayList(nonDistinctAggFuncs); - normalizedAggFuncs.addAll(distinctAggFuncs); - // TODO: process redundant expressions in aggregate functions children + + // normalize group by exprs by bottomProjects + List normalizedGroupExprs = + bottomSlotContext.normalizeToUseSlotRef(groupingByExprs); + + // normalize trival-aggs by bottomProjects + List normalizedAggFuncs = + bottomSlotContext.normalizeToUseSlotRef(aggFuncs); + // build normalized agg output NormalizeToSlotContext normalizedAggFuncsToSlotContext = NormalizeToSlotContext.buildContext(existsAlias, normalizedAggFuncs); - // agg output include 2 part, normalized group by slots and normalized agg functions + + // agg output include 2 parts + // all bottom projects(group by exprs are included bottom projects) and normalized agg functions List normalizedAggOutput = ImmutableList.builder() - .addAll(bottomOutputs.stream().map(NamedExpression::toSlot).iterator()) - .addAll(normalizedAggFuncsToSlotContext.pushDownToNamedExpression(normalizedAggFuncs)) + .addAll(bottomProjects.stream().map(NamedExpression::toSlot).iterator()) + .addAll(normalizedAggFuncsToSlotContext + .pushDownToNamedExpression(normalizedAggFuncs)) .build(); - // add normalized agg's input slots to bottom projects - Set bottomProjectSlots = bottomProjects.stream() - .map(NamedExpression::toSlot) - .collect(Collectors.toSet()); - Set aggInputSlots = normalizedAggFuncs.stream() - .map(Expression::getInputSlots) - .flatMap(Set::stream) - .filter(e -> !bottomProjectSlots.contains(e)) - .collect(Collectors.toSet()); - bottomProjects.addAll(aggInputSlots); - // build group by exprs - List normalizedGroupExprs = bottomSlotContext.normalizeToUseSlotRef(groupingByExprs); - Plan bottomPlan; - if (!bottomProjects.isEmpty()) { - bottomPlan = new LogicalProject<>(ImmutableList.copyOf(bottomProjects), aggregate.child()); - } else { - bottomPlan = aggregate.child(); - } + // create new agg node + LogicalAggregate newAggregate = + aggregate.withNormalized(normalizedGroupExprs, normalizedAggOutput, bottomPlan); + // create upper projects by normalize all output exprs in old LogicalAggregate List upperProjects = normalizeOutput(aggregateOutput, bottomSlotContext, normalizedAggFuncsToSlotContext); - return new LogicalProject<>(upperProjects, - aggregate.withNormalized(normalizedGroupExprs, normalizedAggOutput, bottomPlan)); + // create a parent project node + return new LogicalProject<>(upperProjects, newAggregate); }).toRule(RuleType.NORMALIZE_AGGREGATE); } diff --git a/regression-test/suites/nereids_p0/aggregate/agg_distinct_case_when.groovy b/regression-test/suites/nereids_p0/aggregate/agg_distinct_case_when.groovy new file mode 100644 index 00000000000000..546586702e5f4b --- /dev/null +++ b/regression-test/suites/nereids_p0/aggregate/agg_distinct_case_when.groovy @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +suite("agg_distinct_case_when") { + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + sql "DROP TABLE IF EXISTS agg_test_table_t;" + sql """ + CREATE TABLE `agg_test_table_t` ( + `k1` varchar(65533) NULL, + `k2` text NULL, + `k3` text null, + `k4` text null + ) ENGINE=OLAP + DUPLICATE KEY(`k1`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`k1`) BUCKETS 10 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "is_being_synced" = "false", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false" + ); + """ + + sql """insert into agg_test_table_t(`k1`,`k2`,`k3`) values('20231026221524','PA','adigu1bububud');""" + sql """ + select + count(distinct case when t.k2='PA' and loan_date=to_date(substr(t.k1,1,8)) then t.k2 end ) + from ( + select substr(k1,1,8) loan_date,k3,k2,k1 from agg_test_table_t) t + group by + substr(t.k1,1,8);""" + + sql "DROP TABLE IF EXISTS agg_test_table_t;" +} From f1360810c129aeb80a13c31ffade8b454aa93767 Mon Sep 17 00:00:00 2001 From: starocean999 <12095047@qq.com> Date: Wed, 20 Dec 2023 09:40:23 +0800 Subject: [PATCH 2/4] fix failed fe ut --- .../rules/analysis/NormalizeAggregate.java | 68 ++++++++++++------- 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java index 577d4c6fbf53ba..692ad428d759f8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java @@ -43,7 +43,9 @@ import com.google.common.collect.Lists; import com.google.common.collect.Sets; +import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -107,9 +109,10 @@ public Rule build() { // 3. Pull up normalized old LogicalAggregate's output exprs to a new parent LogicalProject Node // Push down exprs: // 1. all group by exprs - // 2. subquery expr in trival-agg - // 3. all input slots of trival-agg - // 4. expr(including subquery) in distinct trival-agg + // 2. child contains subquery expr in trival-agg + // 3. child contains window expr in trival-agg + // 4. all input slots of trival-agg + // 5. expr(including subquery) in distinct trival-agg // Normalize LogicalAggregate's output. // 1. normalize group by exprs by outputs of bottom LogicalProject // 2. normalize trival-aggs by outputs of bottom LogicalProject @@ -132,37 +135,56 @@ public Rule build() { List aggFuncs = Lists.newArrayList(); aggregateOutput.forEach(o -> o.accept(CollectNonWindowedAggFuncs.INSTANCE, aggFuncs)); - // collect subquery expr in trival-agg - Set subqueryExprs = ExpressionUtils.mutableCollect(aggFuncs.stream() - .filter(aggFunc -> !aggFunc.isDistinct()).collect(Collectors.toList()), - SubqueryExpr.class::isInstance); + // split non-distinct agg child as two part + // TRUE part 1: need push down itself, if it contains subqury or window expression + // FALSE part 2: need push down its input slots, if it DOES NOT contain subqury or window expression + Map> categorizedNoDistinctAggsChildren = aggFuncs.stream() + .filter(aggFunc -> !aggFunc.isDistinct()) + .flatMap(agg -> agg.children().stream()) + .collect(Collectors.groupingBy( + child -> child.containsType(SubqueryExpr.class, WindowExpression.class), + Collectors.toSet())); - // collect all input slots of trival-agg - Set allAggFuncInputSlots = aggFuncs.stream() - .flatMap(agg -> agg.getInputSlots().stream()).collect(Collectors.toSet()); + // split non-distinct agg child as two parts + // TRUE part 1: need push down itself, if it is NOT SlotReference or Literal + // FALSE part 2: need push down its input slots, if it is SlotReference or Literal + Map> categorizedDistinctAggsChildren = aggFuncs.stream() + .filter(aggFunc -> aggFunc.isDistinct()).flatMap(agg -> agg.children().stream()) + .collect(Collectors.groupingBy( + child -> !(child instanceof SlotReference || child instanceof Literal), + Collectors.toSet())); - // collect expr in distinct trival-agg - Set distinctAggChildExprs = aggFuncs.stream() - .filter(agg -> agg.isDistinct()).flatMap(agg -> agg.children().stream()) - .filter(child -> !(child instanceof SlotReference && child instanceof Literal)) - .collect(Collectors.toSet()); + Set needPushSelf = Sets.union( + categorizedNoDistinctAggsChildren.getOrDefault(true, new HashSet<>()), + categorizedDistinctAggsChildren.getOrDefault(true, new HashSet<>())); + Set needPushInputSlots = ExpressionUtils.getInputSlotSet(Sets.union( + categorizedNoDistinctAggsChildren.getOrDefault(false, new HashSet<>()), + categorizedDistinctAggsChildren.getOrDefault(false, new HashSet<>()))); Set existsAlias = ExpressionUtils.mutableCollect(aggregateOutput, Alias.class::isInstance); - // push down group by and subquery exprs to bottom project + // push down 3 kinds of exprs, these pushed exprs will be used to normalize agg output later + // 1. group by exprs + // 2. trivalAgg children + // 3. trivalAgg input slots Set allPushDownExprs = - Sets.union(Sets.union(groupingByExprs, subqueryExprs), distinctAggChildExprs); + Sets.union(groupingByExprs, Sets.union(needPushSelf, needPushInputSlots)); NormalizeToSlotContext bottomSlotContext = NormalizeToSlotContext.buildContext(existsAlias, allPushDownExprs); - Set bottomProjects = - bottomSlotContext.pushDownToNamedExpression(allPushDownExprs); + Set pushedGroupByExprs = + bottomSlotContext.pushDownToNamedExpression(groupingByExprs); + Set pushedTrivalAggChildren = + bottomSlotContext.pushDownToNamedExpression(needPushSelf); + Set pushedTrivalAggInputSlots = + bottomSlotContext.pushDownToNamedExpression(needPushInputSlots); + Set bottomProjects = Sets.union(pushedGroupByExprs, + Sets.union(pushedTrivalAggChildren, pushedTrivalAggInputSlots)); // create bottom project Plan bottomPlan; if (!bottomProjects.isEmpty()) { - bottomPlan = new LogicalProject<>( - ImmutableList.copyOf(Sets.union(bottomProjects, allAggFuncInputSlots)), + bottomPlan = new LogicalProject<>(ImmutableList.copyOf(bottomProjects), aggregate.child()); } else { bottomPlan = aggregate.child(); @@ -192,9 +214,9 @@ public Rule build() { NormalizeToSlotContext.buildContext(existsAlias, normalizedAggFuncs); // agg output include 2 parts - // all bottom projects(group by exprs are included bottom projects) and normalized agg functions + // pushedGroupByExprs and normalized agg functions List normalizedAggOutput = ImmutableList.builder() - .addAll(bottomProjects.stream().map(NamedExpression::toSlot).iterator()) + .addAll(pushedGroupByExprs.stream().map(NamedExpression::toSlot).iterator()) .addAll(normalizedAggFuncsToSlotContext .pushDownToNamedExpression(normalizedAggFuncs)) .build(); From 50c1a482520d5f0b2339eb541ff976b8dd47143e Mon Sep 17 00:00:00 2001 From: starocean999 <12095047@qq.com> Date: Wed, 20 Dec 2023 14:13:33 +0800 Subject: [PATCH 3/4] fix out file --- .../noStatsRfPrune/query16.out | 57 +++++++++-------- .../noStatsRfPrune/query94.out | 57 +++++++++-------- .../no_stats_shape/query16.out | 57 +++++++++-------- .../no_stats_shape/query94.out | 57 +++++++++-------- .../rf_prune/query16.out | 61 +++++++++---------- .../rf_prune/query94.out | 53 ++++++++-------- .../shape/query16.out | 61 +++++++++---------- .../shape/query94.out | 53 ++++++++-------- .../window_functions/test_window_fn.groovy | 7 +-- 9 files changed, 227 insertions(+), 236 deletions(-) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query16.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query16.out index d7431a4cc9001a..b08f9d62730057 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query16.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query16.out @@ -3,37 +3,36 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalTopN[LOCAL_SORT] -------PhysicalProject ---------hashAgg[DISTINCT_GLOBAL] -----------PhysicalDistribute -------------hashAgg[DISTINCT_LOCAL] ---------------hashAgg[GLOBAL] -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=() -----------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF3 cc_call_center_sk->[cs_call_center_sk] -------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF2 cs_order_number->[cs_order_number] ---------------------------PhysicalDistribute -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 ---------------------------PhysicalDistribute -----------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[cs_ship_addr_sk] -------------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_ship_date_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF3 ---------------------------------PhysicalDistribute -----------------------------------PhysicalProject -------------------------------------filter((date_dim.d_date <= '2002-05-31') and (date_dim.d_date >= '2002-04-01')) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalDistribute ---------------------------------PhysicalProject -----------------------------------filter((customer_address.ca_state = 'WV')) -------------------------------------PhysicalOlapScan[customer_address] +------hashAgg[DISTINCT_GLOBAL] +--------PhysicalDistribute +----------hashAgg[DISTINCT_LOCAL] +------------hashAgg[GLOBAL] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=() +--------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF3 cc_call_center_sk->[cs_call_center_sk] +----------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF2 cs_order_number->[cs_order_number] ------------------------PhysicalDistribute --------------------------PhysicalProject -----------------------------filter(cc_county IN ('Barrow County', 'Daviess County', 'Luce County', 'Richland County', 'Ziebach County')) -------------------------------PhysicalOlapScan[call_center] +----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 +------------------------PhysicalDistribute +--------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[cs_ship_addr_sk] +----------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_ship_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF3 +------------------------------PhysicalDistribute +--------------------------------PhysicalProject +----------------------------------filter((date_dim.d_date <= '2002-05-31') and (date_dim.d_date >= '2002-04-01')) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalDistribute +------------------------------PhysicalProject +--------------------------------filter((customer_address.ca_state = 'WV')) +----------------------------------PhysicalOlapScan[customer_address] ----------------------PhysicalDistribute ------------------------PhysicalProject ---------------------------PhysicalOlapScan[catalog_returns] +--------------------------filter(cc_county IN ('Barrow County', 'Daviess County', 'Luce County', 'Richland County', 'Ziebach County')) +----------------------------PhysicalOlapScan[call_center] +--------------------PhysicalDistribute +----------------------PhysicalProject +------------------------PhysicalOlapScan[catalog_returns] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query94.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query94.out index 879e2acc49605e..a7f921c597b541 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query94.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/noStatsRfPrune/query94.out @@ -3,37 +3,36 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalTopN[LOCAL_SORT] -------PhysicalProject ---------hashAgg[DISTINCT_GLOBAL] -----------PhysicalDistribute -------------hashAgg[DISTINCT_LOCAL] ---------------hashAgg[GLOBAL] -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=() -----------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF3 web_site_sk->[ws_web_site_sk] -------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF2 ws_order_number->[ws_order_number] ---------------------------PhysicalDistribute -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 ---------------------------PhysicalDistribute -----------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[ws_ship_addr_sk] -------------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_ship_date_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 RF3 ---------------------------------PhysicalDistribute -----------------------------------PhysicalProject -------------------------------------filter((date_dim.d_date <= '2000-04-01') and (date_dim.d_date >= '2000-02-01')) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalDistribute ---------------------------------PhysicalProject -----------------------------------filter((customer_address.ca_state = 'OK')) -------------------------------------PhysicalOlapScan[customer_address] +------hashAgg[DISTINCT_GLOBAL] +--------PhysicalDistribute +----------hashAgg[DISTINCT_LOCAL] +------------hashAgg[GLOBAL] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=() +--------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF3 web_site_sk->[ws_web_site_sk] +----------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF2 ws_order_number->[ws_order_number] ------------------------PhysicalDistribute --------------------------PhysicalProject -----------------------------filter((web_site.web_company_name = 'pri')) -------------------------------PhysicalOlapScan[web_site] +----------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 +------------------------PhysicalDistribute +--------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[ws_ship_addr_sk] +----------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_ship_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 RF3 +------------------------------PhysicalDistribute +--------------------------------PhysicalProject +----------------------------------filter((date_dim.d_date <= '2000-04-01') and (date_dim.d_date >= '2000-02-01')) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalDistribute +------------------------------PhysicalProject +--------------------------------filter((customer_address.ca_state = 'OK')) +----------------------------------PhysicalOlapScan[customer_address] ----------------------PhysicalDistribute ------------------------PhysicalProject ---------------------------PhysicalOlapScan[web_returns] +--------------------------filter((web_site.web_company_name = 'pri')) +----------------------------PhysicalOlapScan[web_site] +--------------------PhysicalDistribute +----------------------PhysicalProject +------------------------PhysicalOlapScan[web_returns] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query16.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query16.out index d7431a4cc9001a..b08f9d62730057 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query16.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query16.out @@ -3,37 +3,36 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalTopN[LOCAL_SORT] -------PhysicalProject ---------hashAgg[DISTINCT_GLOBAL] -----------PhysicalDistribute -------------hashAgg[DISTINCT_LOCAL] ---------------hashAgg[GLOBAL] -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=() -----------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF3 cc_call_center_sk->[cs_call_center_sk] -------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF2 cs_order_number->[cs_order_number] ---------------------------PhysicalDistribute -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 ---------------------------PhysicalDistribute -----------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[cs_ship_addr_sk] -------------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_ship_date_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF3 ---------------------------------PhysicalDistribute -----------------------------------PhysicalProject -------------------------------------filter((date_dim.d_date <= '2002-05-31') and (date_dim.d_date >= '2002-04-01')) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalDistribute ---------------------------------PhysicalProject -----------------------------------filter((customer_address.ca_state = 'WV')) -------------------------------------PhysicalOlapScan[customer_address] +------hashAgg[DISTINCT_GLOBAL] +--------PhysicalDistribute +----------hashAgg[DISTINCT_LOCAL] +------------hashAgg[GLOBAL] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=() +--------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF3 cc_call_center_sk->[cs_call_center_sk] +----------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF2 cs_order_number->[cs_order_number] ------------------------PhysicalDistribute --------------------------PhysicalProject -----------------------------filter(cc_county IN ('Barrow County', 'Daviess County', 'Luce County', 'Richland County', 'Ziebach County')) -------------------------------PhysicalOlapScan[call_center] +----------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 +------------------------PhysicalDistribute +--------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[cs_ship_addr_sk] +----------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_ship_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF3 +------------------------------PhysicalDistribute +--------------------------------PhysicalProject +----------------------------------filter((date_dim.d_date <= '2002-05-31') and (date_dim.d_date >= '2002-04-01')) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalDistribute +------------------------------PhysicalProject +--------------------------------filter((customer_address.ca_state = 'WV')) +----------------------------------PhysicalOlapScan[customer_address] ----------------------PhysicalDistribute ------------------------PhysicalProject ---------------------------PhysicalOlapScan[catalog_returns] +--------------------------filter(cc_county IN ('Barrow County', 'Daviess County', 'Luce County', 'Richland County', 'Ziebach County')) +----------------------------PhysicalOlapScan[call_center] +--------------------PhysicalDistribute +----------------------PhysicalProject +------------------------PhysicalOlapScan[catalog_returns] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query94.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query94.out index 879e2acc49605e..a7f921c597b541 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query94.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/no_stats_shape/query94.out @@ -3,37 +3,36 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalTopN[LOCAL_SORT] -------PhysicalProject ---------hashAgg[DISTINCT_GLOBAL] -----------PhysicalDistribute -------------hashAgg[DISTINCT_LOCAL] ---------------hashAgg[GLOBAL] -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=() -----------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF3 web_site_sk->[ws_web_site_sk] -------------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF2 ws_order_number->[ws_order_number] ---------------------------PhysicalDistribute -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 ---------------------------PhysicalDistribute -----------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[ws_ship_addr_sk] -------------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_ship_date_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 RF3 ---------------------------------PhysicalDistribute -----------------------------------PhysicalProject -------------------------------------filter((date_dim.d_date <= '2000-04-01') and (date_dim.d_date >= '2000-02-01')) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalDistribute ---------------------------------PhysicalProject -----------------------------------filter((customer_address.ca_state = 'OK')) -------------------------------------PhysicalOlapScan[customer_address] +------hashAgg[DISTINCT_GLOBAL] +--------PhysicalDistribute +----------hashAgg[DISTINCT_LOCAL] +------------hashAgg[GLOBAL] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[LEFT_ANTI_JOIN] hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=() +--------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF3 web_site_sk->[ws_web_site_sk] +----------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF2 ws_order_number->[ws_order_number] ------------------------PhysicalDistribute --------------------------PhysicalProject -----------------------------filter((web_site.web_company_name = 'pri')) -------------------------------PhysicalOlapScan[web_site] +----------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 +------------------------PhysicalDistribute +--------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF1 ca_address_sk->[ws_ship_addr_sk] +----------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_ship_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 RF3 +------------------------------PhysicalDistribute +--------------------------------PhysicalProject +----------------------------------filter((date_dim.d_date <= '2000-04-01') and (date_dim.d_date >= '2000-02-01')) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalDistribute +------------------------------PhysicalProject +--------------------------------filter((customer_address.ca_state = 'OK')) +----------------------------------PhysicalOlapScan[customer_address] ----------------------PhysicalDistribute ------------------------PhysicalProject ---------------------------PhysicalOlapScan[web_returns] +--------------------------filter((web_site.web_company_name = 'pri')) +----------------------------PhysicalOlapScan[web_site] +--------------------PhysicalDistribute +----------------------PhysicalProject +------------------------PhysicalOlapScan[web_returns] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query16.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query16.out index b0921bb9f0c35d..f715eb2136dd41 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query16.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query16.out @@ -3,37 +3,36 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalTopN[LOCAL_SORT] -------PhysicalProject ---------hashAgg[DISTINCT_GLOBAL] -----------PhysicalDistribute -------------hashAgg[DISTINCT_LOCAL] ---------------hashAgg[GLOBAL] -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF4 cs_order_number->[cs_order_number] -----------------------PhysicalDistribute -------------------------PhysicalProject ---------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4 -----------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF3 cc_call_center_sk->[cs_call_center_sk] -------------------------hashJoin[RIGHT_ANTI_JOIN] hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=() build RFs:RF2 cs_order_number->[cr_order_number] ---------------------------PhysicalDistribute -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF2 ---------------------------PhysicalDistribute -----------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_ship_date_sk] -------------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF3 ---------------------------------PhysicalDistribute -----------------------------------PhysicalProject -------------------------------------filter((customer_address.ca_state = 'WV')) ---------------------------------------PhysicalOlapScan[customer_address] -------------------------------PhysicalDistribute ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_date <= '2002-05-31') and (date_dim.d_date >= '2002-04-01')) -------------------------------------PhysicalOlapScan[date_dim] +------hashAgg[DISTINCT_GLOBAL] +--------PhysicalDistribute +----------hashAgg[DISTINCT_LOCAL] +------------hashAgg[GLOBAL] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF4 cs_order_number->[cs_order_number] +--------------------PhysicalDistribute +----------------------PhysicalProject +------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4 +--------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF3 cc_call_center_sk->[cs_call_center_sk] +----------------------hashJoin[RIGHT_ANTI_JOIN] hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=() build RFs:RF2 cs_order_number->[cr_order_number] ------------------------PhysicalDistribute --------------------------PhysicalProject -----------------------------filter(cc_county IN ('Barrow County', 'Daviess County', 'Luce County', 'Richland County', 'Ziebach County')) -------------------------------PhysicalOlapScan[call_center] +----------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF2 +------------------------PhysicalDistribute +--------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_ship_date_sk] +----------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF3 +------------------------------PhysicalDistribute +--------------------------------PhysicalProject +----------------------------------filter((customer_address.ca_state = 'WV')) +------------------------------------PhysicalOlapScan[customer_address] +----------------------------PhysicalDistribute +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_date <= '2002-05-31') and (date_dim.d_date >= '2002-04-01')) +----------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalDistribute +------------------------PhysicalProject +--------------------------filter(cc_county IN ('Barrow County', 'Daviess County', 'Luce County', 'Richland County', 'Ziebach County')) +----------------------------PhysicalOlapScan[call_center] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query94.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query94.out index 4fb347a2c78771..4c4f311c28185b 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query94.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query94.out @@ -3,37 +3,36 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalTopN[LOCAL_SORT] -------PhysicalProject ---------hashAgg[DISTINCT_GLOBAL] -----------PhysicalDistribute -------------hashAgg[DISTINCT_LOCAL] ---------------hashAgg[GLOBAL] -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF4 ws_order_number->[ws_order_number] +------hashAgg[DISTINCT_GLOBAL] +--------PhysicalDistribute +----------hashAgg[DISTINCT_LOCAL] +------------hashAgg[GLOBAL] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF4 ws_order_number->[ws_order_number] +--------------------PhysicalDistribute +----------------------PhysicalProject +------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 +--------------------hashJoin[RIGHT_ANTI_JOIN] hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=() build RFs:RF3 ws_order_number->[wr_order_number] ----------------------PhysicalDistribute ------------------------PhysicalProject ---------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 -----------------------hashJoin[RIGHT_ANTI_JOIN] hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=() build RFs:RF3 ws_order_number->[wr_order_number] -------------------------PhysicalDistribute ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[web_returns] apply RFs: RF3 -------------------------PhysicalDistribute ---------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF2 web_site_sk->[ws_web_site_sk] -----------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_ship_date_sk] -------------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[ws_ship_addr_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 RF2 ---------------------------------PhysicalDistribute -----------------------------------PhysicalProject -------------------------------------filter((customer_address.ca_state = 'OK')) ---------------------------------------PhysicalOlapScan[customer_address] +--------------------------PhysicalOlapScan[web_returns] apply RFs: RF3 +----------------------PhysicalDistribute +------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF2 web_site_sk->[ws_web_site_sk] +--------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_ship_date_sk] +----------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[ws_ship_addr_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 RF2 ------------------------------PhysicalDistribute --------------------------------PhysicalProject -----------------------------------filter((date_dim.d_date <= '2000-04-01') and (date_dim.d_date >= '2000-02-01')) -------------------------------------PhysicalOlapScan[date_dim] +----------------------------------filter((customer_address.ca_state = 'OK')) +------------------------------------PhysicalOlapScan[customer_address] ----------------------------PhysicalDistribute ------------------------------PhysicalProject ---------------------------------filter((web_site.web_company_name = 'pri')) -----------------------------------PhysicalOlapScan[web_site] +--------------------------------filter((date_dim.d_date <= '2000-04-01') and (date_dim.d_date >= '2000-02-01')) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalDistribute +----------------------------PhysicalProject +------------------------------filter((web_site.web_company_name = 'pri')) +--------------------------------PhysicalOlapScan[web_site] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query16.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query16.out index b0921bb9f0c35d..f715eb2136dd41 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query16.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query16.out @@ -3,37 +3,36 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalTopN[LOCAL_SORT] -------PhysicalProject ---------hashAgg[DISTINCT_GLOBAL] -----------PhysicalDistribute -------------hashAgg[DISTINCT_LOCAL] ---------------hashAgg[GLOBAL] -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF4 cs_order_number->[cs_order_number] -----------------------PhysicalDistribute -------------------------PhysicalProject ---------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4 -----------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF3 cc_call_center_sk->[cs_call_center_sk] -------------------------hashJoin[RIGHT_ANTI_JOIN] hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=() build RFs:RF2 cs_order_number->[cr_order_number] ---------------------------PhysicalDistribute -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF2 ---------------------------PhysicalDistribute -----------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_ship_date_sk] -------------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF3 ---------------------------------PhysicalDistribute -----------------------------------PhysicalProject -------------------------------------filter((customer_address.ca_state = 'WV')) ---------------------------------------PhysicalOlapScan[customer_address] -------------------------------PhysicalDistribute ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_date <= '2002-05-31') and (date_dim.d_date >= '2002-04-01')) -------------------------------------PhysicalOlapScan[date_dim] +------hashAgg[DISTINCT_GLOBAL] +--------PhysicalDistribute +----------hashAgg[DISTINCT_LOCAL] +------------hashAgg[GLOBAL] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF4 cs_order_number->[cs_order_number] +--------------------PhysicalDistribute +----------------------PhysicalProject +------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4 +--------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF3 cc_call_center_sk->[cs_call_center_sk] +----------------------hashJoin[RIGHT_ANTI_JOIN] hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=() build RFs:RF2 cs_order_number->[cr_order_number] ------------------------PhysicalDistribute --------------------------PhysicalProject -----------------------------filter(cc_county IN ('Barrow County', 'Daviess County', 'Luce County', 'Richland County', 'Ziebach County')) -------------------------------PhysicalOlapScan[call_center] +----------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF2 +------------------------PhysicalDistribute +--------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_ship_date_sk] +----------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF3 +------------------------------PhysicalDistribute +--------------------------------PhysicalProject +----------------------------------filter((customer_address.ca_state = 'WV')) +------------------------------------PhysicalOlapScan[customer_address] +----------------------------PhysicalDistribute +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_date <= '2002-05-31') and (date_dim.d_date >= '2002-04-01')) +----------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalDistribute +------------------------PhysicalProject +--------------------------filter(cc_county IN ('Barrow County', 'Daviess County', 'Luce County', 'Richland County', 'Ziebach County')) +----------------------------PhysicalOlapScan[call_center] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query94.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query94.out index 4fb347a2c78771..4c4f311c28185b 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query94.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query94.out @@ -3,37 +3,36 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalTopN[LOCAL_SORT] -------PhysicalProject ---------hashAgg[DISTINCT_GLOBAL] -----------PhysicalDistribute -------------hashAgg[DISTINCT_LOCAL] ---------------hashAgg[GLOBAL] -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF4 ws_order_number->[ws_order_number] +------hashAgg[DISTINCT_GLOBAL] +--------PhysicalDistribute +----------hashAgg[DISTINCT_LOCAL] +------------hashAgg[GLOBAL] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF4 ws_order_number->[ws_order_number] +--------------------PhysicalDistribute +----------------------PhysicalProject +------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 +--------------------hashJoin[RIGHT_ANTI_JOIN] hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=() build RFs:RF3 ws_order_number->[wr_order_number] ----------------------PhysicalDistribute ------------------------PhysicalProject ---------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 -----------------------hashJoin[RIGHT_ANTI_JOIN] hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=() build RFs:RF3 ws_order_number->[wr_order_number] -------------------------PhysicalDistribute ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[web_returns] apply RFs: RF3 -------------------------PhysicalDistribute ---------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF2 web_site_sk->[ws_web_site_sk] -----------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_ship_date_sk] -------------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[ws_ship_addr_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 RF2 ---------------------------------PhysicalDistribute -----------------------------------PhysicalProject -------------------------------------filter((customer_address.ca_state = 'OK')) ---------------------------------------PhysicalOlapScan[customer_address] +--------------------------PhysicalOlapScan[web_returns] apply RFs: RF3 +----------------------PhysicalDistribute +------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF2 web_site_sk->[ws_web_site_sk] +--------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_ship_date_sk] +----------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[ws_ship_addr_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 RF2 ------------------------------PhysicalDistribute --------------------------------PhysicalProject -----------------------------------filter((date_dim.d_date <= '2000-04-01') and (date_dim.d_date >= '2000-02-01')) -------------------------------------PhysicalOlapScan[date_dim] +----------------------------------filter((customer_address.ca_state = 'OK')) +------------------------------------PhysicalOlapScan[customer_address] ----------------------------PhysicalDistribute ------------------------------PhysicalProject ---------------------------------filter((web_site.web_company_name = 'pri')) -----------------------------------PhysicalOlapScan[web_site] +--------------------------------filter((date_dim.d_date <= '2000-04-01') and (date_dim.d_date >= '2000-02-01')) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalDistribute +----------------------------PhysicalProject +------------------------------filter((web_site.web_company_name = 'pri')) +--------------------------------PhysicalOlapScan[web_site] diff --git a/regression-test/suites/query_p0/sql_functions/window_functions/test_window_fn.groovy b/regression-test/suites/query_p0/sql_functions/window_functions/test_window_fn.groovy index f4b62846bca0ac..17f1b6aa87dd92 100644 --- a/regression-test/suites/query_p0/sql_functions/window_functions/test_window_fn.groovy +++ b/regression-test/suites/query_p0/sql_functions/window_functions/test_window_fn.groovy @@ -383,10 +383,9 @@ suite("test_window_fn", "arrow_flight_sql") { "storage_format" = "V2" ); """ - test { - sql """SELECT SUM(MAX(c1) OVER (PARTITION BY c2, c3)) FROM test_window_in_agg;""" - exception "errCode = 2, detailMessage = AGGREGATE clause must not contain analytic expressions" - } + sql """set enable_nereids_planner=true;""" + sql """SELECT SUM(MAX(c1) OVER (PARTITION BY c2, c3)) FROM test_window_in_agg;""" + sql "DROP TABLE IF EXISTS test_window_in_agg;" } From a28ee684c0d6d29e6351f7009441e8f4b9c60893 Mon Sep 17 00:00:00 2001 From: starocean999 <12095047@qq.com> Date: Wed, 20 Dec 2023 15:20:14 +0800 Subject: [PATCH 4/4] fix out file --- .../rules/analysis/NormalizeAggregate.java | 2 +- .../shape/query16.out | 53 +++++++++---------- .../shape/query94.out | 53 +++++++++---------- 3 files changed, 53 insertions(+), 55 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java index 692ad428d759f8..a7eb7c7e5cc42d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java @@ -145,7 +145,7 @@ public Rule build() { child -> child.containsType(SubqueryExpr.class, WindowExpression.class), Collectors.toSet())); - // split non-distinct agg child as two parts + // split distinct agg child as two parts // TRUE part 1: need push down itself, if it is NOT SlotReference or Literal // FALSE part 2: need push down its input slots, if it is SlotReference or Literal Map> categorizedDistinctAggsChildren = aggFuncs.stream() diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query16.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query16.out index 1b116185aeb9e7..3fe19ee10d9dcd 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query16.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query16.out @@ -3,37 +3,36 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalTopN[LOCAL_SORT] -------PhysicalProject ---------hashAgg[DISTINCT_GLOBAL] -----------PhysicalDistribute -------------hashAgg[DISTINCT_LOCAL] ---------------hashAgg[GLOBAL] -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF4 cs_order_number->[cs_order_number] +------hashAgg[DISTINCT_GLOBAL] +--------PhysicalDistribute +----------hashAgg[DISTINCT_LOCAL] +------------hashAgg[GLOBAL] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((cs1.cs_order_number = cs2.cs_order_number)) otherCondition=(( not (cs_warehouse_sk = cs_warehouse_sk))) build RFs:RF4 cs_order_number->[cs_order_number] +--------------------PhysicalDistribute +----------------------PhysicalProject +------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4 +--------------------hashJoin[RIGHT_ANTI_JOIN] hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=() build RFs:RF3 cs_order_number->[cr_order_number] ----------------------PhysicalDistribute ------------------------PhysicalProject ---------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF4 -----------------------hashJoin[RIGHT_ANTI_JOIN] hashCondition=((cs1.cs_order_number = cr1.cr_order_number)) otherCondition=() build RFs:RF3 cs_order_number->[cr_order_number] -------------------------PhysicalDistribute ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF3 -------------------------PhysicalDistribute ---------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF2 cc_call_center_sk->[cs_call_center_sk] -----------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_ship_date_sk] -------------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2 ---------------------------------PhysicalDistribute -----------------------------------PhysicalProject -------------------------------------filter((customer_address.ca_state = 'PA')) ---------------------------------------PhysicalOlapScan[customer_address] +--------------------------PhysicalOlapScan[catalog_returns] apply RFs: RF3 +----------------------PhysicalDistribute +------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_call_center_sk = call_center.cc_call_center_sk)) otherCondition=() build RFs:RF2 cc_call_center_sk->[cs_call_center_sk] +--------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_ship_date_sk] +----------------------------hashJoin[INNER_JOIN] hashCondition=((cs1.cs_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[cs_ship_addr_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2 ------------------------------PhysicalDistribute --------------------------------PhysicalProject -----------------------------------filter((date_dim.d_date <= '2002-05-31') and (date_dim.d_date >= '2002-04-01')) -------------------------------------PhysicalOlapScan[date_dim] +----------------------------------filter((customer_address.ca_state = 'PA')) +------------------------------------PhysicalOlapScan[customer_address] ----------------------------PhysicalDistribute ------------------------------PhysicalProject ---------------------------------filter((call_center.cc_county = 'Williamson County')) -----------------------------------PhysicalOlapScan[call_center] +--------------------------------filter((date_dim.d_date <= '2002-05-31') and (date_dim.d_date >= '2002-04-01')) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalDistribute +----------------------------PhysicalProject +------------------------------filter((call_center.cc_county = 'Williamson County')) +--------------------------------PhysicalOlapScan[call_center] diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query94.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query94.out index b7cc3a9c809333..4e6d82d7ab9355 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query94.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query94.out @@ -3,37 +3,36 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalTopN[LOCAL_SORT] -------PhysicalProject ---------hashAgg[DISTINCT_GLOBAL] -----------PhysicalDistribute -------------hashAgg[DISTINCT_LOCAL] ---------------hashAgg[GLOBAL] -----------------hashAgg[LOCAL] -------------------PhysicalProject ---------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF4 ws_order_number->[ws_order_number] +------hashAgg[DISTINCT_GLOBAL] +--------PhysicalDistribute +----------hashAgg[DISTINCT_LOCAL] +------------hashAgg[GLOBAL] +--------------hashAgg[LOCAL] +----------------PhysicalProject +------------------hashJoin[RIGHT_SEMI_JOIN] hashCondition=((ws1.ws_order_number = ws2.ws_order_number)) otherCondition=(( not (ws_warehouse_sk = ws_warehouse_sk))) build RFs:RF4 ws_order_number->[ws_order_number] +--------------------PhysicalDistribute +----------------------PhysicalProject +------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 +--------------------hashJoin[RIGHT_ANTI_JOIN] hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=() build RFs:RF3 ws_order_number->[wr_order_number] ----------------------PhysicalDistribute ------------------------PhysicalProject ---------------------------PhysicalOlapScan[web_sales] apply RFs: RF4 -----------------------hashJoin[RIGHT_ANTI_JOIN] hashCondition=((ws1.ws_order_number = wr1.wr_order_number)) otherCondition=() build RFs:RF3 ws_order_number->[wr_order_number] -------------------------PhysicalDistribute ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[web_returns] apply RFs: RF3 -------------------------PhysicalDistribute ---------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF2 web_site_sk->[ws_web_site_sk] -----------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_ship_date_sk] -------------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[ws_ship_addr_sk] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 RF2 ---------------------------------PhysicalDistribute -----------------------------------PhysicalProject -------------------------------------filter((customer_address.ca_state = 'OK')) ---------------------------------------PhysicalOlapScan[customer_address] +--------------------------PhysicalOlapScan[web_returns] apply RFs: RF3 +----------------------PhysicalDistribute +------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_web_site_sk = web_site.web_site_sk)) otherCondition=() build RFs:RF2 web_site_sk->[ws_web_site_sk] +--------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ws_ship_date_sk] +----------------------------hashJoin[INNER_JOIN] hashCondition=((ws1.ws_ship_addr_sk = customer_address.ca_address_sk)) otherCondition=() build RFs:RF0 ca_address_sk->[ws_ship_addr_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 RF2 ------------------------------PhysicalDistribute --------------------------------PhysicalProject -----------------------------------filter((date_dim.d_date <= '2002-06-30') and (date_dim.d_date >= '2002-05-01')) -------------------------------------PhysicalOlapScan[date_dim] +----------------------------------filter((customer_address.ca_state = 'OK')) +------------------------------------PhysicalOlapScan[customer_address] ----------------------------PhysicalDistribute ------------------------------PhysicalProject ---------------------------------filter((web_site.web_company_name = 'pri')) -----------------------------------PhysicalOlapScan[web_site] +--------------------------------filter((date_dim.d_date <= '2002-06-30') and (date_dim.d_date >= '2002-05-01')) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalDistribute +----------------------------PhysicalProject +------------------------------filter((web_site.web_company_name = 'pri')) +--------------------------------PhysicalOlapScan[web_site]