diff --git a/src/backend/optimizer/plan/aqumv.c b/src/backend/optimizer/plan/aqumv.c index 9c24402045d..1a8f48e0668 100644 --- a/src/backend/optimizer/plan/aqumv.c +++ b/src/backend/optimizer/plan/aqumv.c @@ -80,6 +80,7 @@ static bool aqumv_process_targetlist(aqumv_equivalent_transformation_context *co static void aqumv_sort_targetlist(aqumv_equivalent_transformation_context* context); static Node *aqumv_adjust_sub_matched_expr_mutator(Node *node, aqumv_equivalent_transformation_context *context); static bool contain_var_or_aggstar_clause_walker(Node *node, void *context); +static bool check_partition(Query *parse, Oid origin_rel_oid); typedef struct { @@ -156,6 +157,8 @@ answer_query_using_materialized_views(PlannerInfo *root, AqumvContext aqumv_cont List *mv_final_tlist = NIL; /* Final target list we want to rewrite to. */ List *post_quals = NIL; aqumv_equivalent_transformation_context *context; + bool can_be_partition; + char relkind; /* Group By without agg could be possible though IMMV doesn't support it yet. */ bool can_not_use_mv = (parse->commandType != CMD_SELECT) || @@ -173,10 +176,6 @@ answer_query_using_materialized_views(PlannerInfo *root, AqumvContext aqumv_cont if (can_not_use_mv) return mv_final_rel; - /* - * AQUMV_FIXME_MVP: - * Single relation, excluding catalog/inherit/partition tables. - */ if (list_length(parse->jointree->fromlist) != 1) return mv_final_rel; @@ -185,18 +184,37 @@ answer_query_using_materialized_views(PlannerInfo *root, AqumvContext aqumv_cont return mv_final_rel; varno = ((RangeTblRef *) jtnode)->rtindex; - rte = root->simple_rte_array[varno]; - Assert(rte != NULL); + rte = planner_rt_fetch(varno, root); + if ((rte->rtekind != RTE_RELATION)) + return mv_final_rel; + /* root's stuff like simple_rte_array may be changed during rewrite, fetch oid here. */ origin_rel_oid = rte->relid; + /* excluding catalog tables. */ + if (IsSystemClassByRelid(origin_rel_oid)) + return mv_final_rel; - if ((rte->rtekind != RTE_RELATION) || - IsSystemClassByRelid(origin_rel_oid) || - has_superclass(origin_rel_oid) || - has_subclass(origin_rel_oid)) + relkind = get_rel_relkind(rte->relid); + if (relkind != RELKIND_RELATION && + relkind != RELKIND_PARTITIONED_TABLE && + relkind != RELKIND_FOREIGN_TABLE) return mv_final_rel; - if (get_rel_relkind(origin_rel_oid) == RELKIND_FOREIGN_TABLE && !aqumv_allow_foreign_table) + /* We don't know what it is. */ + if ((relkind != RELKIND_PARTITIONED_TABLE) && + (list_length(parse->rtable) > 1)) + return mv_final_rel; + + /* + * excluding inherit tables. + */ + can_be_partition = (relkind == RELKIND_PARTITIONED_TABLE) || get_rel_relispartition(rte->relid); + if (!can_be_partition && + (has_superclass(origin_rel_oid) || + has_subclass(origin_rel_oid))) + return mv_final_rel; + + if (relkind == RELKIND_FOREIGN_TABLE && !aqumv_allow_foreign_table) return mv_final_rel; ruleDesc = table_open(RewriteRelationId, AccessShareLock); @@ -293,11 +311,19 @@ answer_query_using_materialized_views(PlannerInfo *root, AqumvContext aqumv_cont /* * Check if it actually has children here to match before planning. + * Except for Partitioned tables. */ - mvrte->inh = has_subclass(mvrte->relid); + if (get_rel_relkind(mvrte->relid) == RELKIND_PARTITIONED_TABLE) + mvrte->inh = false; + else + mvrte->inh = has_subclass(mvrte->relid); + if (mvrte->inh) continue; + if (!check_partition(parse, origin_rel_oid)) + continue; + subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo)); memcpy(subroot, root, sizeof(PlannerInfo)); subroot->parent_root = root; @@ -348,7 +374,7 @@ answer_query_using_materialized_views(PlannerInfo *root, AqumvContext aqumv_cont { if (parse->hasDistinctOn || parse->distinctClause != NIL || - parse->groupClause != NIL || + parse->groupClause != NIL || /* TODO: GROUP BY */ parse->groupingSets != NIL || parse->groupDistinct) continue; @@ -446,6 +472,9 @@ answer_query_using_materialized_views(PlannerInfo *root, AqumvContext aqumv_cont */ subroot->processed_tlist = NIL; preprocess_targetlist(subroot); + + /* Select from a mv never have that.*/ + subroot->append_rel_list = NIL; } else { @@ -519,6 +548,8 @@ answer_query_using_materialized_views(PlannerInfo *root, AqumvContext aqumv_cont continue; viewQuery->jointree->quals = (Node *)post_quals; + /* Select from a mv never have that.*/ + subroot->append_rel_list = NIL; } /* @@ -581,6 +612,7 @@ answer_query_using_materialized_views(PlannerInfo *root, AqumvContext aqumv_cont * See more in README.cbdb.aqumv */ root->eq_classes = subroot->eq_classes; + root->append_rel_list = subroot->append_rel_list; current_rel = mv_final_rel; table_close(matviewRel, NoLock); need_close = false; @@ -946,3 +978,38 @@ static Node *aqumv_adjust_varno_mutator(Node *node, aqumv_adjust_varno_context * ((RangeTblRef*) node)->rtindex = context->varno; return expression_tree_mutator(node, aqumv_adjust_varno_mutator, context); } + +/* + * check_partition - Check if the query's range table entries align with the partitioned table structure. + * + * This function verifies whether the range table entries in the query (parse->rtable) correspond to + * the expected structure of a partitioned table. It ensures that all range table entries beyond the + * first one match the name of the underlying relation (origin_rel_oid). + * While this behavior is not guaranteed by Postgres, we can rely on it based on our observation of + * the internal implementation when expanding partitioned tables. + * This approach is admittedly hacky, but it serves as a practical solution for now, allowing us to move forward. + * + * Parameters: + * - parse: The query parse tree containing the range table entries to be checked. + * - origin_rel_oid: The OID of the original relation (partitioned table) to compare against. + * + * Returns: + * - true if all range table entries beyond the first match the underlying relation's name. + * - false otherwise. + */ +static bool +check_partition(Query *parse, Oid origin_rel_oid) +{ + char *underling_relname; + + if (list_length(parse->rtable) == 1) + return true; + underling_relname = get_rel_name(origin_rel_oid); + for (int i = 2; i <= list_length(parse->rtable); i++) + { + RangeTblEntry *other_rte = rt_fetch(i, parse->rtable); + if (strcmp(underling_relname, other_rte->alias->aliasname) != 0) + return false; + } + return true; +} diff --git a/src/test/regress/expected/aqumv.out b/src/test/regress/expected/aqumv.out index a978409f8ee..557e97149e4 100644 --- a/src/test/regress/expected/aqumv.out +++ b/src/test/regress/expected/aqumv.out @@ -3167,6 +3167,94 @@ select count(*), sum(c1) from t where c1 > 90 limit all; (1 row) abort; +-- +-- test partitioned tables +-- +create table par(a int, b int, c int) partition by range(b) + subpartition by range(c) subpartition template (start (1) end (3) every (1)) + (start(1) end(3) every(1)); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into par values(1, 1, 1), (1, 1, 2), (2, 2, 1), (2, 2, 2); +insert into par values(1, 1, 1), (1, 1, 2), (2, 2, 1), (2, 2, 2); +insert into par values(1, 1, 1), (1, 1, 2), (2, 2, 1), (2, 2, 2); +create materialized view mv_par as select count(*) from par; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'count' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create materialized view mv_par1 as select count(*) from par_1_prt_1; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'count' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create materialized view mv_par1_1 as select count(*) from par_1_prt_1_2_prt_1; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'count' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create materialized view mv_par1_2 as select count(*) from par_1_prt_1_2_prt_2; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'count' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create materialized view mv_par2 as select count(*) from par_1_prt_2; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'count' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create materialized view mv_par2_2 as select count(*) from par_1_prt_2_2_prt_1; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'count' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create materialized view mv_par_prune as select count(*) from par where b = 1; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'count' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +set enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select count(*) from par; + QUERY PLAN +---------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: count + -> Seq Scan on aqumv.mv_par + Output: count + Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(6 rows) + +explain(costs off, verbose) +select count(*) from par_1_prt_1; + QUERY PLAN +---------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: count + -> Seq Scan on aqumv.mv_par1 + Output: count + Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(6 rows) + +-- test partition_pruning +set enable_partition_pruning = on; +explain(costs off, verbose) +select count(*) from par where b = 1; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: count + -> Seq Scan on aqumv.mv_par_prune + Output: count + Settings: enable_answer_query_using_materialized_views = 'on', enable_partition_pruning = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(6 rows) + +set enable_partition_pruning = off; +explain(costs off, verbose) +select count(*) from par where b = 1; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: count + -> Seq Scan on aqumv.mv_par_prune + Output: count + Settings: enable_answer_query_using_materialized_views = 'on', enable_partition_pruning = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(6 rows) + +reset enable_partition_pruning; +-- +-- End of test partitioned tables +-- reset optimizer; reset enable_answer_query_using_materialized_views; -- start_ignore diff --git a/src/test/regress/sql/aqumv.sql b/src/test/regress/sql/aqumv.sql index 708c268c7be..244c2a27997 100644 --- a/src/test/regress/sql/aqumv.sql +++ b/src/test/regress/sql/aqumv.sql @@ -795,6 +795,41 @@ select count(*), sum(c1) from t where c1 > 90 limit all; abort; +-- +-- test partitioned tables +-- +create table par(a int, b int, c int) partition by range(b) + subpartition by range(c) subpartition template (start (1) end (3) every (1)) + (start(1) end(3) every(1)); +insert into par values(1, 1, 1), (1, 1, 2), (2, 2, 1), (2, 2, 2); +insert into par values(1, 1, 1), (1, 1, 2), (2, 2, 1), (2, 2, 2); +insert into par values(1, 1, 1), (1, 1, 2), (2, 2, 1), (2, 2, 2); +create materialized view mv_par as select count(*) from par; +create materialized view mv_par1 as select count(*) from par_1_prt_1; +create materialized view mv_par1_1 as select count(*) from par_1_prt_1_2_prt_1; +create materialized view mv_par1_2 as select count(*) from par_1_prt_1_2_prt_2; +create materialized view mv_par2 as select count(*) from par_1_prt_2; +create materialized view mv_par2_2 as select count(*) from par_1_prt_2_2_prt_1; +create materialized view mv_par_prune as select count(*) from par where b = 1; +set enable_answer_query_using_materialized_views = on; + +explain(costs off, verbose) +select count(*) from par; +explain(costs off, verbose) +select count(*) from par_1_prt_1; + +-- test partition_pruning +set enable_partition_pruning = on; +explain(costs off, verbose) +select count(*) from par where b = 1; +set enable_partition_pruning = off; +explain(costs off, verbose) +select count(*) from par where b = 1; +reset enable_partition_pruning; +-- +-- End of test partitioned tables +-- + reset optimizer; reset enable_answer_query_using_materialized_views; -- start_ignore