From 7522c1c73cd55b8eaedf0ca87237659c6a6a6e6b Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Tue, 14 Nov 2023 10:54:41 +0800 Subject: [PATCH] Answer Query Using Materialized Views. AQUMV for short, is used to compute part or all of a Query from materialized views during planning. It could provide massive improvements in query processing time, especially for aggregation queries over large tables[1]. AQUMV usually uses Incremental Materialized Views(IMV) as candidates, as IMV usually have real time data when there are writable operations on related tables. AQUMV is actually a Equivalent Transformation on Query tree. A materialized view(MV) could be use to compute a Query if: 1.The view contains all rows needed by the query expression. If MV has more rows than query wants, additional filter may be added if possible. 2.All output expressions can be computed from the output of the view. The output expressions could be fully or partially matched from MV's target list. 3.Cost-based. There may be multiple valid MV candidates, or select from MV is not better than select from origin table(ex: has an index and etc), let planner decide the best one. Construct rows by splitting MV query quals(mv_query_quals) and Query quals (origin_query_quals) to difference set and intersection set. And post_quals formed by:{origin_query_quals - mv_query_quals} will be processed by MV query's target list, and rewritten to MV relation's target list expressions. Construct columns expressions using a Greedy Algorithm. Sort the MV query's target list by complexity, and try to rewrite expressions by that order. Expressions that have no Vars are kept to upper(Const Expressions) or be rewritten if there were corresponding expressions. Reference: [1] Optimizing Queries Using Materialized Views: A Practical, Scalable Solution. https://courses.cs.washington.edu/courses/cse591d/01sp/opt_views.pdf Authored-by: Zhang Mingli avamingli@gmail.com --- src/backend/optimizer/README.cbdb.aqumv | 241 ++++++++ src/backend/optimizer/plan/planmain.c | 5 + src/backend/optimizer/plan/planner.c | 749 ++++++++++++++++++++++++ src/backend/utils/misc/guc_gp.c | 13 +- src/include/utils/guc.h | 1 + src/include/utils/unsync_guc_name.h | 1 + src/test/regress/expected/aqumv.out | 737 +++++++++++++++++++++++ src/test/regress/greenplum_schedule | 3 + src/test/regress/sql/aqumv.sql | 227 +++++++ 9 files changed, 1976 insertions(+), 1 deletion(-) create mode 100644 src/backend/optimizer/README.cbdb.aqumv create mode 100644 src/test/regress/expected/aqumv.out create mode 100644 src/test/regress/sql/aqumv.sql diff --git a/src/backend/optimizer/README.cbdb.aqumv b/src/backend/optimizer/README.cbdb.aqumv new file mode 100644 index 00000000000..975a4b87da6 --- /dev/null +++ b/src/backend/optimizer/README.cbdb.aqumv @@ -0,0 +1,241 @@ +src/backend/optimizer/README.cbdb.aqumv + +Portions Copyright (c) 2023, HashData Technology Limited. + +Author +============ +Zhang Mingli avamingli@gmail.com + + +Answer Query Using Materialized Views +===================================== + +AQUMV for short, is used to compute part or all of a Query from materialized views during planning. +It could provide massive improvements in query processing time, especially for aggregation queries over large tables[1]. + +AQUMV usually uses Incremental Materialized Views(IMV) as candidates, as IMV has real time data +when there are writable operations on related tables. + +Basic Theory +------------ + +A materialized view(MV) could be use to compute a Query if: +1. The view contains all rows needed by the query expression(Construct Rows). + If MV has more rows than query wants, additional filter may be added if possible. +2. All output expressions can be computed from the output of the view(Construct Columns). + The output expressions could be fully or partially matched from MV's TargetList. +3. Cost-based Equivalent Transformation. + There may be multiple valid MV candidates, or select from MV is not better than + select from origin table(ex: has an index and etc), let planner decide the best one. + +Vocabulary: + origin_query: the SQL we want to query. + mv_query: for materialized view's corresponding query, the SELECT part of a Create Materialized View. + +Construct Rows +-------------- + +If MV has all rows of query needed, it means that MV query's restrictions are looser than query's restrictions. +For AQUMV_MVP0, we only do logistic transformation. +All rewrites are on the Query tree, neither Equivalent Classes nor Restrictions are used. +For a single relation: +process mv_query and origin_query's WHERE part to set: +mv_query_quals and origin_query_quals. + +example0: + CREATE MATERIALIZED VIEW mv0 AS SELECT * FROM t WHERE a = 1 AND b = 2; + Query: SELECT * FROM t WHERE a = 1; + +mv_query_quals = {a = 1, b = 2}. +origin_query_quals = {a = 1}. + +1) A MV can't be used if the difference set: {mv_query_quals - origin_query_quals} is not empty. + +It 'typically' means that the MV has less rows than origin_query wants. +For example0, the difference set is: + mv_query_quals - origin_query_quals = {b = 2}. + +mv0's all rows meet requirement {a = 1 and b = 2}, but we only want rows {a = 1}. +mv0 couldn't provide all the rows we want, we can't use it to answer the query. + +'typically' means that if there are range quals, this conclusion is not sure. +But we couldn't handle that for now. + +2) The intersection set: {mv_query_quals ∩ origin_query_quals} should be dropped. + +If the intersection set is not empty, we choose to drop it. +example1: + CREATE MATERIALIZED VIEW mv1 AS SELECT * FROM t WHERE a = 1; + Query: SELECT * FROM t WHERE a = 1; + +{mv_query_quals ∩ origin_query_quals} = {a = 1}; + +It seems everything is good and we have nothing more to do. +Because the two quals are the same and we could rewrite the SQL to: + Rewritten SQL: SELECT * FROM mv1 WHERE a = 1; + +As all mv1's rows meet the requirement: a = 1, it's pointless that we do filter a = 1 again at execution. + +What's worse is the unnecessary filter {a = 1} will mislead the clause-selectivity of the relation. +For the example1, a {a = 1} will estimate less rows from relation MV, but as we are clear that all rows +meet the requirement, and the selectivity from mv1 should be 100%. + +Another reason we dropped the intersection set is: we couldn't just keep the intersection set. +example2: + CREATE MATERIALIZED VIEW mv2 AS SELECT b FROM t WHERE a = 1; + Query: SELECT b FROM t WHERE a = 1; + +{mv_query_quals ∩ origin_query_quals} = {a = 1}; + +mv2 and origin_query only select column b from t with the same quals {a = 1}. +If the intersection set is kept, we will get a wrong SQL: + Wrong: SELECT b FROM mv2 WHERE a = 1; + +mv2 doesn't have column a, the SQL will get a syntax error. + +It's not always impossible to keep the intersection set, for +example3: + CREATE MATERIALIZED VIEW mv3 AS SELECT a, b FROM t WHERE a = 1; + Query: SELECT a, b FROM t WHERE a = 1; + +We could rewrite it to: + SELECT a, b FROM mv3 WHERE a = 1; + +There is a way to see if it's possible to rewrite that, but it isn't worth trying according to +what we mentioned above. + +The disadvantages of dropping the intersection set of mv_query_quals and origin_query_quals is: +We may lose some Equivalent Classes if there are equal operations like: a = 1. +But not for other operations, ex: c > 1, because Postgres only have Equivalent Class for equal operations. +And we haven't taken Equivalent Class into account for AQUMV_MVP0, it's reasonable to drop that. + +3) process difference set: {origin_query_quals - mv_query_quals} +If 1) and 2) passed, the difference set on the other hand, we call it post_quals: + + post_quals = {origin_query_quals - mv_query_quals} + +The MV has more rows than query if post_quals is not empty. +We have to add it to MV to filter the rows query want. +example4: + CREATE MATERIALIZED VIEW mv4 AS SELECT a, b FROM t WHERE a = 1; + Query: SELECT a, b FROM t WHERE a = 1 and b = 2; + +We could rewrite it to: + SELECT a, b FROM mv4 WHERE b = 2; + +All rows in MV are {a = 1} ones as the MV defination, we only need to add the extra filter {b = 2}. + +But it's not always true, if we don't have the columns that the post_quals need. +example5: + CREATE MATERIALIZED VIEW mv5 AS SELECT a FROM t WHERE a = 1; + Query: SELECT a FROM t WHERE a = 1 and b = 2; + +mv5 has all rows {a = 1} and only have column 'a', but the query want additional filter {b = 2}. +We couldn't rewrite it by just adding the {b = 2} to MV as no equivalent b in MV relation. + Wrong: SELECT a FROM mv5 WHERE b = 2; + +The algorithem behind that is: all quals's expression could be computed from a mv_query's target list. +That's what Construct Columns does. + +Construct Columns +----------------- + +A MV could be a candidate if the query's target list and the post_quals could be computed form +mv_query's target list and rewrite to expressions bases on MV relation's columns itself. + +example6: + CREATE MATERIALIZED VIEW mv6 AS SELECT abs(c) as mc1, b as mc2 FROM t WHERE a = 1; + Query: SELECT abs(c) as res1 FROM t WHERE a = 1 and b = 2; + +The post_quals is: {b = 2} while column b exists in mv6, corresponding to mc2 with alias. +We can rewrite post_quals to {mc2 = 2}. + +The query wants a target abs(c) with an alias res1, while expression abs(c) exists in mv6, +corresponding to column mc1 with alias. +Then we can rewrite SQL to: + + Rewrite: SELECT mc1 as res1 FROM mv6 WHERE mc2 = 2; + +The expression abs(c) is eliminated and simplified to a column reference mc1, and the alias is kept. + +Things become complex when there are multiple expression candidates, and some ones could be +part of others. + +example7: + CREATE MATERIALIZED VIEW mv7 AS + SELECT c1 AS mc1, c2 AS mc2, abs(c2) AS mc3, abs(abs(c2) - c1 - 1) AS mc4 + FROM t1 WHERE c1 > 30 AND c1 < 40; + Query: SELECT sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) AS res1 FROM t1 WHERE c1 > 30 AND c1 < 40 AND c2 > 23; + +There are many choices to construct column res1: + sqrt(abs(abs(mc2) - mc1 - 1) + abs(mc2)) // constructed by mc1, mc2 + sqrt(abs(abs(mc2) - mc1 - 1) + mc3)) // constructed by mc1, mc2, mc3 + sqrt(abs(mc3 - mc1 - 1) + abs(mc2))) // constructed by mc1, mc2, mc3 + sqrt(abs(mc3 - mc1 - 1) + mc3)) // constructed by mc1, mc3 + sqrt(mc4 + mc3)) // constructed by mc3, mc4 + +Obviously, the best one is sqrt(mc4 + mc3) which avoids much of expression execution for each row. + +We try to use the most-submatched expression to do a first rewrite and then next. +It's not only optimization, but also unnecessary for some cases that a less-matched expression +rewrite may close the door for more-matched ones, especially for post_quals rewrite. +example8: + CREATE MATERIALIZED VIEW mv8 AS + SELECT c2 as mc3, c2 AS mc2, abs(c2) AS m_abs_c2 + FROM t1 WHERE c1 > 1; + Query: SELECT c3 AS res1 FROM t1 WHERE c1 > 1 and (abs(c2) - c1 - 1) > 10; + + post_quals: {(abs(c2) - c1 - 1) > 10} + +If we choose less-matched mc2 to rewrite, an intermediate expression would be: + {(abs(mc2) - c1 - 1) > 10} + +But mv8 don't have a corresponding column c1 to continue the work, that's bad and we will lose +the chance to use it. + +The approach is: use a Greedy Algorithm to rewrite the target expression. + +First, Split the MV query's expressions to pure-Var and nonpure-Var ones. +Because pure Var expression is always the leaf of an expression tree if it needs to be rewritten. + +Sort the nonpure-Var expressions by complexity. +We don't need an absolute order for every expression. +All we need to guarantee is that: +if expression A is sub part of expression B, put A after B. + +The approach applies to post_quals rewrite too. + +Expressions that have no Vars are kept to upper(ex: Const Expressions) or rewritten if there were +corresponding expressions. + + +Cost-based +---------- + +There could be multiple candidates after equivalent transformation. +After all things is done for a materialized view candidate, build a plan to compare with current one. +Let the planner decide the best one. + +AQUMV_MVP +--------- +Support SELECT FROM a single relation both for mv_query and the origin_query. +Below are not supported now: + AGG + Subquery + Order by(for origin_query) + Join + Sublink + Group by + Window Functions + CTE + Distinct On + Refresh Materialized View + Create AS + Partition Tables + Inherit Tables + +Reference: + [1] Optimizing Queries Using Materialized Views: A Practical, Scalable Solution. + https://courses.cs.washington.edu/courses/cse591d/01sp/opt_views.pdf + [2] Automated Selection of Materialized Views and Indexes for SQL Databases. + https://www.vldb.org/conf/2000/P496.pdf diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 9966e97f4ed..b09a2fb8735 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -240,7 +240,12 @@ query_planner(PlannerInfo *root, * generate pathkeys in canonical form; so compute query_pathkeys and * other pathkeys fields in PlannerInfo. */ + /* AQUMV_FIXME_MVP: qp_callback may be NULL. */ +#if 0 (*qp_callback) (root, qp_extra); +#endif + if (qp_callback != NULL) + (*qp_callback) (root, qp_extra); /* * Examine any "placeholder" expressions generated during subquery pullup. diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 5447e311d8f..bfbdf677f31 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -86,6 +86,17 @@ #include "storage/lmgr.h" #include "utils/guc.h" +#include "access/genam.h" +#include "access/htup_details.h" +#include "access/table.h" +#include "catalog/catalog.h" +#include "catalog/pg_class_d.h" +#include "catalog/pg_rewrite.h" +#include "parser/parsetree.h" +#include "tcop/tcopprot.h" +#include "utils/builtins.h" +#include "utils/rel.h" + #ifdef USE_ORCA extern void InitGPOPT(); #endif @@ -293,6 +304,30 @@ static split_rollup_data *make_new_rollups_for_hash_grouping_set(PlannerInfo *ro Path *path, grouping_sets_data *gd); + +typedef struct +{ + int varno; +} aqumv_adjust_varno_context; + +static bool aqumv_process_from_quals(Node *query_quals, Node *mv_quals, List** post_quals); +static void aqumv_adjust_varno(Query *parse, int delta); +static Node *aqumv_adjust_varno_mutator(Node *node, aqumv_adjust_varno_context *context); + +typedef struct +{ + List *mv_pure_vars; /* List of pure Vars expression. */ + List *mv_pure_vars_index; /* Index list of pure Vars. */ + List *mv_nonpure_vars_index; /* Index list of nonpure Vars. */ + List *mv_query_tlist; /* mv query target list. */ + List *mv_tlist; /* mv relation's target list. */ + bool has_unmatched; /* True if we fail to rewrite an expression. */ +} aqumv_equivalent_transformation_context; + +static bool aqumv_process_targetlist(aqumv_equivalent_transformation_context *context, List *query_tlist, List **mv_final_tlist); +static void aqumv_process_nonpure_vars_expr(aqumv_equivalent_transformation_context* context); +static Node *aqumv_adjust_sub_matched_expr_mutator(Node *node, aqumv_equivalent_transformation_context *context); + /***************************************************************************** * * Query optimizer entry point @@ -696,7 +731,13 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, Assert(glob->finalrtable == NIL); Assert(glob->finalrowmarks == NIL); Assert(glob->resultRelations == NIL); + + /* AQUMV_FIXME_MVP: We may rewrite the parse tree. */ + AssertImply(!answer_query_using_materialized_views, parse == root->parse); +#if 0 Assert(parse == root->parse); +#endif + parse = root->parse; if (Gp_role == GP_ROLE_DISPATCH) { @@ -1866,6 +1907,386 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) */ current_rel = query_planner(root, standard_qp_callback, &qp_extra); + /* + * Answer Query Using Materialized Views(AQUMV). + */ + RelOptInfo *mv_final_rel = NULL; + bool can_not_use_mv = (parse->commandType != CMD_SELECT) || + (parse->rowMarks != NIL) || + parse->hasAggs || + parse->hasWindowFuncs || + parse->hasDistinctOn || + parse->hasModifyingCTE || + parse->sortClause || + (parse->parentStmtType == PARENTSTMTTYPE_REFRESH_MATVIEW) || + (parse->parentStmtType == PARENTSTMTTYPE_CTAS) || + parse->hasSubLinks; + + if (Gp_role == GP_ROLE_DISPATCH && + answer_query_using_materialized_views && + !can_not_use_mv && + (list_length(parse->jointree->fromlist) == 1)) + { + Node *jtnode = (Node *) linitial(parse->jointree->fromlist); + + if (IsA(jtnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jtnode)->rtindex; + RangeTblEntry *rte = root->simple_rte_array[varno]; + + Assert(rte != NULL); + + /* + * AQUMV_FIXME_MVP: + * Single relation, excluding catalog. + */ + if (rte->rtekind == RTE_RELATION && + !IsSystemClassByRelid(rte->relid) && + !has_superclass(rte->relid) && + !has_subclass(rte->relid)) + { + + Relation ruleDesc; + Relation matviewRel; + SysScanDesc rcscan; + HeapTuple tup; + RewriteRule *rule; + Form_pg_rewrite rewrite_tup; + List *actions; + Query *mvQuery; + Query *mvRelQueryTree; /* Query tree of select from mv itself. */ + Node *mvjtnode; + bool need_close = false; + + ruleDesc = table_open(RewriteRelationId, AccessShareLock); + + rcscan = systable_beginscan(ruleDesc, InvalidOid, false, + NULL, 0, NULL); + + while (HeapTupleIsValid(tup = systable_getnext(rcscan))) + { + CHECK_FOR_INTERRUPTS(); + if (need_close) + table_close(matviewRel, AccessShareLock); + + rewrite_tup = (Form_pg_rewrite) GETSTRUCT(tup); + + matviewRel = table_open(rewrite_tup->ev_class, AccessShareLock); + need_close = true; + + /* + * AQUMV + * Currently the data of IVM is always up-to-date if there were. + * Take care of this when IVM defered-fefresh is supported. + */ + if (!(RelationIsIVM(matviewRel) && RelationIsPopulated(matviewRel))) + continue; + + if (matviewRel->rd_rel->relhasrules == false || + matviewRel->rd_rules->numLocks != 1) + continue; + + rule = matviewRel->rd_rules->rules[0]; + + /* Filter a SELECT action, and not instead. */ + if ((rule->event != CMD_SELECT) || !(rule->isInstead)) + continue; + + actions = rule->actions; + if (list_length(actions) != 1) + continue; + + /* + * AQUMV + * We will do some Equivalet Transformation on the mvQuery which + * represents the mv's corresponding query. + * + * AQUMV_FIXME_MVP: mvQuery is a simple query too, like the parse query. + * mvQuery->sortClause is ok here, though we can't use the Order by + * clause of mvQuery, and we have disabled the parse->sortClause. + * The reason is: the Order by clause of materialized view's query is + * typically pointless. We can't rely on the order even we wrote the + * ordered data into mv, ex: some other access methods except heap. + * The Seqscan on a heap-storaged mv seems ordered, but it's a free lunch. + * A Parallel Seqscan breaks that hypothesis. + */ + mvQuery = copyObject(linitial_node(Query, actions)); + Assert(IsA(mvQuery, Query)); + if(mvQuery->hasAggs || + mvQuery->hasWindowFuncs || + mvQuery->hasDistinctOn || + mvQuery->hasModifyingCTE || + mvQuery->hasSubLinks) + continue; + + if (list_length(mvQuery->jointree->fromlist) != 1) + continue; + + mvjtnode = (Node *) linitial(mvQuery->jointree->fromlist); + if (!IsA(mvjtnode, RangeTblRef)) + continue; + + /* + * AQUMV + * Require that the relation of mvQuery is a simple query too. + * We haven't do sth like: pull up sublinks or subqueries yet. + */ + int varno = ((RangeTblRef *) mvjtnode)->rtindex; + RangeTblEntry *mvrte = rt_fetch(varno, mvQuery->rtable); + Assert(mvrte != NULL); + + if (mvrte->rtekind != RTE_RELATION) + continue; + + /* + * AQUMV_FIXME_MVP + * Must be same relation, recursiviely embeded mv is not supported now. + */ + if (mvrte->relid != rte->relid) + continue; + + /* + * AQUMV_FIXME_MVP + * mv's query tree itself is needed to do the final replacement + * when we found corresponding column expression from mvQuery's + * TargetList by Query's. + * + * A plain SELECT on all columns seems the easiest way, though + * some columns may not be needed. + * And we get a mvRelQueryTree represents SELECT * FROM mv. + */ + char *mvname = quote_qualified_identifier(get_namespace_name(RelationGetNamespace(matviewRel)), + RelationGetRelationName(matviewRel)); + StringInfoData query_mv; + initStringInfo(&query_mv); + appendStringInfo(&query_mv, "SELECT * FROM %s", mvname); + List *raw_parsetree_list = pg_parse_query(query_mv.data); + + /* + * AQUMV_FIXME_MVP + * We should drop the mv if it has rules. + * Because mv's rules shouldn't apply to origin query. + */ + if (list_length(raw_parsetree_list) != 1) + continue; + + ParseState *mv_pstate = make_parsestate(NULL); + mv_pstate->p_sourcetext = query_mv.data; + mvRelQueryTree = transformTopLevelStmt(mv_pstate, linitial(raw_parsetree_list)); + free_parsestate(mv_pstate); + /* AQUMV_FIXME_MVP: free mvRelQueryTree? */ + + PlannerInfo *subroot; + subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo)); + memcpy(subroot, root, sizeof(PlannerInfo)); + subroot->parent_root = root; + /* + * AQUMV_FIXME_MVP: + * TODO: keep ECs and adjust varno? + */ + subroot->eq_classes = NIL; + /* Reset subplan-related stuff */ + subroot->plan_params = NIL; + subroot->outer_params = NULL; + subroot->init_plans = NIL; + subroot->agginfos = NIL; + subroot->aggtransinfos = NIL; + subroot->parse = mvQuery; + + /* + * AQUMV + * We have to rewrite now before we do the real Equivalent + * Transformation 'rewrite'. + * Because actions sotored in rule is not a normal query tree, + * it can't be used directly, ex: new/old realtions used to + * refresh mv. + * Earse unused relatoins, keep the right one. + */ + foreach(lc, mvQuery->rtable) + { + RangeTblEntry* rtetmp = lfirst(lc); + if ((rtetmp->relkind == RELKIND_MATVIEW) && + (rtetmp->alias != NULL) && + (strcmp(rtetmp->alias->aliasname, "new") == 0 || + strcmp(rtetmp->alias->aliasname,"old") == 0)) + { + foreach_delete_current(mvQuery->rtable, lc); + } + } + + /* + * Now we have the right relation, adjust + * varnos in its query tree. + * AQUMV_FIXME_MVP: Only one single relation + * is supported now, we could assign varno + * to 1 opportunistically. + */ + aqumv_adjust_varno(mvQuery, 1); + + /* + * AQUMV_FIXME_MVP + * Are stable functions OK? + * A STABLE function cannot modify the database and is guaranteed to + * return the same results given the same arguments for all rows + * within a single statement. + * But AQUMV rewrites the query to a new SQL actually, though the same + * results is guaranteed. + * Its's unclear whether STABLE is OK, let's be conservative for now. + */ + if(contain_mutable_functions((Node *)mvQuery)) + continue; + + /* + * AQUMV + * Since tlist and quals rewrite are both based on mv query's tlist, + * put all stuff into a context. + */ + aqumv_equivalent_transformation_context *context = palloc0(sizeof(aqumv_equivalent_transformation_context)); + List *mv_pure_vars = NIL; /* TargetEntry[Var] in mv query. */ + List *mv_pure_vars_index = NIL; /* Index of TargetEntry[Var] in mv query. */ + List *mv_nonpure_vars_index = NIL; /* Index of nonpure[Var] expression in mv query. */ + ListCell *lc; + Expr *expr; + + /* + * AQUMV + * Process mv_query's tlist to pure-Var and no pure-Var expressions. + * See details in README.cbdb.aqumv + */ + int i = 0; + foreach(lc, mvQuery->targetList) + { + i++; + TargetEntry* tle = lfirst_node(TargetEntry, lc); + expr = tle->expr; + if(IsA(expr, Var)) + { + mv_pure_vars = lappend(mv_pure_vars, expr); + mv_pure_vars_index = lappend_int(mv_pure_vars_index, i); + } + else + mv_nonpure_vars_index = lappend_int(mv_nonpure_vars_index, i); + } + + context->mv_pure_vars = mv_pure_vars; + context->mv_pure_vars_index = mv_pure_vars_index; + context->mv_nonpure_vars_index = mv_nonpure_vars_index; + context->mv_tlist = mvRelQueryTree->targetList; + context->mv_query_tlist = mvQuery->targetList; + context->has_unmatched = false; + + /* Sort nonpure vars expression, prepare for Greedy Algorithm. */ + aqumv_process_nonpure_vars_expr(context); + + List *mv_final_tlist = NIL; /* Final target list we want to rewrite to. */ + + /* + * Process and rewrite target list, return false if failed. + */ + if(!aqumv_process_targetlist(context, parse->targetList, &mv_final_tlist)) + continue; + + /* + * AQUMV + * Process all quals to conjunctive normal form. + * + * We assume that the selection predicates of view and query expressions + * have been converted into conjunctive normal form(CNF) before we process + * them. + * AQUMV_MVP: no having quals now. + */ + preprocess_qual_conditions(subroot, (Node *) mvQuery->jointree); + + List *post_quals = NIL; + /* + * Process quals, return false if failed. + * Else, post_quals are filled if there were. + * Like process target list, post_quals is used later to see if we could + * rewrite and apply it to mv relation. + */ + if(!aqumv_process_from_quals(parse->jointree->quals, mvQuery->jointree->quals, &post_quals)) + continue; + + /* Rewrite post_quals, return false if failed. */ + post_quals = (List *)aqumv_adjust_sub_matched_expr_mutator((Node *)post_quals, context); + if (context->has_unmatched) + continue; + + /* + * Here! We succeed to rewrite a new SQL. + * Begin to replace all guts. + */ + mvQuery->targetList = mv_final_tlist; + + /* + * AQUMV + * NB: Update processed_tlist again in case that tlist has been changed. + */ + preprocess_targetlist(subroot); + + /* + * AQUMV + * NB: Correct the final_locus as we select from another realtion now. + */ + PathTarget *newtarget = make_pathtarget_from_tlist(subroot->processed_tlist); + subroot->final_locus = cdbllize_get_final_locus(subroot, newtarget); + + /* Rewrite with mv's query tree*/ + mvrte->relkind = RELKIND_MATVIEW; + mvrte->relid = matviewRel->rd_rel->oid; + /* + * AQUMV_FIXME_MVP + * Not sure where it's true from actions even it's not inherit tables. + */ + mvrte->inh = false; + mvQuery->rtable = list_make1(mvrte); /* rewrite to SELECT FROM mv itself. */ + mvQuery->jointree->quals = (Node *)post_quals; /* Could be NULL, but doesn'y matter for now. */ + + /* + * AQUMV + * Build a plan of new SQL. + * AQUMV is cost-based, let planner decide which is better. + * AQUMV_FIXME_MVP: + * no qp_callback function now. + * replcace one-by-one? + */ + mv_final_rel = query_planner(subroot, NULL, NULL); + + /* AQUMV_FIXME_MVP + * We don't use STD_FUZZ_FACTOR for cost comparisons like compare_path_costs_fuzzily here. + * The STD_FUZZ_FACTOR is used to reduce paths of a rel, and keep the significantly ones. + * But in AQUMV, we always have only one best path of rel at the last to compare. + * TODO: limit clause and startup_cost. + */ + if (mv_final_rel->cheapest_total_path->total_cost < current_rel->cheapest_total_path->total_cost) + { + root->parse = mvQuery; + root->processed_tlist = subroot->processed_tlist; + /* + * AQUMV_FIXME_MVP + * Use new query's ecs. + * Equivalence Class is not supported now, we may lost some ECs if the mv_query has + * equal quals or implicit ones. + * But keeping them also introduces more complex as we should process them like target list. + * Another flaw: the generated Filter expressions by keeping them are pointless as all + * rows of mv have matched the filter expressions. + * See more in README.cbdb.aqumv + */ + root->eq_classes = subroot->eq_classes; + /* Replace relation, don't close the right one. */ + current_rel = mv_final_rel; + table_close(matviewRel, AccessShareLock); + need_close = false; + } + } + if (need_close) + table_close(matviewRel, AccessShareLock); + systable_endscan(rcscan); + table_close(ruleDesc, AccessShareLock); + } + } + } + /* * Convert the query's result tlist into PathTarget format. * @@ -8949,3 +9370,331 @@ make_new_rollups_for_hash_grouping_set(PlannerInfo *root, return srd; } + + +/* + * Process varno after we eliminate mv's actions("old" and "new" relation) + * Correct rindex and all varnos with a delta. + * + * MV's actions query tree: + * [rtable] + * RangeTblEntry [rtekind=RTE_RELATION] + * [alias] Alias [aliasname="old"] + * RangeTblEntry [rtekind=RTE_RELATION] + * [alias] Alias [aliasname="new"] + * RangeTblEntry [rtekind=RTE_RELATION] + * [jointree] + * FromExpr [] + * [fromlist] + * RangeTblRef [rtindex=3] + * [targetList] + * TargetEntry [resno=1 resname="c1"] + * Var [varno=3 varattno=1] + * TargetEntry [resno=2 resname="c2"] + * Var [varno=3 varattno=2] + *------------------------------------------------------------------------------------------ + * MV's query tree after rewrite: + * [rtable] + * RangeTblEntry [rtekind=RTE_RELATION] + * [jointree] + * FromExpr [] + * [fromlist] + * RangeTblRef [rtindex=3] + * [targetList] + * TargetEntry [resno=1 resname="c1"] + * Var [varno=3 varattno=1] + * TargetEntry [resno=2 resname="c2"] + * Var [varno=3 varattno=2] + *------------------------------------------------------------------------------------------ + * MV's query tree after varno adjust: + * [rtable] + * RangeTblEntry [rtekind=RTE_RELATION] + * [jointree] + * FromExpr [] + * [fromlist] + * RangeTblRef [rtindex=1] + * [targetList] + * TargetEntry [resno=1 resname="c1"] + * Var [varno=1 varattno=1] + * TargetEntry [resno=2 resname="c2"] + * Var [varno=1 varattno=2] + * + */ +static void +aqumv_adjust_varno(Query* parse, int varno) +{ + aqumv_adjust_varno_context context; + context.varno = varno; + parse = query_tree_mutator(parse, aqumv_adjust_varno_mutator, &context, QTW_DONT_COPY_QUERY); +} + +/* + * Only for plain select * from mv; + * All TargetEntrys are pure Var. + * var_index start from 1 + */ +static inline Var * +copyVarFromTatgetList(List* tlist, int var_index) +{ + TargetEntry * tle = (TargetEntry *) list_nth(tlist, var_index - 1); + Assert(IsA(tle->expr,Var)); + Var *var = copyObject((Var *)tle->expr); + return var; +} + +/* + * Adjust varno and rindex with delta. + */ +static Node *aqumv_adjust_varno_mutator(Node *node, aqumv_adjust_varno_context *context) +{ + if (node == NULL) + return NULL; + if (IsA(node, Var)) + ((Var *)node)->varno = context->varno; + else if (IsA(node, RangeTblRef)) + /* AQUMV_FIXME_MVP: currently we have only one relation */ + ((RangeTblRef*) node)->rtindex = context->varno; + return expression_tree_mutator(node, aqumv_adjust_varno_mutator, context); +} + +typedef struct +{ + int complexity; +} node_complexity_context; + +typedef struct +{ + int tlist_index; /* Index of tlist, begin from 1 */ + int count; /* Count of subnodes in this expression */ +} expr_to_sort; + +/* + * Compute a node complexity recursively. + * Complexity of a node is the total times we enter walker function after all + * subnodes are walked recursively. + * It's used to sort the expression in mv's tlist. + */ +static bool +compute_node_complexity_walker(Node *node, node_complexity_context *context) +{ + if (node == NULL) + return false; + context->complexity++; + return expression_tree_walker(node, compute_node_complexity_walker, (void *) context); +} + +static int +nonpure_vars_expr_compare(const ListCell *a, const ListCell *b) +{ + expr_to_sort *ets1 = (expr_to_sort *) lfirst(a); + expr_to_sort *ets2 = (expr_to_sort *) lfirst(b); + return (ets1->count < ets2->count) ? 1 : (ets1->count == ets2->count) ? 0 : -1; +} + +/* + * In-place update order of mv_nonpure_vars_index List + */ +static void +aqumv_process_nonpure_vars_expr(aqumv_equivalent_transformation_context* context) +{ + ListCell* lc; + List *expr_to_sort_list = NIL; + foreach(lc, context->mv_nonpure_vars_index) + { + int index = lfirst_int(lc); + Node *expr = lfirst(list_nth_cell(context->mv_query_tlist, index -1)); + node_complexity_context *subnode_context = palloc0(sizeof(node_complexity_context)); + (void) compute_node_complexity_walker(expr, subnode_context); + expr_to_sort *ets = palloc0(sizeof(expr_to_sort)); + ets->tlist_index = index; + ets->count = subnode_context->complexity; + expr_to_sort_list = lappend(expr_to_sort_list, ets); + } + + /* Sort the expr list */ + list_sort(expr_to_sort_list, nonpure_vars_expr_compare); + /* Reorder mv_nonpure_vars_index */ + context->mv_nonpure_vars_index = NIL; + foreach(lc, expr_to_sort_list) + { + expr_to_sort *ets = (expr_to_sort *) lfirst(lc); + context->mv_nonpure_vars_index = lappend_int(context->mv_nonpure_vars_index, ets->tlist_index); + } + return; +} + + +/* + * Process query and materialized views' quals. + * Return true if all mv_quals are in query_quals, + * else return false. + * + * If return true, put quals in query_quals but not in mv_quals + * into post_quals. + * + * Ex: create materialized view mv0 as select * from t1 where c1 = 1; + * query: select * from t1 where c1 = 1 and c2 = 2; + * post_quals = {c2 = 2}. + * + * AQUMV_FIXME_MVP: only support one relation now, so we don't need to + * compare varno(both are 1 after aqumv_adjust_varno), + * mv's query tree has been processed into one relation too. + * + * Will return false if varattno in mv->query has different order with query's. + * Ex: create materialized view mv0 as select c2, c1 from t1 where c1 = 1; + * query: select c1, c2 from t1 where c1 = 1 and c2 = 2; + * + * The returned post_quals may be or may not be used later, it's up to mv's targetList. + * + */ +static bool +aqumv_process_from_quals(Node *query_quals, Node *mv_quals, List **post_quals) +{ + List *qlist = NIL; + List *mlist = NIL; + + if (query_quals == NULL) + return mv_quals == NULL; + + if(!IsA(query_quals, List)) + qlist = list_make1(query_quals); + else + qlist = (List *)query_quals; + + if (mv_quals == NULL) + { + *post_quals = qlist; + return true; + } + + if(!IsA(mv_quals, List)) + mlist = list_make1(mv_quals); + else + mlist = (List *)mv_quals; + + if (list_difference(mlist, qlist) != NIL) + return false; + *post_quals = list_difference(qlist, mlist); + return true; +} + +/* + * Adjust query expr's Vars + * Replace Vars with corresponding attribute in mv relation. + * Return a new expr after rewrite. + */ +static Node *aqumv_adjust_sub_matched_expr_mutator(Node *node, aqumv_equivalent_transformation_context *context) +{ + if (!node || context->has_unmatched) + return node; + + bool is_targetEntry = IsA(node, TargetEntry); + Expr *node_expr = is_targetEntry ? ((TargetEntry *)node)->expr : (Expr *)node; + + /* Don't select Const results form mv, bypass it to upper when projection. */ + if (IsA(node_expr, Const)) + return is_targetEntry ? node : (Node *)node_expr; + + ListCell *lc = NULL; + foreach(lc, context->mv_nonpure_vars_index) + { + int index = lfirst_int(lc); + TargetEntry *tle = list_nth_node(TargetEntry, context->mv_query_tlist, index - 1); + if(equal(node_expr, tle->expr)) + { + Var *newVar = copyVarFromTatgetList(context->mv_tlist, index); + newVar->location = -2; /* hack here, use -2 to indicate already rewrited by mv rel Vars. */ + if (is_targetEntry) + { + TargetEntry *qtle = (TargetEntry *) node; + /* + * AQUMV_FIXME_MVP: + * resorigtbl, resorigcol, resjunck in mv_query is also rejunck in mv table itself ? + */ + TargetEntry *mvtle = makeTargetEntry((Expr *)newVar, qtle->resno, qtle->resname, qtle->resjunk); + return (Node *) mvtle; + } + else + return (Node *) newVar; + } + } + + /* + * We didn't find matched nonpure-Var expr. + * And if expr doesn't have Vars, return it to upper. + */ + List *expr_vars = pull_var_clause((Node *)node_expr, + PVC_RECURSE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | + PVC_INCLUDE_PLACEHOLDERS); + + if (expr_vars == NIL) + return (Node *)node_expr; + list_free(expr_vars); + + /* Try match with mv_pure_vars_index, but do not disturb already rewrited exprs(Var->location = -2) */ + if (IsA(node_expr, Var)) + { + Var *var = (Var *)node_expr; + if (var->location == -2) + return node; + lc = NULL; + int i = 0; + foreach(lc, context->mv_pure_vars) + { + Var *pure_var = lfirst_node(Var,lc); + if (equal(node_expr, pure_var)) + { + int j = list_nth_int(context->mv_pure_vars_index, i); + Var *newvar = copyVarFromTatgetList(context->mv_tlist, j); + if (is_targetEntry) + { + ((TargetEntry *)node)->expr = (Expr *)newvar; + return node; + } + else + return (Node *)newvar; + } + i++; + } + context->has_unmatched = true; + } + + return expression_tree_mutator(node, aqumv_adjust_sub_matched_expr_mutator, context); +} + +/* + * Process query and materialized views' target list. + * Return true if all query_tlist are in mv_tlist. + * else return false. + * + * If return true, put tlist in mv_quals but not in query_tlist + * into post_tlist. + * + * Ex: create materialized view mv0 as select c1, c2 from t1 where c1 = 1; + * query: select c2 from t1 where c1 = 1; + * post_tlist= {1}. + * + * AQUMV_FIXME_MVP: strict match with same resno? + * MVP0: expression replace + * mv: select c1, c2 from t1 where c1 = 50; + * select c1 from t1 where c1 = 50 and abs(t1.c2) = 51; + * rewrite: select c1 from mv where abs(mv.c2) = 51; + * + * MVP1: expression eliminate + * mv: select c1, abs(c2) as c2 from t1 where c1 = 50; + * select c1 from t1 where c1 = 50 and abs(c2) = 51; + * rewrite: select c1 from mv where c2 = 51; + * + * mv_final_tlist is the final targetList of mvQuery. + * + */ +static bool +aqumv_process_targetlist(aqumv_equivalent_transformation_context *context, List *query_tlist, List **mv_final_tlist) +{ + *mv_final_tlist = (List *)aqumv_adjust_sub_matched_expr_mutator((Node *)(copyObject(query_tlist)), context); + if (context->has_unmatched) + pfree(*mv_final_tlist); + + return !context->has_unmatched; +} diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index a25db7f796a..3ccd6ede60b 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -430,6 +430,8 @@ bool gp_enable_global_deadlock_detector = false; bool gp_enable_predicate_pushdown; int gp_predicate_pushdown_sample_rows; +bool answer_query_using_materialized_views = false; + static const struct config_enum_entry gp_log_format_options[] = { {"text", 0}, {"csv", 1}, @@ -2972,7 +2974,6 @@ struct config_bool ConfigureNamesBool_gp[] = false, NULL, NULL, NULL }, - { {"gp_internal_is_singlenode", PGC_POSTMASTER, UNGROUPED, gettext_noop("Is in SingleNode mode (no segments). WARNING: user SHOULD NOT set this by any means."), @@ -2983,6 +2984,16 @@ struct config_bool ConfigureNamesBool_gp[] = false, NULL, NULL, NULL }, + { + {"answer_query_using_materialized_views", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("allow to answer query results using materialized views."), + NULL, + GUC_EXPLAIN + }, + &answer_query_using_materialized_views, + false, + NULL, NULL, NULL + }, /* End-of-list marker */ { diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index 35a2154ce07..ee60d339ff8 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -300,6 +300,7 @@ extern bool gp_appendonly_compaction; extern bool enable_parallel; extern int gp_appendonly_insert_files; extern int gp_appendonly_insert_files_tuples_range; +extern bool answer_query_using_materialized_views; /* * gp_enable_multiphase_limit is not cost based. * When set to false, the planner will not use multi-phase limit. diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index cd090e68c39..9d1741139a8 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -10,6 +10,7 @@ */ "allow_segment_DML", "allow_system_table_mods", + "answer_query_using_materialized_views", "application_name", "archive_cleanup_command", "archive_command", diff --git a/src/test/regress/expected/aqumv.out b/src/test/regress/expected/aqumv.out new file mode 100644 index 00000000000..fdfe8286d56 --- /dev/null +++ b/src/test/regress/expected/aqumv.out @@ -0,0 +1,737 @@ +-- +-- Tests of Answer Query Using Materialized Views. +-- +set optimizer = off; +create table aqumv_t1(c1 int, c2 int, c3 int) distributed by (c1); +insert into aqumv_t1 select i, i+1, i+2 from generate_series(1, 1000) i; +insert into aqumv_t1 select * from aqumv_t1; +analyze aqumv_t1; +set answer_query_using_materialized_views = on; +-- drop views if there is no data populated +begin; +create incremental materialized view aqumv_mvt1_need_refresh as select * from aqumv_t1 where c1 = 2 with no data; +set local answer_query_using_materialized_views = on; +explain(verbose, costs off) select * from aqumv_t1 where c1 = 2; + QUERY PLAN +--------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + Output: c1, c2, c3 + -> Seq Scan on public.aqumv_t1 + Output: c1, c2, c3 + Filter: (aqumv_t1.c1 = 2) + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(7 rows) + +refresh materialized view aqumv_mvt1_need_refresh; +analyze aqumv_mvt1_need_refresh; +explain(verbose, costs off) select * from aqumv_t1 where c1 = 2; + QUERY PLAN +--------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: c1, c2, c3 + -> Seq Scan on public.aqumv_mvt1_need_refresh + Output: c1, c2, c3 + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(6 rows) + +abort; +begin; +create incremental materialized view aqumv_mvt1_0 as select * from aqumv_t1 where c1 = 2; +analyze aqumv_mvt1_0; +set local answer_query_using_materialized_views = off; +select * from aqumv_t1 where c1 = 2; + c1 | c2 | c3 +----+----+---- + 2 | 3 | 4 + 2 | 3 | 4 +(2 rows) + +select c1, c2, c3 from aqumv_t1 where c1 = 2; + c1 | c2 | c3 +----+----+---- + 2 | 3 | 4 + 2 | 3 | 4 +(2 rows) + +select c2 from aqumv_t1 where c1 = 2; + c2 +---- + 3 + 3 +(2 rows) + +select c3, c2 from aqumv_t1 where c1 = 2; + c3 | c2 +----+---- + 4 | 3 + 4 | 3 +(2 rows) + +set local answer_query_using_materialized_views = on; +select * from aqumv_t1 where c1 = 2; + c1 | c2 | c3 +----+----+---- + 2 | 3 | 4 + 2 | 3 | 4 +(2 rows) + +select c1, c2, c3 from aqumv_t1 where c1 = 2; + c1 | c2 | c3 +----+----+---- + 2 | 3 | 4 + 2 | 3 | 4 +(2 rows) + +select c2 from aqumv_t1 where c1 = 2; + c2 +---- + 3 + 3 +(2 rows) + +select c3, c2 from aqumv_t1 where c1 = 2; + c3 | c2 +----+---- + 4 | 3 + 4 | 3 +(2 rows) + +-- tlist matched. +explain(verbose, costs off) select * from aqumv_t1 where c1 = 2; + QUERY PLAN +--------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: c1, c2, c3 + -> Seq Scan on public.aqumv_mvt1_0 + Output: c1, c2, c3 + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(6 rows) + +explain(verbose, costs off) select c1, c2, c3 from aqumv_t1 where c1 = 2; + QUERY PLAN +--------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: c1, c2, c3 + -> Seq Scan on public.aqumv_mvt1_0 + Output: c1, c2, c3 + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(6 rows) + +-- tlist partially matched. +explain(verbose, costs off) select c2 from aqumv_t1 where c1 = 2; + QUERY PLAN +--------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: c2 + -> Seq Scan on public.aqumv_mvt1_0 + Output: c2 + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(6 rows) + +-- tlist disorder. +explain(verbose, costs off) select c3, c2 from aqumv_t1 where c1 = 2; + QUERY PLAN +--------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: c3, c2 + -> Seq Scan on public.aqumv_mvt1_0 + Output: c3, c2 + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(6 rows) + +abort; +begin; +create incremental materialized view aqumv_mvt1_1 as select c2 as mc2, c3 as mc3, c1 as mc1, c2 as mc2_1 from aqumv_t1 where c1 = 3; +analyze aqumv_mvt1_1; +set local answer_query_using_materialized_views = off; +select c1 as col1, c2 as col2 from aqumv_t1 where c1 = 3; + col1 | col2 +------+------ + 3 | 4 + 3 | 4 +(2 rows) + +select c1, c1 from aqumv_t1 where c1 = 3; + c1 | c1 +----+---- + 3 | 3 + 3 | 3 +(2 rows) + +set local answer_query_using_materialized_views = on; +select c1 as col1, c2 as col2 from aqumv_t1 where c1 = 3; + col1 | col2 +------+------ + 3 | 4 + 3 | 4 +(2 rows) + +select c1, c1 from aqumv_t1 where c1 = 3; + c1 | c1 +----+---- + 3 | 3 + 3 | 3 +(2 rows) + +-- tlist alias. +explain(verbose, costs off) select c1 as col1, c2 as col2 from aqumv_t1 where c1 = 3; + QUERY PLAN +--------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: mc1, mc2 + -> Seq Scan on public.aqumv_mvt1_1 + Output: mc1, mc2 + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(6 rows) + +-- duplicated projection. +explain(verbose, costs off) select c1, c1 from aqumv_t1 where c1 = 3; + QUERY PLAN +--------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: mc1, mc1 + -> Seq Scan on public.aqumv_mvt1_1 + Output: mc1, mc1 + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(6 rows) + +abort; +begin; +create incremental materialized view aqumv_mvt1_nonvar_expr as + select c2, 1 as mc_const_1, sqrt(100) as mc_sqrt_100 + from aqumv_t1 where c1 = 4; +analyze aqumv_mvt1_nonvar_expr; +set local answer_query_using_materialized_views = off; +select c2, 200 from aqumv_t1 where c1 = 4; + c2 | ?column? +----+---------- + 5 | 200 + 5 | 200 +(2 rows) + +select c2, 1, sqrt(100) from aqumv_t1 where c1 = 4; + c2 | ?column? | sqrt +----+----------+------ + 5 | 1 | 10 + 5 | 1 | 10 +(2 rows) + +set local answer_query_using_materialized_views = on; +select c2, 200 from aqumv_t1 where c1 = 4; + c2 | ?column? +----+---------- + 5 | 200 + 5 | 200 +(2 rows) + +select c2, 1, sqrt(100) from aqumv_t1 where c1 = 4; + c2 | ?column? | sqrt +----+----------+------ + 5 | 1 | 10 + 5 | 1 | 10 +(2 rows) + +-- Const are copied to output. +explain(verbose, costs off) select c2, 200 from aqumv_t1 where c1 = 4; + QUERY PLAN +--------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: c2, 200 + -> Seq Scan on public.aqumv_mvt1_nonvar_expr + Output: c2, 200 + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(6 rows) + +explain(verbose, costs off) select c2, 1, sqrt(100) from aqumv_t1 where c1 = 4; + QUERY PLAN +--------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: c2, 1, '10'::double precision + -> Seq Scan on public.aqumv_mvt1_nonvar_expr + Output: c2, 1, '10'::double precision + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(6 rows) + +abort; +begin; +CREATE OR REPLACE FUNCTION aqumv_func(i integer, j integer) RETURNS integer AS $$ + BEGIN + RETURN i + j; + END; +$$ LANGUAGE plpgsql IMMUTABLE; +create incremental materialized view aqumv_mvt1_func_has_var as + select c2, aqumv_func(c1, c3) as mc_func_res + from aqumv_t1 where c1 = 5; +analyze aqumv_mvt1_func_has_var; +set local answer_query_using_materialized_views = off; +select c2, aqumv_func(c1, c3) from aqumv_t1 where c1 = 5; + c2 | aqumv_func +----+------------ + 6 | 12 + 6 | 12 +(2 rows) + +set local answer_query_using_materialized_views = on; +select c2, aqumv_func(c1, c3) from aqumv_t1 where c1 = 5; + c2 | aqumv_func +----+------------ + 6 | 12 + 6 | 12 +(2 rows) + +-- Functions has Vars are replaced. +explain(verbose, costs off) select c2, aqumv_func(c1, c3), aqumv_func(c1, c3) from aqumv_t1 where c1 = 5; + QUERY PLAN +--------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: c2, mc_func_res, mc_func_res + -> Seq Scan on public.aqumv_mvt1_func_has_var + Output: c2, mc_func_res, mc_func_res + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(6 rows) + +abort; +begin; +create incremental materialized view aqumv_mvt1_2 as select c2 as mc2, c1 as mc1 from aqumv_t1 where c1 > 1 and c1 < 5; +analyze aqumv_mvt1_2; +set local answer_query_using_materialized_views = on; +-- shoud be unable to use mv, projection doesn't exit in mv's tlist +explain(verbose, costs off) select c3 from aqumv_t1 where c1 < 5 and c1 > 1; + QUERY PLAN +--------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: c3 + -> Seq Scan on public.aqumv_t1 + Output: c3 + Filter: ((aqumv_t1.c1 < 5) AND (aqumv_t1.c1 > 1)) + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(7 rows) + +-- no post quals. +explain(verbose, costs off) select c1, c2 from aqumv_t1 where c1 < 5 and c1 > 1; + QUERY PLAN +--------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: mc1, mc2 + -> Seq Scan on public.aqumv_mvt1_2 + Output: mc1, mc2 + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(6 rows) + +-- post quals added to mv. +set local answer_query_using_materialized_views = off; +select c1, c2 from aqumv_t1 where c1 < 5 and c1 > 1 and c2 = 4; + c1 | c2 +----+---- + 3 | 4 + 3 | 4 +(2 rows) + +set local answer_query_using_materialized_views = on; +select c1, c2 from aqumv_t1 where c1 < 5 and c1 > 1 and c2 = 4; + c1 | c2 +----+---- + 3 | 4 + 3 | 4 +(2 rows) + +explain(verbose, costs off) select c1, c2 from aqumv_t1 where c1 < 5 and c1 > 1 and c2 = 4; + QUERY PLAN +--------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + Output: mc1, mc2 + -> Seq Scan on public.aqumv_mvt1_2 + Output: mc1, mc2 + Filter: (aqumv_mvt1_2.mc2 = 4) + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(7 rows) + +-- should be unable to use mv, post quals has column doesn't exit in mv's tlist. +explain(verbose, costs off) select * from aqumv_t1 where c1 < 5 and c1 > 1 and c3 > 1; + QUERY PLAN +--------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: c1, c2, c3 + -> Seq Scan on public.aqumv_t1 + Output: c1, c2, c3 + Filter: ((aqumv_t1.c1 < 5) AND (aqumv_t1.c1 > 1) AND (aqumv_t1.c3 > 1)) + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(7 rows) + +abort; +begin; +create incremental materialized view aqumv_mvt1_3 as select c2 as mc2, c1 as mc1, c3+1 as mc3 from aqumv_t1 where c1 > 5 and c1 < 10; +analyze aqumv_mvt1_3; +set local answer_query_using_materialized_views = on; +-- should be unable to use mv, column c3 doesn't exist in mv's tlist. +explain(verbose, costs off) select * from aqumv_t1 where c1 > 5 and c1 < 10; + QUERY PLAN +--------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: c1, c2, c3 + -> Seq Scan on public.aqumv_t1 + Output: c1, c2, c3 + Filter: ((aqumv_t1.c1 > 5) AND (aqumv_t1.c1 < 10)) + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(7 rows) + +-- expr c3+1 is in mv's tlist +explain(verbose, costs off) select c1 as col1, c3+1 as col2 from aqumv_t1 where c1 > 5 and c1 < 10; + QUERY PLAN +--------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: mc1, mc3 + -> Seq Scan on public.aqumv_mvt1_3 + Output: mc1, mc3 + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(6 rows) + +-- expr c1+1 could be derived from mv's tlist +explain(verbose, costs off) select c1+1 as col1, c2, c3+1 as col2 from aqumv_t1 where c1 > 5 and c1 < 10; + QUERY PLAN +--------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: ((mc1 + 1)), mc2, mc3 + -> Seq Scan on public.aqumv_mvt1_3 + Output: (mc1 + 1), mc2, mc3 + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(6 rows) + +select c1 as col1, c3+1 as col2 from aqumv_t1 where c1 > 5 and c1 < 10; + col1 | col2 +------+------ + 6 | 9 + 9 | 12 + 6 | 9 + 9 | 12 + 7 | 10 + 8 | 11 + 7 | 10 + 8 | 11 +(8 rows) + +select c1+1 as col1, c2, c3+1 as col2 from aqumv_t1 where c1 > 5 and c1 < 10; + col1 | c2 | col2 +------+----+------ + 7 | 7 | 9 + 10 | 10 | 12 + 7 | 7 | 9 + 10 | 10 | 12 + 8 | 8 | 10 + 9 | 9 | 11 + 8 | 8 | 10 + 9 | 9 | 11 +(8 rows) + +set local answer_query_using_materialized_views = off; +select c1 as col1, c3+1 as col2 from aqumv_t1 where c1 > 5 and c1 < 10; + col1 | col2 +------+------ + 7 | 10 + 8 | 11 + 7 | 10 + 8 | 11 + 6 | 9 + 9 | 12 + 6 | 9 + 9 | 12 +(8 rows) + +select c1+1 as col1, c2, c3+1 as col2 from aqumv_t1 where c1 > 5 and c1 < 10; + col1 | c2 | col2 +------+----+------ + 8 | 8 | 10 + 9 | 9 | 11 + 8 | 8 | 10 + 9 | 9 | 11 + 7 | 7 | 9 + 10 | 10 | 12 + 7 | 7 | 9 + 10 | 10 | 12 +(8 rows) + +abort; +begin; +create incremental materialized view aqumv_mvt1_4 as + select c1 as mc1, c2 as mc2, abs(c2) as mc3, abs(abs(c2) - c1 - 1) as mc4 + from aqumv_t1 where c1 > 10 and c1 < 15; +analyze aqumv_mvt1_4; +set local answer_query_using_materialized_views = on; +-- complex exprs +explain(verbose, costs off) select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 from aqumv_t1 where c1 > 10 and c1 < 15; + QUERY PLAN +------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: mc1, ((sqrt(((mc4 + mc3))::double precision) + '1'::double precision)), ((mc3 + 1)) + -> Seq Scan on public.aqumv_mvt1_4 + Output: mc1, (sqrt(((mc4 + mc3))::double precision) + '1'::double precision), (mc3 + 1) + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(6 rows) + +select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 from aqumv_t1 where c1 > 10 and c1 < 15; + c1 | ?column? | ?column? +----+-------------------+---------- + 12 | 4.60555127546399 | 14 + 12 | 4.60555127546399 | 14 + 11 | 4.464101615137754 | 13 + 13 | 4.741657386773941 | 15 + 14 | 4.872983346207417 | 16 + 11 | 4.464101615137754 | 13 + 13 | 4.741657386773941 | 15 + 14 | 4.872983346207417 | 16 +(8 rows) + +set local answer_query_using_materialized_views = off; +select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 from aqumv_t1 where c1 > 10 and c1 < 15; + c1 | ?column? | ?column? +----+-------------------+---------- + 11 | 4.464101615137754 | 13 + 13 | 4.741657386773941 | 15 + 14 | 4.872983346207417 | 16 + 11 | 4.464101615137754 | 13 + 13 | 4.741657386773941 | 15 + 14 | 4.872983346207417 | 16 + 12 | 4.60555127546399 | 14 + 12 | 4.60555127546399 | 14 +(8 rows) + +abort; +-- post quals +begin; +create incremental materialized view aqumv_mvt1_post_quals as + select c1 as mc1, c2 as mc2, abs(c2) as mc3, abs(abs(c2) - c1 - 1) as mc4 + from aqumv_t1 where c1 > 20 and c1 < 30; +analyze aqumv_mvt1_post_quals; +set local answer_query_using_materialized_views = on; +explain(verbose, costs off) + select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 + from aqumv_t1 where c1 > 20 and c1 < 30 and sqrt(abs(c2) + 1) > 1; + QUERY PLAN +----------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: mc1, ((sqrt(((mc4 + mc3))::double precision) + '1'::double precision)), ((mc3 + 1)) + -> Seq Scan on public.aqumv_mvt1_post_quals + Output: mc1, (sqrt(((mc4 + mc3))::double precision) + '1'::double precision), (mc3 + 1) + Filter: (sqrt(((aqumv_mvt1_post_quals.mc3 + 1))::double precision) > '1'::double precision) + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(7 rows) + +explain(verbose, costs off) + select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 + from aqumv_t1 where c1 > 20 and c1 < 30 and sqrt(abs(c2) + 1) > abs(c1) + 2; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: mc1, ((sqrt(((mc4 + mc3))::double precision) + '1'::double precision)), ((mc3 + 1)) + -> Seq Scan on public.aqumv_mvt1_post_quals + Output: mc1, (sqrt(((mc4 + mc3))::double precision) + '1'::double precision), (mc3 + 1) + Filter: (sqrt(((aqumv_mvt1_post_quals.mc3 + 1))::double precision) > ((abs(aqumv_mvt1_post_quals.mc1) + 2))::double precision) + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(7 rows) + +explain(verbose, costs off) +select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 + from aqumv_t1 where c1 > 20 and c1 < 30 and sqrt(abs(abs(c2) - c1 - 1) + 10) > 2; + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + Output: mc1, ((sqrt(((mc4 + mc3))::double precision) + '1'::double precision)), ((mc3 + 1)) + -> Seq Scan on public.aqumv_mvt1_post_quals + Output: mc1, (sqrt(((mc4 + mc3))::double precision) + '1'::double precision), (mc3 + 1) + Filter: (sqrt(((aqumv_mvt1_post_quals.mc4 + 10))::double precision) > '2'::double precision) + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(7 rows) + +select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 + from aqumv_t1 where c1 > 20 and c1 < 30 and sqrt(abs(c2) + 1) > 1; + c1 | ?column? | ?column? +----+--------------------+---------- + 22 | 5.795831523312719 | 24 + 24 | 6 | 26 + 27 | 6.291502622129181 | 29 + 29 | 6.477225575051661 | 31 + 22 | 5.795831523312719 | 24 + 24 | 6 | 26 + 27 | 6.291502622129181 | 29 + 29 | 6.477225575051661 | 31 + 23 | 5.898979485566356 | 25 + 26 | 6.196152422706632 | 28 + 23 | 5.898979485566356 | 25 + 26 | 6.196152422706632 | 28 + 21 | 5.69041575982343 | 23 + 25 | 6.0990195135927845 | 27 + 28 | 6.385164807134504 | 30 + 21 | 5.69041575982343 | 23 + 25 | 6.0990195135927845 | 27 + 28 | 6.385164807134504 | 30 +(18 rows) + +select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 + from aqumv_t1 where c1 > 20 and c1 < 30 and sqrt(abs(c2) + 1) > abs(c1) + 2; + c1 | ?column? | ?column? +----+----------+---------- +(0 rows) + +select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 + from aqumv_t1 where c1 > 20 and c1 < 30 and sqrt(abs(abs(c2) - c1 - 1) + 10) > 2; + c1 | ?column? | ?column? +----+--------------------+---------- + 22 | 5.795831523312719 | 24 + 24 | 6 | 26 + 27 | 6.291502622129181 | 29 + 29 | 6.477225575051661 | 31 + 22 | 5.795831523312719 | 24 + 24 | 6 | 26 + 27 | 6.291502622129181 | 29 + 29 | 6.477225575051661 | 31 + 23 | 5.898979485566356 | 25 + 26 | 6.196152422706632 | 28 + 23 | 5.898979485566356 | 25 + 26 | 6.196152422706632 | 28 + 21 | 5.69041575982343 | 23 + 25 | 6.0990195135927845 | 27 + 28 | 6.385164807134504 | 30 + 21 | 5.69041575982343 | 23 + 25 | 6.0990195135927845 | 27 + 28 | 6.385164807134504 | 30 +(18 rows) + +set local answer_query_using_materialized_views = off; +select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 + from aqumv_t1 where c1 > 20 and c1 < 30 and sqrt(abs(c2) + 1) > 1; + c1 | ?column? | ?column? +----+--------------------+---------- + 22 | 5.795831523312719 | 24 + 24 | 6 | 26 + 27 | 6.291502622129181 | 29 + 29 | 6.477225575051661 | 31 + 22 | 5.795831523312719 | 24 + 24 | 6 | 26 + 27 | 6.291502622129181 | 29 + 29 | 6.477225575051661 | 31 + 23 | 5.898979485566356 | 25 + 26 | 6.196152422706632 | 28 + 23 | 5.898979485566356 | 25 + 26 | 6.196152422706632 | 28 + 21 | 5.69041575982343 | 23 + 25 | 6.0990195135927845 | 27 + 28 | 6.385164807134504 | 30 + 21 | 5.69041575982343 | 23 + 25 | 6.0990195135927845 | 27 + 28 | 6.385164807134504 | 30 +(18 rows) + +select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 + from aqumv_t1 where c1 > 20 and c1 < 30 and sqrt(abs(c2) + 1) > abs(c1) + 2; + c1 | ?column? | ?column? +----+----------+---------- +(0 rows) + +select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 + from aqumv_t1 where c1 > 20 and c1 < 30 and sqrt(abs(abs(c2) - c1 - 1) + 10) > 2; + c1 | ?column? | ?column? +----+--------------------+---------- + 21 | 5.69041575982343 | 23 + 25 | 6.0990195135927845 | 27 + 28 | 6.385164807134504 | 30 + 21 | 5.69041575982343 | 23 + 25 | 6.0990195135927845 | 27 + 28 | 6.385164807134504 | 30 + 22 | 5.795831523312719 | 24 + 24 | 6 | 26 + 27 | 6.291502622129181 | 29 + 29 | 6.477225575051661 | 31 + 22 | 5.795831523312719 | 24 + 24 | 6 | 26 + 27 | 6.291502622129181 | 29 + 29 | 6.477225575051661 | 31 + 23 | 5.898979485566356 | 25 + 26 | 6.196152422706632 | 28 + 23 | 5.898979485566356 | 25 + 26 | 6.196152422706632 | 28 +(18 rows) + +abort; +-- choose the best one if there are multiple chooses based on cost. +begin; +set local answer_query_using_materialized_views = on; +create incremental materialized view aqumv_mvt1_candidate_0 as + select c1 as mc1, c2 as mc2, abs(c2) as mc3 + from aqumv_t1 where c1 > 30; +analyze aqumv_mvt1_candidate_0; +-- choose aqumv_mvt1_candidate_0 +explain(verbose, costs off) +select sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) from aqumv_t1 where c1 > 30 and c1 < 40 and sqrt(abs(c2)) > 5.8; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + Output: (sqrt(((abs(((mc3 - mc1) - 1)) + mc3))::double precision)) + -> Seq Scan on public.aqumv_mvt1_candidate_0 + Output: sqrt(((abs(((mc3 - mc1) - 1)) + mc3))::double precision) + Filter: ((aqumv_mvt1_candidate_0.mc1 < 40) AND (sqrt((aqumv_mvt1_candidate_0.mc3)::double precision) > '5.8'::double precision)) + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(7 rows) + +create incremental materialized view aqumv_mvt1_candidate_1 as + select c1 as mc1, c2 as mc2, abs(c2) as mc3, abs(abs(c2) - c1 - 1) as mc4 + from aqumv_t1 where c1 > 30 and c1 < 40; +analyze aqumv_mvt1_candidate_1; +-- choose aqumv_mvt1_candidate_1 as it has lower cost(less rows). +explain(verbose, costs off) +select sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) from aqumv_t1 where c1 > 30 and c1 < 40 and sqrt(abs(c2)) > 5.8; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: (sqrt(((mc4 + mc3))::double precision)) + -> Seq Scan on public.aqumv_mvt1_candidate_1 + Output: sqrt(((mc4 + mc3))::double precision) + Filter: (sqrt((aqumv_mvt1_candidate_1.mc3)::double precision) > '5.8'::double precision) + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(7 rows) + +create incremental materialized view aqumv_mvt1_candidate_2 as + select c1 as mc1, c2 as mc2, abs(c2) as mc3, abs(abs(c2) - c1 - 1) as mc4 + from aqumv_t1 where c1 > 30 and c1 < 500; +analyze aqumv_mvt1_candidate_2; +-- still choose aqumv_mvt1_candidate_1 as it has lowest cost(less rows). +explain(verbose, costs off) +select sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) from aqumv_t1 where c1 > 30 and c1 < 40 and sqrt(abs(c2)) > 5.8; + QUERY PLAN +-------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: (sqrt(((mc4 + mc3))::double precision)) + -> Seq Scan on public.aqumv_mvt1_candidate_1 + Output: sqrt(((mc4 + mc3))::double precision) + Filter: (sqrt((aqumv_mvt1_candidate_1.mc3)::double precision) > '5.8'::double precision) + Settings: answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(7 rows) + +abort; +reset optimizer; +reset answer_query_using_materialized_views; +drop table aqumv_t1 cascade; diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule index 9f5d4d4bf71..a7cd3e9bf58 100755 --- a/src/test/regress/greenplum_schedule +++ b/src/test/regress/greenplum_schedule @@ -301,4 +301,7 @@ test: create_extension_fail # check profile feature test: profile +# Tests of Answer Query Using Materialized Views. +test: aqumv + # end of tests diff --git a/src/test/regress/sql/aqumv.sql b/src/test/regress/sql/aqumv.sql new file mode 100644 index 00000000000..962fbaf5aec --- /dev/null +++ b/src/test/regress/sql/aqumv.sql @@ -0,0 +1,227 @@ +-- +-- Tests of Answer Query Using Materialized Views. +-- +set optimizer = off; +create table aqumv_t1(c1 int, c2 int, c3 int) distributed by (c1); +insert into aqumv_t1 select i, i+1, i+2 from generate_series(1, 1000) i; +insert into aqumv_t1 select * from aqumv_t1; +analyze aqumv_t1; +set answer_query_using_materialized_views = on; + +-- drop views if there is no data populated +begin; +create incremental materialized view aqumv_mvt1_need_refresh as select * from aqumv_t1 where c1 = 2 with no data; +set local answer_query_using_materialized_views = on; +explain(verbose, costs off) select * from aqumv_t1 where c1 = 2; +refresh materialized view aqumv_mvt1_need_refresh; +analyze aqumv_mvt1_need_refresh; +explain(verbose, costs off) select * from aqumv_t1 where c1 = 2; +abort; + +begin; +create incremental materialized view aqumv_mvt1_0 as select * from aqumv_t1 where c1 = 2; +analyze aqumv_mvt1_0; + +set local answer_query_using_materialized_views = off; +select * from aqumv_t1 where c1 = 2; +select c1, c2, c3 from aqumv_t1 where c1 = 2; +select c2 from aqumv_t1 where c1 = 2; +select c3, c2 from aqumv_t1 where c1 = 2; + +set local answer_query_using_materialized_views = on; +select * from aqumv_t1 where c1 = 2; +select c1, c2, c3 from aqumv_t1 where c1 = 2; +select c2 from aqumv_t1 where c1 = 2; +select c3, c2 from aqumv_t1 where c1 = 2; +-- tlist matched. +explain(verbose, costs off) select * from aqumv_t1 where c1 = 2; +explain(verbose, costs off) select c1, c2, c3 from aqumv_t1 where c1 = 2; +-- tlist partially matched. +explain(verbose, costs off) select c2 from aqumv_t1 where c1 = 2; +-- tlist disorder. +explain(verbose, costs off) select c3, c2 from aqumv_t1 where c1 = 2; +abort; + +begin; +create incremental materialized view aqumv_mvt1_1 as select c2 as mc2, c3 as mc3, c1 as mc1, c2 as mc2_1 from aqumv_t1 where c1 = 3; +analyze aqumv_mvt1_1; +set local answer_query_using_materialized_views = off; +select c1 as col1, c2 as col2 from aqumv_t1 where c1 = 3; +select c1, c1 from aqumv_t1 where c1 = 3; + +set local answer_query_using_materialized_views = on; +select c1 as col1, c2 as col2 from aqumv_t1 where c1 = 3; +select c1, c1 from aqumv_t1 where c1 = 3; + +-- tlist alias. +explain(verbose, costs off) select c1 as col1, c2 as col2 from aqumv_t1 where c1 = 3; +-- duplicated projection. +explain(verbose, costs off) select c1, c1 from aqumv_t1 where c1 = 3; +abort; + +begin; +create incremental materialized view aqumv_mvt1_nonvar_expr as + select c2, 1 as mc_const_1, sqrt(100) as mc_sqrt_100 + from aqumv_t1 where c1 = 4; +analyze aqumv_mvt1_nonvar_expr; +set local answer_query_using_materialized_views = off; +select c2, 200 from aqumv_t1 where c1 = 4; +select c2, 1, sqrt(100) from aqumv_t1 where c1 = 4; + +set local answer_query_using_materialized_views = on; +select c2, 200 from aqumv_t1 where c1 = 4; +select c2, 1, sqrt(100) from aqumv_t1 where c1 = 4; + +-- Const are copied to output. +explain(verbose, costs off) select c2, 200 from aqumv_t1 where c1 = 4; +explain(verbose, costs off) select c2, 1, sqrt(100) from aqumv_t1 where c1 = 4; +abort; + +begin; +CREATE OR REPLACE FUNCTION aqumv_func(i integer, j integer) RETURNS integer AS $$ + BEGIN + RETURN i + j; + END; +$$ LANGUAGE plpgsql IMMUTABLE; +create incremental materialized view aqumv_mvt1_func_has_var as + select c2, aqumv_func(c1, c3) as mc_func_res + from aqumv_t1 where c1 = 5; +analyze aqumv_mvt1_func_has_var; + +set local answer_query_using_materialized_views = off; +select c2, aqumv_func(c1, c3) from aqumv_t1 where c1 = 5; + +set local answer_query_using_materialized_views = on; +select c2, aqumv_func(c1, c3) from aqumv_t1 where c1 = 5; + +-- Functions has Vars are replaced. +explain(verbose, costs off) select c2, aqumv_func(c1, c3), aqumv_func(c1, c3) from aqumv_t1 where c1 = 5; +abort; + +begin; +create incremental materialized view aqumv_mvt1_2 as select c2 as mc2, c1 as mc1 from aqumv_t1 where c1 > 1 and c1 < 5; +analyze aqumv_mvt1_2; +set local answer_query_using_materialized_views = on; +-- shoud be unable to use mv, projection doesn't exit in mv's tlist +explain(verbose, costs off) select c3 from aqumv_t1 where c1 < 5 and c1 > 1; +-- no post quals. +explain(verbose, costs off) select c1, c2 from aqumv_t1 where c1 < 5 and c1 > 1; + +-- post quals added to mv. +set local answer_query_using_materialized_views = off; +select c1, c2 from aqumv_t1 where c1 < 5 and c1 > 1 and c2 = 4; +set local answer_query_using_materialized_views = on; +select c1, c2 from aqumv_t1 where c1 < 5 and c1 > 1 and c2 = 4; +explain(verbose, costs off) select c1, c2 from aqumv_t1 where c1 < 5 and c1 > 1 and c2 = 4; + +-- should be unable to use mv, post quals has column doesn't exit in mv's tlist. +explain(verbose, costs off) select * from aqumv_t1 where c1 < 5 and c1 > 1 and c3 > 1; +abort; + +begin; +create incremental materialized view aqumv_mvt1_3 as select c2 as mc2, c1 as mc1, c3+1 as mc3 from aqumv_t1 where c1 > 5 and c1 < 10; +analyze aqumv_mvt1_3; + +set local answer_query_using_materialized_views = on; +-- should be unable to use mv, column c3 doesn't exist in mv's tlist. +explain(verbose, costs off) select * from aqumv_t1 where c1 > 5 and c1 < 10; +-- expr c3+1 is in mv's tlist +explain(verbose, costs off) select c1 as col1, c3+1 as col2 from aqumv_t1 where c1 > 5 and c1 < 10; +-- expr c1+1 could be derived from mv's tlist +explain(verbose, costs off) select c1+1 as col1, c2, c3+1 as col2 from aqumv_t1 where c1 > 5 and c1 < 10; + +select c1 as col1, c3+1 as col2 from aqumv_t1 where c1 > 5 and c1 < 10; +select c1+1 as col1, c2, c3+1 as col2 from aqumv_t1 where c1 > 5 and c1 < 10; +set local answer_query_using_materialized_views = off; +select c1 as col1, c3+1 as col2 from aqumv_t1 where c1 > 5 and c1 < 10; +select c1+1 as col1, c2, c3+1 as col2 from aqumv_t1 where c1 > 5 and c1 < 10; +abort; + +begin; +create incremental materialized view aqumv_mvt1_4 as + select c1 as mc1, c2 as mc2, abs(c2) as mc3, abs(abs(c2) - c1 - 1) as mc4 + from aqumv_t1 where c1 > 10 and c1 < 15; + +analyze aqumv_mvt1_4; + +set local answer_query_using_materialized_views = on; +-- complex exprs +explain(verbose, costs off) select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 from aqumv_t1 where c1 > 10 and c1 < 15; +select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 from aqumv_t1 where c1 > 10 and c1 < 15; + +set local answer_query_using_materialized_views = off; +select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 from aqumv_t1 where c1 > 10 and c1 < 15; +abort; + +-- post quals +begin; +create incremental materialized view aqumv_mvt1_post_quals as + select c1 as mc1, c2 as mc2, abs(c2) as mc3, abs(abs(c2) - c1 - 1) as mc4 + from aqumv_t1 where c1 > 20 and c1 < 30; + +analyze aqumv_mvt1_post_quals; + +set local answer_query_using_materialized_views = on; + +explain(verbose, costs off) + select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 + from aqumv_t1 where c1 > 20 and c1 < 30 and sqrt(abs(c2) + 1) > 1; + +explain(verbose, costs off) + select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 + from aqumv_t1 where c1 > 20 and c1 < 30 and sqrt(abs(c2) + 1) > abs(c1) + 2; + +explain(verbose, costs off) +select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 + from aqumv_t1 where c1 > 20 and c1 < 30 and sqrt(abs(abs(c2) - c1 - 1) + 10) > 2; + +select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 + from aqumv_t1 where c1 > 20 and c1 < 30 and sqrt(abs(c2) + 1) > 1; +select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 + from aqumv_t1 where c1 > 20 and c1 < 30 and sqrt(abs(c2) + 1) > abs(c1) + 2; +select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 + from aqumv_t1 where c1 > 20 and c1 < 30 and sqrt(abs(abs(c2) - c1 - 1) + 10) > 2; + +set local answer_query_using_materialized_views = off; + +select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 + from aqumv_t1 where c1 > 20 and c1 < 30 and sqrt(abs(c2) + 1) > 1; +select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 + from aqumv_t1 where c1 > 20 and c1 < 30 and sqrt(abs(c2) + 1) > abs(c1) + 2; +select c1, sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) + 1, abs(c2) + 1 + from aqumv_t1 where c1 > 20 and c1 < 30 and sqrt(abs(abs(c2) - c1 - 1) + 10) > 2; + +abort; + +-- choose the best one if there are multiple chooses based on cost. +begin; +set local answer_query_using_materialized_views = on; + +create incremental materialized view aqumv_mvt1_candidate_0 as + select c1 as mc1, c2 as mc2, abs(c2) as mc3 + from aqumv_t1 where c1 > 30; +analyze aqumv_mvt1_candidate_0; +-- choose aqumv_mvt1_candidate_0 +explain(verbose, costs off) +select sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) from aqumv_t1 where c1 > 30 and c1 < 40 and sqrt(abs(c2)) > 5.8; + +create incremental materialized view aqumv_mvt1_candidate_1 as + select c1 as mc1, c2 as mc2, abs(c2) as mc3, abs(abs(c2) - c1 - 1) as mc4 + from aqumv_t1 where c1 > 30 and c1 < 40; +analyze aqumv_mvt1_candidate_1; +-- choose aqumv_mvt1_candidate_1 as it has lower cost(less rows). +explain(verbose, costs off) +select sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) from aqumv_t1 where c1 > 30 and c1 < 40 and sqrt(abs(c2)) > 5.8; + +create incremental materialized view aqumv_mvt1_candidate_2 as + select c1 as mc1, c2 as mc2, abs(c2) as mc3, abs(abs(c2) - c1 - 1) as mc4 + from aqumv_t1 where c1 > 30 and c1 < 500; +analyze aqumv_mvt1_candidate_2; +-- still choose aqumv_mvt1_candidate_1 as it has lowest cost(less rows). +explain(verbose, costs off) +select sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) from aqumv_t1 where c1 > 30 and c1 < 40 and sqrt(abs(c2)) > 5.8; +abort; + +reset optimizer; +reset answer_query_using_materialized_views; +drop table aqumv_t1 cascade;