From d287f226b488b8b0f8c6f3e068acf4885e097cfd Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Wed, 6 Nov 2024 16:07:54 +0800 Subject: [PATCH 1/2] Enable answer query using Materialized View for external table. Allow answer query using materialized views which have external or foreign tables. Since we don't know whether the data of an external table outside CBDB is up to date, introduce a new GUC: aqumv_allow_foreign_table Let users decide whether they want to use the matview instead of querying the external tables. create materialized view aqumv_ext_mv as select * from aqumv_ext_r; explain (costs off, verbose) select * from aqumv_ext_r; QUERY PLAN ------------------------------------------ Gather Motion 3:1 (slice1; segments: 3) Output: id -> Seq Scan on aqumv.aqumv_ext_mv Output: id Optimizer: Postgres query optimizer Indexes on the matviews can also be used, if there are any. create index on aqumv_ext_mv(id); explain (costs off, verbose) select * from aqumv_ext_r where id = 5; QUERY PLAN ---------------------------------------------------------------------- Gather Motion 1:1 (slice1; segments: 1) Output: id -> Index Only Scan using aqumv_ext_mv_id_idx on aqumv.aqumv_ext_mv Output: id Index Cond: (aqumv_ext_mv.id = 5) Optimizer: Postgres query optimizer --- src/backend/catalog/gp_matview_aux.c | 12 ++- src/backend/optimizer/plan/aqumv.c | 3 + src/backend/utils/misc/guc_gp.c | 15 +++- src/include/utils/guc.h | 1 + src/include/utils/unsync_guc_name.h | 1 + src/test/regress/expected/aqumv.out | 105 +++++++++++++++++++++++++++ src/test/regress/sql/aqumv.sql | 48 ++++++++++++ 7 files changed, 180 insertions(+), 5 deletions(-) diff --git a/src/backend/catalog/gp_matview_aux.c b/src/backend/catalog/gp_matview_aux.c index cddba123d68..3a3c1a06598 100644 --- a/src/backend/catalog/gp_matview_aux.c +++ b/src/backend/catalog/gp_matview_aux.c @@ -88,8 +88,16 @@ GetViewBaseRelids(const Query *viewQuery) if (rte->rtekind != RTE_RELATION) return NIL; - /* Only support normal relation now. 
*/ - if (get_rel_relkind(rte->relid) != RELKIND_RELATION) + char relkind = get_rel_relkind(rte->relid); + + /* + * Allow foreign table here, however we don't know if the data is + * up to date or not of the view. + * But if users want to query matview instead of query foreign tables + * outside CBDB, let them decide with aqumv_allow_foreign_table. + */ + if (relkind != RELKIND_RELATION && + relkind != RELKIND_FOREIGN_TABLE) return NIL; /* diff --git a/src/backend/optimizer/plan/aqumv.c b/src/backend/optimizer/plan/aqumv.c index d8b4e93e5bb..ee843956231 100644 --- a/src/backend/optimizer/plan/aqumv.c +++ b/src/backend/optimizer/plan/aqumv.c @@ -151,6 +151,9 @@ answer_query_using_materialized_views(PlannerInfo *root, has_subclass(origin_rel_oid)) return mv_final_rel; + if (get_rel_relkind(origin_rel_oid) == RELKIND_FOREIGN_TABLE && !aqumv_allow_foreign_table) + return mv_final_rel; + ruleDesc = table_open(RewriteRelationId, AccessShareLock); rcscan = systable_beginscan(ruleDesc, InvalidOid, false, diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index c1bac91236c..35e7078ff9b 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -446,7 +446,8 @@ bool gp_enable_predicate_pushdown; int gp_predicate_pushdown_sample_rows; bool enable_offload_entry_to_qe = false; -bool enable_answer_query_using_materialized_views = false; +bool enable_answer_query_using_materialized_views = false; +bool aqumv_allow_foreign_table = false; bool gp_log_endpoints = false; @@ -3114,8 +3115,16 @@ struct config_bool ConfigureNamesBool_gp[] = true, NULL, NULL, NULL }, - - + { + {"aqumv_allow_foreign_table", PGC_USERSET, DEVELOPER_OPTIONS, + gettext_noop("allow answer query using materialized views which have foreign or external tables."), + NULL, + GUC_EXPLAIN + }, + &aqumv_allow_foreign_table, + false, + NULL, NULL, NULL + }, { {"gp_log_suboverflow_statement", PGC_SUSET, LOGGING_WHAT, gettext_noop("Enable logging of statements that 
cause subtransaction overflow."), diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index 3eac20b1013..f8a715632c3 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -298,6 +298,7 @@ extern int gp_appendonly_insert_files_tuples_range; extern int gp_random_insert_segments; extern bool enable_answer_query_using_materialized_views; extern bool enable_offload_entry_to_qe; +extern bool aqumv_allow_foreign_table; /* * gp_enable_multiphase_limit is not cost based. * When set to false, the planner will not use multi-phase limit. diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index a064c3dd444..2e74fc61eae 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -11,6 +11,7 @@ /* items in this file should be ordered */ "enable_answer_query_using_materialized_views", "application_name", + "aqumv_allow_foreign_table", "archive_cleanup_command", "archive_command", "archive_mode", diff --git a/src/test/regress/expected/aqumv.out b/src/test/regress/expected/aqumv.out index 7304ca663ef..552a4778687 100644 --- a/src/test/regress/expected/aqumv.out +++ b/src/test/regress/expected/aqumv.out @@ -2581,6 +2581,111 @@ select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 fetch first 3 rows wit (4 rows) abort; +-- +-- Test external table +-- +begin; +CREATE OR REPLACE FUNCTION write_to_file() RETURNS integer as '$libdir/gpextprotocol.so', 'demoprot_export' LANGUAGE C STABLE NO SQL; +CREATE OR REPLACE FUNCTION read_from_file() RETURNS integer as '$libdir/gpextprotocol.so', 'demoprot_import' LANGUAGE C STABLE NO SQL; +--start_ignore +DROP PROTOCOL IF EXISTS demoprot; +NOTICE: protocol "demoprot" does not exist, skipping +--end_ignore +CREATE TRUSTED PROTOCOL demoprot (readfunc = 'read_from_file', writefunc = 'write_to_file'); -- should succeed +CREATE WRITABLE EXTERNAL TABLE aqumv_ext_w(id int) + LOCATION('demoprot://aqumvtextfile.txt') +FORMAT 'text' +DISTRIBUTED BY (id); 
+INSERT INTO aqumv_ext_w SELECT * FROM generate_series(1, 10); +CREATE READABLE EXTERNAL TABLE aqumv_ext_r(id int) + LOCATION('demoprot://aqumvtextfile.txt') +FORMAT 'text'; +create materialized view aqumv_ext_mv as + select * from aqumv_ext_r; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +analyze aqumv_ext_mv; +explain (costs off, verbose) +select * from aqumv_ext_r; + QUERY PLAN +---------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: id + -> Foreign Scan on aqumv.aqumv_ext_r + Output: id + Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(6 rows) + +select * from aqumv_ext_r; + id +---- + 2 + 3 + 4 + 7 + 8 + 5 + 6 + 9 + 10 + 1 +(10 rows) + +set local enable_answer_query_using_materialized_views = on; +set local aqumv_allow_foreign_table = on; +explain (costs off, verbose) +select * from aqumv_ext_r; + QUERY PLAN +---------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Output: id + -> Seq Scan on aqumv.aqumv_ext_mv + Output: id + Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(6 rows) + +select * from aqumv_ext_r; + id +---- + 1 + 2 + 3 + 4 + 7 + 8 + 5 + 6 + 9 + 10 +(10 rows) + +create index on aqumv_ext_mv(id); +set local enable_seqscan = off; +explain (costs off, verbose) +select * from aqumv_ext_r where id = 5; + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + Output: id + -> Index Only Scan using 
aqumv_ext_mv_id_idx on aqumv.aqumv_ext_mv + Output: id + Index Cond: (aqumv_ext_mv.id = 5) + Settings: enable_answer_query_using_materialized_views = 'on', enable_seqscan = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(7 rows) + +select * from aqumv_ext_r where id = 5; + id +---- + 5 +(1 row) + +abort; +-- +-- End of test external table +-- reset optimizer; reset enable_answer_query_using_materialized_views; -- start_ignore diff --git a/src/test/regress/sql/aqumv.sql b/src/test/regress/sql/aqumv.sql index 1f98f867698..6fe19733556 100644 --- a/src/test/regress/sql/aqumv.sql +++ b/src/test/regress/sql/aqumv.sql @@ -626,6 +626,54 @@ select c2, c3 from aqumv_t7 where c1 > 90 order by c2, c3 fetch first 3 rows wit abort; +-- +-- Test external table +-- +begin; + +CREATE OR REPLACE FUNCTION write_to_file() RETURNS integer as '$libdir/gpextprotocol.so', 'demoprot_export' LANGUAGE C STABLE NO SQL; +CREATE OR REPLACE FUNCTION read_from_file() RETURNS integer as '$libdir/gpextprotocol.so', 'demoprot_import' LANGUAGE C STABLE NO SQL; +--start_ignore +DROP PROTOCOL IF EXISTS demoprot; +--end_ignore +CREATE TRUSTED PROTOCOL demoprot (readfunc = 'read_from_file', writefunc = 'write_to_file'); -- should succeed + +CREATE WRITABLE EXTERNAL TABLE aqumv_ext_w(id int) + LOCATION('demoprot://aqumvtextfile.txt') + +FORMAT 'text' +DISTRIBUTED BY (id); + +INSERT INTO aqumv_ext_w SELECT * FROM generate_series(1, 10); + +CREATE READABLE EXTERNAL TABLE aqumv_ext_r(id int) + LOCATION('demoprot://aqumvtextfile.txt') +FORMAT 'text'; + +create materialized view aqumv_ext_mv as + select * from aqumv_ext_r; +analyze aqumv_ext_mv; + +explain (costs off, verbose) +select * from aqumv_ext_r; +select * from aqumv_ext_r; +set local enable_answer_query_using_materialized_views = on; +set local aqumv_allow_foreign_table = on; +explain (costs off, verbose) +select * from aqumv_ext_r; +select * from aqumv_ext_r; + +create index on aqumv_ext_mv(id); +set local enable_seqscan = off; 
+explain (costs off, verbose) +select * from aqumv_ext_r where id = 5; +select * from aqumv_ext_r where id = 5; + +abort; +-- +-- End of test external table +-- + reset optimizer; reset enable_answer_query_using_materialized_views; -- start_ignore From ddfa37a9b6bcbd3975d95aa3663efe7bbc51c0f0 Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Mon, 25 Nov 2024 15:36:39 +0800 Subject: [PATCH 2/2] Avoid REFRESH fast path if matview has foreign tables. For a matview that has foreign tables, we never know the data status. When we create a matview that reads external tables, the initial data status is usually Up-to-date. That makes REFRESH skip the real work when the fast path feature is enabled; in the worst case, we never try to get the external data during REFRESH. Authored-by: Zhang Mingli avamingli@gmail.com --- src/backend/catalog/gp_matview_aux.c | 23 ++++++++++++-- src/backend/commands/matview.c | 3 +- src/include/catalog/gp_matview_aux.h | 5 ++- src/test/regress/expected/aqumv.out | 46 ++++++++++++++++++++++++++++ src/test/regress/sql/aqumv.sql | 7 +++++ 5 files changed, 80 insertions(+), 4 deletions(-) diff --git a/src/backend/catalog/gp_matview_aux.c b/src/backend/catalog/gp_matview_aux.c index 3a3c1a06598..a43bd27cd49 100644 --- a/src/backend/catalog/gp_matview_aux.c +++ b/src/backend/catalog/gp_matview_aux.c @@ -50,7 +50,7 @@ static void SetMatviewAuxStatus_guts(Oid mvoid, char status); * Return NIL if the query we think it's useless. */ List* -GetViewBaseRelids(const Query *viewQuery) +GetViewBaseRelids(const Query *viewQuery, bool *has_foreign) { List *relids = NIL; Node *mvjtnode; @@ -100,6 +100,9 @@ GetViewBaseRelids(const Query *viewQuery) relkind != RELKIND_FOREIGN_TABLE) return NIL; + if (has_foreign) + *has_foreign = relkind == RELKIND_FOREIGN_TABLE; + /* * inherit tables are not supported. * FIXME: left a door for partition table which will be supported soon. 
@@ -140,11 +143,12 @@ InsertMatviewAuxEntry(Oid mvoid, const Query *viewQuery, bool skipdata) Datum values[Natts_gp_matview_aux]; List *relids; NameData mvname; + bool has_foreign = false; Assert(OidIsValid(mvoid)); /* Empty relids means the view is not supported now. */ - relids = GetViewBaseRelids(viewQuery); + relids = GetViewBaseRelids(viewQuery, &has_foreign); if (relids == NIL) return; @@ -157,6 +161,8 @@ InsertMatviewAuxEntry(Oid mvoid, const Query *viewQuery, bool skipdata) namestrcpy(&mvname, get_rel_name(mvoid)); values[Anum_gp_matview_aux_mvname - 1] = NameGetDatum(&mvname); + + values[Anum_gp_matview_aux_has_foreign - 1] = BoolGetDatum(has_foreign); if (skipdata) values[Anum_gp_matview_aux_datastatus - 1] = CharGetDatum(MV_DATA_STATUS_EXPIRED); @@ -449,6 +455,19 @@ MatviewUsableForAppendAgg(Oid mvoid) (auxform->datastatus == MV_DATA_STATUS_EXPIRED_INSERT_ONLY)); } +bool +MatviewHasForeignTables(Oid mvoid) +{ + HeapTuple mvauxtup = SearchSysCacheCopy1(MVAUXOID, ObjectIdGetDatum(mvoid)); + + /* Not a candidate we recorded. */ + if (!HeapTupleIsValid(mvauxtup)) + return false; + + Form_gp_matview_aux auxform = (Form_gp_matview_aux) GETSTRUCT(mvauxtup); + return auxform->has_foreign; +} + /* * Is the view data up to date? 
* In most cases, we should use this function to check if view diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index 3d814f501e9..8b14f1c9f51 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -482,7 +482,8 @@ ExecRefreshMatView(RefreshMatViewStmt *stmt, const char *queryString, if (gp_enable_refresh_fast_path && !RelationIsIVM(matviewRel) && !stmt->skipData && - MatviewIsUpToDate(matviewOid)) + MatviewIsUpToDate(matviewOid) && + !MatviewHasForeignTables(matviewOid)) { table_close(matviewRel, NoLock); diff --git a/src/include/catalog/gp_matview_aux.h b/src/include/catalog/gp_matview_aux.h index 21e714075d5..febd9513751 100644 --- a/src/include/catalog/gp_matview_aux.h +++ b/src/include/catalog/gp_matview_aux.h @@ -27,6 +27,7 @@ CATALOG(gp_matview_aux,7153,GpMatviewAuxId) BKI_SHARED_RELATION { Oid mvoid; /* materialized view oid */ NameData mvname; /* materialized view name */ + bool has_foreign; /* view query has foreign tables? */ /* view's data status */ char datastatus; } FormData_gp_matview_aux; @@ -57,12 +58,14 @@ extern void InsertMatviewAuxEntry(Oid mvoid, const Query *viewQuery, bool skipda extern void RemoveMatviewAuxEntry(Oid mvoid); -extern List* GetViewBaseRelids(const Query *viewQuery); +extern List* GetViewBaseRelids(const Query *viewQuery, bool *has_foreign); extern void SetRelativeMatviewAuxStatus(Oid relid, char status); extern void SetMatviewAuxStatus(Oid mvoid, char status); +extern bool MatviewHasForeignTables(Oid mvoid); + extern bool MatviewUsableForAppendAgg(Oid mvoid); extern bool MatviewIsGeneralyUpToDate(Oid mvoid); diff --git a/src/test/regress/expected/aqumv.out b/src/test/regress/expected/aqumv.out index 552a4778687..ba4772e826d 100644 --- a/src/test/regress/expected/aqumv.out +++ b/src/test/regress/expected/aqumv.out @@ -2682,6 +2682,52 @@ select * from aqumv_ext_r where id = 5; 5 (1 row) +-- refresh matview has foreign tables should not go fast path. 
+select * from aqumv_ext_mv; + id +---- + 5 + 6 + 9 + 10 + 2 + 3 + 4 + 7 + 8 + 1 +(10 rows) + +INSERT INTO aqumv_ext_w SELECT * FROM generate_series(10, 15); +set local gp_enable_refresh_fast_path = on; +select datastatus from gp_matview_aux where mvoid = 'aqumv_ext_mv'::regclass::oid; + datastatus +------------ + u +(1 row) + +refresh materialized view aqumv_ext_mv; +select * from aqumv_ext_mv; + id +---- + 2 + 3 + 4 + 7 + 8 + 1 + 12 + 15 + 5 + 6 + 9 + 10 + 10 + 11 + 13 + 14 +(16 rows) + abort; -- -- End of test external table diff --git a/src/test/regress/sql/aqumv.sql b/src/test/regress/sql/aqumv.sql index 6fe19733556..8fee68f311c 100644 --- a/src/test/regress/sql/aqumv.sql +++ b/src/test/regress/sql/aqumv.sql @@ -669,6 +669,13 @@ explain (costs off, verbose) select * from aqumv_ext_r where id = 5; select * from aqumv_ext_r where id = 5; +-- refresh matview has foreign tables should not go fast path. +select * from aqumv_ext_mv; +INSERT INTO aqumv_ext_w SELECT * FROM generate_series(10, 15); +set local gp_enable_refresh_fast_path = on; +select datastatus from gp_matview_aux where mvoid = 'aqumv_ext_mv'::regclass::oid; +refresh materialized view aqumv_ext_mv; +select * from aqumv_ext_mv; abort; -- -- End of test external table