diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index ebec8fa5b89..901573fa433 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -36,6 +36,7 @@ #include "utils/memdebug.h" #include "utils/memutils.h" #include "utils/snapmgr.h" +#include "commands/vacuum.h" static BTMetaPageData *_bt_getmeta(Relation rel, Buffer metabuf); static void _bt_log_reuse_page(Relation rel, BlockNumber blkno, @@ -186,6 +187,10 @@ _bt_vacuum_needs_cleanup(Relation rel) uint32 btm_version; BlockNumber prev_num_delpages; + /* Return true directly on QE for stats collection from QD. */ + if (gp_vacuum_needs_update_stats()) + return true; + /* * Copy details from metapage to local variables quickly. * diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 906d753d222..5e3a2b7aa47 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -956,7 +956,6 @@ btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) */ stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); btvacuumscan(info, stats, NULL, NULL, 0); - stats->estimated_count = true; } /* diff --git a/src/backend/cdb/dispatcher/cdbdispatchresult.c b/src/backend/cdb/dispatcher/cdbdispatchresult.c index 09d712a7304..37fead28981 100644 --- a/src/backend/cdb/dispatcher/cdbdispatchresult.c +++ b/src/backend/cdb/dispatcher/cdbdispatchresult.c @@ -821,6 +821,8 @@ cdbdisp_returnResults(CdbDispatchResults *primaryResults, CdbPgResults *cdb_pgre /* tell the caller how many sets we're returning. 
*/ cdb_pgresults->numResults = nresults; + /* set the number of dispatched QE */ + cdb_pgresults->numDispatches = primaryResults->resultCount; } /* diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index d8cb3975af5..0e1f84cf118 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -78,7 +78,8 @@ typedef struct VacuumStatsContext { - List *updated_stats; + List *updated_stats; + int nsegs; } VacuumStatsContext; /* @@ -122,7 +123,7 @@ static void dispatchVacuum(VacuumParams *params, Oid relid, static List *vacuum_params_to_options_list(VacuumParams *params); static void vacuum_combine_stats(VacuumStatsContext *stats_context, CdbPgResults *cdb_pgresults); -static void vac_update_relstats_from_list(List *updated_stats); +static void vac_update_relstats_from_list(VacuumStatsContext *stats_context); /* * Primary entry point for manual VACUUM and ANALYZE commands @@ -2687,7 +2688,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, stats_context.updated_stats = NIL; dispatchVacuum(params, relid, &stats_context); - vac_update_relstats_from_list(stats_context.updated_stats); + vac_update_relstats_from_list(&stats_context); /* Also update pg_stat_last_operation */ if (IsAutoVacuumWorkerProcess()) @@ -3081,6 +3082,8 @@ vacuum_combine_stats(VacuumStatsContext *stats_context, CdbPgResults *cdb_pgresu if (cdb_pgresults == NULL || cdb_pgresults->numResults <= 0) return; + stats_context->nsegs = cdb_pgresults->numDispatches; + /* * Process the dispatch results from the primary. 
Note that the QE * processes also send back the new stats info, such as stats on @@ -3093,9 +3096,7 @@ vacuum_combine_stats(VacuumStatsContext *stats_context, CdbPgResults *cdb_pgresu * */ for(result_no = 0; result_no < cdb_pgresults->numResults; result_no++) - { - VPgClassStats *pgclass_stats = NULL; - ListCell *lc = NULL; + { struct pg_result *pgresult = cdb_pgresults->pg_results[result_no]; if (pgresult->extras == NULL || pgresult->extraType != PGExtraTypeVacuumStats) @@ -3107,30 +3108,38 @@ vacuum_combine_stats(VacuumStatsContext *stats_context, CdbPgResults *cdb_pgresu * Process the stats for pg_class. We simply compute the maximum * number of rel_tuples and rel_pages. */ - pgclass_stats = (VPgClassStats *) pgresult->extras; + VPgClassStatsCombo *pgclass_stats_combo = (VPgClassStatsCombo *) pgresult->extras; + ListCell *lc = NULL; + foreach (lc, stats_context->updated_stats) { - VPgClassStats *tmp_stats = (VPgClassStats *) lfirst(lc); + VPgClassStatsCombo *tmp_stats_combo = (VPgClassStatsCombo *) lfirst(lc); - if (tmp_stats->relid == pgclass_stats->relid) + if (tmp_stats_combo->relid == pgclass_stats_combo->relid) { - tmp_stats->rel_pages += pgclass_stats->rel_pages; - tmp_stats->rel_tuples += pgclass_stats->rel_tuples; - tmp_stats->relallvisible += pgclass_stats->relallvisible; + tmp_stats_combo->rel_pages += pgclass_stats_combo->rel_pages; + tmp_stats_combo->rel_tuples += pgclass_stats_combo->rel_tuples; + tmp_stats_combo->relallvisible += pgclass_stats_combo->relallvisible; + /* + * Accumulate the number of QEs, assuming sending only once + * per QE for each relid in the VACUUM scenario. 
+ */ + tmp_stats_combo->count++; break; } } - if (lc == NULL) + if (lc == NULL) /* get the first stats result of the current relid */ { Assert(pgresult->extraslen == sizeof(VPgClassStats)); old_context = MemoryContextSwitchTo(vac_context); - pgclass_stats = palloc(sizeof(VPgClassStats)); - memcpy(pgclass_stats, pgresult->extras, pgresult->extraslen); + pgclass_stats_combo = palloc(sizeof(VPgClassStatsCombo)); + memcpy(pgclass_stats_combo, pgresult->extras, pgresult->extraslen); + pgclass_stats_combo->count = 1; stats_context->updated_stats = - lappend(stats_context->updated_stats, pgclass_stats); + lappend(stats_context->updated_stats, pgclass_stats_combo); MemoryContextSwitchTo(old_context); } } @@ -3140,8 +3149,9 @@ vacuum_combine_stats(VacuumStatsContext *stats_context, CdbPgResults *cdb_pgresu * Update relpages/reltuples of all the relations in the list. */ static void -vac_update_relstats_from_list(List *updated_stats) +vac_update_relstats_from_list(VacuumStatsContext *stats_context) { + List *updated_stats = stats_context->updated_stats; ListCell *lc; /* @@ -3152,7 +3162,7 @@ vac_update_relstats_from_list(List *updated_stats) foreach (lc, updated_stats) { - VPgClassStats *stats = (VPgClassStats *) lfirst(lc); + VPgClassStatsCombo *stats = (VPgClassStatsCombo *) lfirst(lc); Relation rel; int16 ao_segfile_count = 0; @@ -3160,6 +3170,8 @@ vac_update_relstats_from_list(List *updated_stats) if (GpPolicyIsReplicated(rel->rd_cdbpolicy)) { + Assert(stats->count == rel->rd_cdbpolicy->numsegments); + stats->rel_pages = stats->rel_pages / rel->rd_cdbpolicy->numsegments; stats->rel_tuples = stats->rel_tuples / rel->rd_cdbpolicy->numsegments; stats->relallvisible = stats->relallvisible / rel->rd_cdbpolicy->numsegments; @@ -3201,17 +3213,55 @@ vac_update_relstats_from_list(List *updated_stats) } /* - * Pass 'false' for isvacuum, so that the stats are - * actually updated. 
+ * Update QD stats only when receiving all dispatched QEs' stats, to + * avoid being overwritten by a partial accumulated value (i.e., index->reltuples) + * in case when not receiving all QEs' stats. */ - vac_update_relstats(rel, - stats->rel_pages, stats->rel_tuples, - stats->relallvisible, - rel->rd_rel->relhasindex, - InvalidTransactionId, - InvalidMultiXactId, - false, - false /* isvacuum */); + if (stats_context->nsegs > 0 && stats->count == stats_context->nsegs) + { + /* + * Pass 'false' for isvacuum, so that the stats are + * actually updated. + */ + vac_update_relstats(rel, + stats->rel_pages, stats->rel_tuples, + stats->relallvisible, + rel->rd_rel->relhasindex, + InvalidTransactionId, + InvalidMultiXactId, + false, + false /* isvacuum */); + } + else + { + /* + * We do have chance to enter this branch in the case when in compact phase. + * For example, in compact phase, some QEs may need to drop dead segfiles, + * while others may not. Only the QEs which dropping dead segfiles could go to + * vacuum indexes path then update and send the statistics to QD, QD just + * collected part of QEs' stats hence should not be as the final result to + * overwrite QD's stats. + * + * One may think why not having the stats update only happens in the final + * phase (POST_CLEANUP_PHASE), yes that's an alternative to get a final stats + * accurately for QD. + * + * Given the AO/CO VACUUM is a multi-phases process which may have an interval + * between each phase. In real circumstance, concurrent VACUUM is mostly a heavy + * job and this interval could get longer than normal cases, hence it seems + * better to collect and update QD's stats timely. So current strategy is, QD always + * collect QE's stats across phases, once we collected the expected number (means + * same as dispatched QE number) of QE's stats, we update QD's stats subsequently, + * instead of updating at the final phase. 
+ * + * Set the logging level to LOG as skipping sending stats here is not considered as + * a real issue, displaying it in log may be helpful to hint. + */ + elog(LOG, "Vacuum update stats oid=%u pages=%d tuples=%f was skipped because " + "collected segment number %d didn't match the expected %d.", stats->relid, + stats->rel_pages, stats->rel_tuples, stats->count, stats_context->nsegs); + } + relation_close(rel, AccessShareLock); } } @@ -3277,3 +3327,18 @@ vacuumStatement_IsTemporary(Relation onerel) bTemp = isAnyTempNamespace(RelationGetNamespace(onerel)); return bTemp; } + +/* + * GPDB: Check whether needs to update or send stats from QE to QD. + * This is GPDB specific check in vacuum-index scenario for collecting + * QEs' stats (such as index->relpages and index->reltuples) on QD. + * GPDB needs accumulating all QEs' stats for updating corresponding + * statistics into QD's pg_class correctly. So if current instance is + * acting as QE, it should scan and send its current stats to QD instead + * of skipping them for cost saving. 
+ */ +bool +gp_vacuum_needs_update_stats(void) +{ + return (Gp_role == GP_ROLE_EXECUTE); +} diff --git a/src/backend/commands/vacuum_ao.c b/src/backend/commands/vacuum_ao.c index 17c4d7826f2..531d0eade4a 100644 --- a/src/backend/commands/vacuum_ao.c +++ b/src/backend/commands/vacuum_ao.c @@ -145,7 +145,7 @@ static void vacuum_appendonly_index(Relation indexRelation, - double rel_tuple_count, + Relation aoRelation, Bitmapset *dead_segs, int elevel, BufferAccessStrategy bstrategy); @@ -491,7 +491,10 @@ vacuum_appendonly_indexes(Relation aoRelation, int options, Bitmapset *dead_segs { for (i = 0; i < nindexes; i++) { - scan_index(Irel[i], aoRelation , elevel, bstrategy); + scan_index(Irel[i], + aoRelation, + elevel, + bstrategy); } } else @@ -499,7 +502,7 @@ vacuum_appendonly_indexes(Relation aoRelation, int options, Bitmapset *dead_segs for (i = 0; i < nindexes; i++) { vacuum_appendonly_index(Irel[i], - aoRelation->rd_rel->reltuples, + aoRelation, dead_segs, elevel, bstrategy); @@ -516,11 +519,10 @@ vacuum_appendonly_indexes(Relation aoRelation, int options, Bitmapset *dead_segs * * This is called after an append-only segment file compaction to move * all tuples from the compacted segment files. - * The segmentFileList is an */ static void vacuum_appendonly_index(Relation indexRelation, - double rel_tuple_count, + Relation aoRelation, Bitmapset *dead_segs, int elevel, BufferAccessStrategy bstrategy) @@ -534,8 +536,14 @@ vacuum_appendonly_index(Relation indexRelation, pg_rusage_init(&ru0); ivinfo.index = indexRelation; + ivinfo.analyze_only = false; ivinfo.message_level = elevel; - ivinfo.num_heap_tuples = rel_tuple_count; + /* + * We can only provide the AO rel's reltuples as an estimate + * (similar to heapam. See: lazy_vacuum_index()). 
+ */ + ivinfo.num_heap_tuples = aoRelation->rd_rel->reltuples; + ivinfo.estimated_count = true; ivinfo.strategy = bstrategy; /* Do bulk deletion */ @@ -680,9 +688,7 @@ vacuum_appendonly_fill_stats(Relation aorel, Snapshot snapshot, int elevel, * We use this when we have no deletions to do. */ void -scan_index(Relation indrel, - Relation aorel, - int elevel, BufferAccessStrategy vac_strategy) +scan_index(Relation indrel, Relation aorel, int elevel, BufferAccessStrategy vac_strategy) { IndexBulkDeleteResult *stats; IndexVacuumInfo ivinfo; @@ -692,9 +698,13 @@ scan_index(Relation indrel, ivinfo.index = indrel; ivinfo.analyze_only = false; - ivinfo.estimated_count = false; ivinfo.message_level = elevel; + /* + * We can only provide the AO rel's reltuples as an estimate + * (similar to heapam. See: lazy_vacuum_index()). + */ ivinfo.num_heap_tuples = aorel->rd_rel->reltuples; + ivinfo.estimated_count = true; ivinfo.strategy = vac_strategy; diff --git a/src/include/cdb/cdbdispatchresult.h b/src/include/cdb/cdbdispatchresult.h index f03bce05d12..efbce12bdd7 100644 --- a/src/include/cdb/cdbdispatchresult.h +++ b/src/include/cdb/cdbdispatchresult.h @@ -29,6 +29,7 @@ typedef struct CdbPgResults { struct pg_result **pg_results; int numResults; + int numDispatches; /* the number of dispatched QE, from CdbDispatchResults.resultCount */ }CdbPgResults; /* diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 3fba8197b95..67f41613ad2 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -258,6 +258,16 @@ typedef struct VPgClassStats BlockNumber relallvisible; } VPgClassStats; +typedef struct VPgClassStatsCombo +{ + Oid relid; + BlockNumber rel_pages; + double rel_tuples; + BlockNumber relallvisible; + + int count; /* expect to equal to the number of dispatched segments */ +} VPgClassStatsCombo; + /* * Parameters customizing behavior of VACUUM and ANALYZE. 
* @@ -386,6 +396,7 @@ extern void analyze_rel(Oid relid, RangeVar *relation, extern void lazy_vacuum_rel_heap(Relation onerel, VacuumParams *params, BufferAccessStrategy bstrategy); extern void scan_index(Relation indrel, Relation aorel, int elevel, BufferAccessStrategy bstrategy); + /* in commands/vacuum_ao.c */ extern void ao_vacuum_rel(Relation rel, VacuumParams *params, BufferAccessStrategy bstrategy); @@ -407,4 +418,6 @@ extern int acquire_inherited_sample_rows(Relation onerel, int elevel, extern Datum gp_acquire_sample_rows(PG_FUNCTION_ARGS); extern Oid gp_acquire_sample_rows_col_type(Oid typid); +extern bool gp_vacuum_needs_update_stats(void); + #endif /* VACUUM_H */ diff --git a/src/test/regress/expected/btree_index.out b/src/test/regress/expected/btree_index.out index 1165fa5efa1..39fea6969cf 100644 --- a/src/test/regress/expected/btree_index.out +++ b/src/test/regress/expected/btree_index.out @@ -335,18 +335,112 @@ VACUUM delete_test_table; -- The vacuum above should've turned the leaf page into a fast root. We just -- need to insert some rows to cause the fast root page to split. INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,1000) i; --- Test unsupported btree opclass parameters -create index on btree_tall_tbl (id int4_ops(foo=1)); -ERROR: operator class int4_ops has no options --- Test case of ALTER INDEX with abuse of column names for indexes. --- This grammar is not officially supported, but the parser allows it. 
-CREATE INDEX btree_tall_idx2 ON btree_tall_tbl (id); -ALTER INDEX btree_tall_idx2 ALTER COLUMN id SET (n_distinct=100); -ERROR: "btree_tall_idx2" is not a table, materialized view, or foreign table -DROP INDEX btree_tall_idx2; --- Partitioned index -CREATE TABLE btree_part (id int4) PARTITION BY RANGE (id); -CREATE INDEX btree_part_idx ON btree_part(id); -ALTER INDEX btree_part_idx ALTER COLUMN id SET (n_distinct=100); -ERROR: "btree_part_idx" is not a table, materialized view, or foreign table -DROP TABLE btree_part; +-- +-- GPDB: Test correctness of B-tree stats in consecutively VACUUM. +-- +CREATE TABLE btree_stats_tbl(col_int int, col_text text, col_numeric numeric, col_unq int) DISTRIBUTED BY (col_int); +CREATE INDEX btree_stats_idx ON btree_stats_tbl(col_int); +INSERT INTO btree_stats_tbl VALUES (1, 'aa', 1001, 101), (2, 'bb', 1002, 102); +SELECT reltuples FROM pg_class WHERE relname='btree_stats_tbl'; + reltuples +----------- + -1 +(1 row) + +-- inspect the state of the stats on segments +SELECT gp_segment_id, relname, reltuples FROM gp_dist_random('pg_class') WHERE relname = 'btree_stats_idx'; + gp_segment_id | relname | reltuples +---------------+-----------------+----------- + 0 | btree_stats_idx | 0 + 1 | btree_stats_idx | 0 + 2 | btree_stats_idx | 0 +(3 rows) + +SELECT reltuples FROM pg_class WHERE relname='btree_stats_idx'; + reltuples +----------- + 0 +(1 row) + +-- 1st VACUUM, expect reltuples = 2 +vacuum btree_stats_tbl; +SELECT reltuples FROM pg_class WHERE relname='btree_stats_tbl'; + reltuples +----------- + 2 +(1 row) + +-- inspect the state of the stats on segments +SELECT gp_segment_id, relname, reltuples FROM gp_dist_random('pg_class') WHERE relname = 'btree_stats_idx'; + gp_segment_id | relname | reltuples +---------------+-----------------+----------- + 0 | btree_stats_idx | 1 + 1 | btree_stats_idx | 1 + 2 | btree_stats_idx | 0 +(3 rows) + +SELECT reltuples FROM pg_class WHERE relname='btree_stats_idx'; + reltuples +----------- + 2 +(1 
row) + +-- 2nd VACUUM, expect reltuples = 2 +vacuum btree_stats_tbl; +SELECT reltuples FROM pg_class WHERE relname='btree_stats_tbl'; + reltuples +----------- + 2 +(1 row) + +-- inspect the state of the stats on segments +SELECT gp_segment_id, relname, reltuples FROM gp_dist_random('pg_class') WHERE relname = 'btree_stats_idx'; + gp_segment_id | relname | reltuples +---------------+-----------------+----------- + 0 | btree_stats_idx | 1 + 1 | btree_stats_idx | 1 + 2 | btree_stats_idx | 0 +(3 rows) + +SELECT reltuples FROM pg_class WHERE relname='btree_stats_idx'; + reltuples +----------- + 2 +(1 row) + +-- Prior to this fix, the case would be failed here. Given the +-- scenario of updating stats during VACUUM: +-- 1) coordinator vacuums and updates stats of its own; +-- 2) then coordinator dispatches vacuum to segments; +-- 3) coordinator combines stats received from segments to overwrite the stats of its own. +-- Because upstream introduced a feature which could skip full index scan uring cleanup +-- of B-tree indexes when possible (refer to: +-- https://github.com/postgres/postgres/commit/857f9c36cda520030381bd8c2af20adf0ce0e1d4), +-- there was a case in QD-QEs distributed deployment that some QEs could skip full index scan and +-- stop updating statistics, result in QD being unable to collect all QEs' stats thus overwrote +-- a paritial accumulated value to index->reltuples. More interesting, it usually happened starting +-- from the 3rd time of consecutively VACUUM after fresh inserts due to above skipping index scan +-- criteria. 
+-- 3rd VACUUM, expect reltuples = 2 +vacuum btree_stats_tbl; +SELECT reltuples FROM pg_class WHERE relname='btree_stats_tbl'; + reltuples +----------- + 2 +(1 row) + +-- inspect the state of the stats on segments +SELECT gp_segment_id, relname, reltuples FROM gp_dist_random('pg_class') WHERE relname = 'btree_stats_idx'; + gp_segment_id | relname | reltuples +---------------+-----------------+----------- + 0 | btree_stats_idx | 1 + 1 | btree_stats_idx | 1 + 2 | btree_stats_idx | 0 +(3 rows) + +SELECT reltuples FROM pg_class WHERE relname='btree_stats_idx'; + reltuples +----------- + 2 +(1 row) + diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index d0f1c2c4ac7..0f1e6ae13f9 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2469,9 +2469,7 @@ select count(*) from -> Gather Motion 3:1 (slice2; segments: 3) Merge Key: y.unique2 -> Subquery Scan on y - -> Sort - Sort Key: y_1.unique2 - -> Seq Scan on tenk1 y_1 + -> Index Scan using tenk1_unique2 on tenk1 y_1 Optimizer: Postgres query optimizer (18 rows) diff --git a/src/test/regress/expected/memoize.out b/src/test/regress/expected/memoize.out index 7453cc83507..58c2e436571 100644 --- a/src/test/regress/expected/memoize.out +++ b/src/test/regress/expected/memoize.out @@ -39,23 +39,23 @@ SELECT explain_memoize(' SELECT COUNT(*),AVG(t1.unique1) FROM tenk1 t1 INNER JOIN tenk1 t2 ON t1.unique1 = t2.twenty WHERE t2.unique1 < 1000;', false); - explain_memoize --------------------------------------------------------------------------------------------------------- + explain_memoize +------------------------------------------------------------------------------------------------------- Finalize Aggregate (actual rows=1 loops=N) -> Gather Motion 3:1 (slice1; segments: 3) (actual rows=3 loops=N) -> Partial Aggregate (actual rows=1 loops=N) - -> Merge Join (actual rows=400 loops=N) - Merge Cond: (t1.unique1 = t2.twenty) - -> Index Only Scan using 
tenk1_unique1 on tenk1 t1 (actual rows=9 loops=N) - Heap Fetches: N - -> Sort (actual rows=400 loops=N) - Sort Key: t2.twenty - Sort Method: quicksort Memory: NkB - -> Redistribute Motion 3:3 (slice2; segments: 3) (actual rows=400 loops=N) - Hash Key: t2.twenty - -> Seq Scan on tenk1 t2 (actual rows=340 loops=N) - Filter: (unique1 < 1000) - Rows Removed by Filter: 2906 + -> Nested Loop (actual rows=400 loops=N) + -> Redistribute Motion 3:3 (slice2; segments: 3) (actual rows=400 loops=N) + Hash Key: t2.twenty + -> Seq Scan on tenk1 t2 (actual rows=340 loops=N) + Filter: (unique1 < 1000) + Rows Removed by Filter: 2906 + -> Memoize (actual rows=1 loops=N) + Cache Key: t2.twenty + Cache Mode: logical + -> Index Only Scan using tenk1_unique1 on tenk1 t1 (actual rows=1 loops=N) + Index Cond: (unique1 = t2.twenty) + Heap Fetches: N Optimizer: Postgres query optimizer (16 rows) @@ -73,23 +73,23 @@ SELECT explain_memoize(' SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1, LATERAL (SELECT t2.unique1 FROM tenk1 t2 WHERE t1.twenty = t2.unique1) t2 WHERE t1.unique1 < 1000;', false); - explain_memoize --------------------------------------------------------------------------------------------------------- + explain_memoize +------------------------------------------------------------------------------------------------------- Finalize Aggregate (actual rows=1 loops=N) -> Gather Motion 3:1 (slice1; segments: 3) (actual rows=3 loops=N) -> Partial Aggregate (actual rows=1 loops=N) - -> Merge Join (actual rows=400 loops=N) - Merge Cond: (t2.unique1 = t1.twenty) - -> Index Only Scan using tenk1_unique1 on tenk1 t2 (actual rows=9 loops=N) - Heap Fetches: N - -> Sort (actual rows=400 loops=N) - Sort Key: t1.twenty - Sort Method: quicksort Memory: NkB - -> Redistribute Motion 3:3 (slice2; segments: 3) (actual rows=400 loops=N) - Hash Key: t1.twenty - -> Seq Scan on tenk1 t1 (actual rows=340 loops=N) - Filter: (unique1 < 1000) - Rows Removed by Filter: 2906 + -> Nested Loop (actual rows=400 
loops=N) + -> Redistribute Motion 3:3 (slice2; segments: 3) (actual rows=400 loops=N) + Hash Key: t1.twenty + -> Seq Scan on tenk1 t1 (actual rows=340 loops=N) + Filter: (unique1 < 1000) + Rows Removed by Filter: 2906 + -> Memoize (actual rows=1 loops=N) + Cache Key: t1.twenty + Cache Mode: logical + -> Index Only Scan using tenk1_unique1 on tenk1 t2 (actual rows=1 loops=N) + Index Cond: (unique1 = t1.twenty) + Heap Fetches: N Optimizer: Postgres query optimizer (16 rows) @@ -284,10 +284,12 @@ WHERE t1.unique1 < 1000; -> Hash -> Redistribute Motion 3:3 (slice2; segments: 3) Hash Key: t1.twenty - -> Seq Scan on tenk1 t1 - Filter: (unique1 < 1000) + -> Bitmap Heap Scan on tenk1 t1 + Recheck Cond: (unique1 < 1000) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 < 1000) Optimizer: Postgres query optimizer -(12 rows) +(14 rows) -- And ensure the parallel plan gives us the correct results. SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1, diff --git a/src/test/regress/expected/memoize_optimizer.out b/src/test/regress/expected/memoize_optimizer.out index e65e98d36ca..f34752dab7e 100644 --- a/src/test/regress/expected/memoize_optimizer.out +++ b/src/test/regress/expected/memoize_optimizer.out @@ -69,23 +69,23 @@ SELECT explain_memoize(' SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1, LATERAL (SELECT t2.unique1 FROM tenk1 t2 WHERE t1.twenty = t2.unique1) t2 WHERE t1.unique1 < 1000;', false); - explain_memoize --------------------------------------------------------------------------------------------------------- + explain_memoize +------------------------------------------------------------------------------------------------------- Finalize Aggregate (actual rows=1 loops=N) -> Gather Motion 3:1 (slice1; segments: 3) (actual rows=3 loops=N) -> Partial Aggregate (actual rows=1 loops=N) - -> Merge Join (actual rows=400 loops=N) - Merge Cond: (t2.unique1 = t1.twenty) - -> Index Only Scan using tenk1_unique1 on tenk1 t2 (actual rows=9 loops=N) - Heap Fetches: N - -> 
Sort (actual rows=400 loops=N) - Sort Key: t1.twenty - Sort Method: quicksort Memory: NkB - -> Redistribute Motion 3:3 (slice2; segments: 3) (actual rows=400 loops=N) - Hash Key: t1.twenty - -> Seq Scan on tenk1 t1 (actual rows=340 loops=N) - Filter: (unique1 < 1000) - Rows Removed by Filter: 2906 + -> Nested Loop (actual rows=400 loops=N) + -> Redistribute Motion 3:3 (slice2; segments: 3) (actual rows=400 loops=N) + Hash Key: t1.twenty + -> Seq Scan on tenk1 t1 (actual rows=340 loops=N) + Filter: (unique1 < 1000) + Rows Removed by Filter: 2906 + -> Memoize (actual rows=1 loops=N) + Cache Key: t1.twenty + Cache Mode: logical + -> Index Only Scan using tenk1_unique1 on tenk1 t2 (actual rows=1 loops=N) + Index Cond: (unique1 = t1.twenty) + Heap Fetches: N Optimizer: Postgres query optimizer (16 rows) @@ -276,10 +276,12 @@ WHERE t1.unique1 < 1000; -> Hash -> Redistribute Motion 3:3 (slice2; segments: 3) Hash Key: t1.twenty - -> Seq Scan on tenk1 t1 - Filter: (unique1 < 1000) + -> Bitmap Heap Scan on tenk1 t1 + Recheck Cond: (unique1 < 1000) + -> Bitmap Index Scan on tenk1_unique1 + Index Cond: (unique1 < 1000) Optimizer: Postgres query optimizer -(12 rows) +(14 rows) -- And ensure the parallel plan gives us the correct results. 
SELECT COUNT(*),AVG(t2.unique1) FROM tenk1 t1, diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out index 08e4ccff440..73696351a42 100644 --- a/src/test/regress/expected/misc_functions.out +++ b/src/test/regress/expected/misc_functions.out @@ -274,14 +274,13 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g; EXPLAIN (COSTS OFF) SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g; - QUERY PLAN ----------------------------------------------------- + QUERY PLAN +------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - -> Hash Join - Hash Cond: (a.unique1 = g.g) - -> Seq Scan on tenk1 a - -> Hash - -> Function Scan on my_gen_series g + -> Nested Loop + -> Function Scan on my_gen_series g + -> Index Scan using tenk1_unique1 on tenk1 a + Index Cond: (unique1 = g.g) Optimizer: Postgres query optimizer -(7 rows) +(6 rows) diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out index 951c6a5438f..180498dae04 100644 --- a/src/test/regress/expected/select_parallel.out +++ b/src/test/regress/expected/select_parallel.out @@ -651,11 +651,9 @@ explain (costs off) -> Gather Motion 3:1 (slice1; segments: 3) -> Partial Aggregate -> Merge Join - Merge Cond: (tenk2.unique1 = tenk1.unique1) + Merge Cond: (tenk1.unique1 = tenk2.unique1) + -> Index Only Scan using tenk1_unique1 on tenk1 -> Index Only Scan using tenk2_unique1 on tenk2 - -> Sort - Sort Key: tenk1.unique1 - -> Seq Scan on tenk1 Optimizer: Postgres query optimizer (10 rows) diff --git a/src/test/regress/expected/uao_compaction/index_stats.out b/src/test/regress/expected/uao_compaction/index_stats.out index 1c32cfd8caa..afdad152b1d 100644 --- a/src/test/regress/expected/uao_compaction/index_stats.out +++ b/src/test/regress/expected/uao_compaction/index_stats.out @@ -40,4 +40,164 @@ SELECT relname, reltuples FROM pg_class WHERE relname = 
'mytab_int_idx1'; mytab_int_idx1 | 2 (1 row) --- end_ignore \ No newline at end of file +-- Test to ensure that reltuples is updated for an index after lazy vacuum. +-- This is vital as most index AMs that depend on this tuple count (eg btree, bitmap etc) +-- which is passed up from the table AM during lazy vacuum. +-- create a fresh table for the test +CREATE TABLE mytab2( + col_int int, + col_text text, + col_numeric numeric, + col_unq int + ) with(appendonly=true) DISTRIBUTED BY (col_int); +create index mytab2_int_idx1 on mytab2 using bitmap(col_int); +insert into mytab2 values(1,'aa',1001,101),(2,'bb',1002,102); +SELECT relname, reltuples FROM pg_class WHERE relname = 'mytab2_int_idx1'; + relname | reltuples +-----------------+----------- + mytab2_int_idx1 | 0 +(1 row) + +-- first vacuum collect table stat on segments +vacuum mytab2; +-- inspect the state of the stats on segments +SELECT gp_segment_id, relname, reltuples FROM gp_dist_random('pg_class') WHERE relname = 'mytab2_int_idx1'; + gp_segment_id | relname | reltuples +---------------+-----------------+----------- + 0 | mytab2_int_idx1 | 0 + 1 | mytab2_int_idx1 | 0 + 2 | mytab2_int_idx1 | 0 +(3 rows) + +-- second vacuum update index stat with table stat +vacuum mytab2; +-- inspect the state of the stats on segments +SELECT gp_segment_id, relname, reltuples FROM gp_dist_random('pg_class') WHERE relname = 'mytab2_int_idx1'; + gp_segment_id | relname | reltuples +---------------+-----------------+----------- + 0 | mytab2_int_idx1 | 1 + 1 | mytab2_int_idx1 | 1 + 2 | mytab2_int_idx1 | 0 +(3 rows) + +SELECT relname, reltuples FROM pg_class WHERE relname = 'mytab2_int_idx1'; + relname | reltuples +-----------------+----------- + mytab2_int_idx1 | 2 +(1 row) + +-- Test correctness of index->reltuples in consecutively VACUUM. 
+CREATE TABLE mytab3( + col_int int, + col_text text, + col_numeric numeric, + col_unq int + ) with(appendonly=true) DISTRIBUTED BY (col_int); +create index mytab3_int_idx1 on mytab3(col_int); +insert into mytab3 values(1,'aa',1001,101),(2,'bb',1002,102); +select reltuples from pg_class where relname='mytab3'; + reltuples +----------- + 0 +(1 row) + +-- inspect the state of the stats on segments +select gp_segment_id, relname, reltuples from gp_dist_random('pg_class') where relname = 'mytab3_int_idx1'; + gp_segment_id | relname | reltuples +---------------+-----------------+----------- + 0 | mytab3_int_idx1 | 0 + 1 | mytab3_int_idx1 | 0 + 2 | mytab3_int_idx1 | 0 +(3 rows) + +select reltuples from pg_class where relname='mytab3_int_idx1'; + reltuples +----------- + 0 +(1 row) + +-- 1st VACUUM, expect reltuples = 2 +vacuum mytab3; +select reltuples from pg_class where relname='mytab3'; + reltuples +----------- + 2 +(1 row) + +-- inspect the state of the stats on segments +select gp_segment_id, relname, reltuples from gp_dist_random('pg_class') where relname = 'mytab3_int_idx1'; + gp_segment_id | relname | reltuples +---------------+-----------------+----------- + 0 | mytab3_int_idx1 | 1 + 1 | mytab3_int_idx1 | 1 + 2 | mytab3_int_idx1 | 0 +(3 rows) + +select reltuples from pg_class where relname='mytab3_int_idx1'; + reltuples +----------- + 2 +(1 row) + +-- 2nd VACUUM, expect reltuples = 2 +vacuum mytab3; +select reltuples from pg_class where relname='mytab3'; + reltuples +----------- + 2 +(1 row) + +-- inspect the state of the stats on segments +select gp_segment_id, relname, reltuples from gp_dist_random('pg_class') where relname = 'mytab3_int_idx1'; + gp_segment_id | relname | reltuples +---------------+-----------------+----------- + 0 | mytab3_int_idx1 | 1 + 1 | mytab3_int_idx1 | 1 + 2 | mytab3_int_idx1 | 0 +(3 rows) + +select reltuples from pg_class where relname='mytab3_int_idx1'; + reltuples +----------- + 2 +(1 row) + +-- Prior to this fix, the case would be 
failed here. Given the +-- scenario of updating stats during VACUUM: +-- 1) coordinator vacuums and updates stats of its own; +-- 2) then coordinator dispatches vacuum to segments; +-- 3) coordinator combines stats received from segments to overwrite the stats of its own. +-- Because upstream introduced a feature which could skip full index scan uring cleanup +-- of B-tree indexes when possible (refer to: +-- https://github.com/postgres/postgres/commit/857f9c36cda520030381bd8c2af20adf0ce0e1d4), +-- there was a case in QD-QEs distributed deployment that some QEs could skip full index scan and +-- stop updating statistics, result in QD being unable to collect all QEs' stats thus overwrote +-- a paritial accumulated value to index->reltuples. More interesting, it usually happened starting +-- from the 3rd time of consecutively VACUUM after fresh inserts due to above skipping index scan +-- criteria. +-- 3rd VACUUM, expect reltuples = 2 +vacuum mytab3; +select reltuples from pg_class where relname='mytab3'; + reltuples +----------- + 2 +(1 row) + +-- inspect the state of the stats on segments +select gp_segment_id, relname, reltuples from gp_dist_random('pg_class') where relname = 'mytab3_int_idx1'; + gp_segment_id | relname | reltuples +---------------+-----------------+----------- + 0 | mytab3_int_idx1 | 1 + 1 | mytab3_int_idx1 | 1 + 2 | mytab3_int_idx1 | 0 +(3 rows) + +select reltuples from pg_class where relname='mytab3_int_idx1'; + reltuples +----------- + 2 +(1 row) + +drop table mytab; +drop table mytab2; +drop table mytab3; diff --git a/src/test/regress/expected/uaocs_compaction/index_stats.out b/src/test/regress/expected/uaocs_compaction/index_stats.out index adc286c0811..7ada461449a 100644 --- a/src/test/regress/expected/uaocs_compaction/index_stats.out +++ b/src/test/regress/expected/uaocs_compaction/index_stats.out @@ -45,4 +45,164 @@ SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_index_stats_int_i uaocs_index_stats_int_idx1 | 2 (1 row) --- 
end_ignore \ No newline at end of file +-- Test to ensure that reltuples is updated for an index after lazy vacuum. +-- This is vital as most index AMs that depend on this tuple count (eg btree, bitmap etc) +-- which is passed up from the table AM during lazy vacuum. +-- create a fresh table for the test +CREATE TABLE uaocs_index_stats2( + col_int int, + col_text text, + col_numeric numeric, + col_unq int + ) with(appendonly=true, orientation=column) DISTRIBUTED BY (col_int); +create index uaocs_index_stats2_int_idx1 on uaocs_index_stats2 using bitmap(col_int); +insert into uaocs_index_stats2 values(1,'aa',1001,101),(2,'bb',1002,102); +SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_index_stats2_int_idx1'; + relname | reltuples +-----------------------------+----------- + uaocs_index_stats2_int_idx1 | 0 +(1 row) + +-- first vacuum collect table stat on segments +vacuum uaocs_index_stats2; +-- inspect the state of the stats on segments +SELECT gp_segment_id, relname, reltuples FROM gp_dist_random('pg_class') WHERE relname = 'uaocs_index_stats2_int_idx1'; + gp_segment_id | relname | reltuples +---------------+-----------------------------+----------- + 0 | uaocs_index_stats2_int_idx1 | 0 + 1 | uaocs_index_stats2_int_idx1 | 0 + 2 | uaocs_index_stats2_int_idx1 | 0 +(3 rows) + +-- second vacuum update index stat with table stat +vacuum uaocs_index_stats2; +-- inspect the state of the stats on segments +SELECT gp_segment_id, relname, reltuples FROM gp_dist_random('pg_class') WHERE relname = 'uaocs_index_stats2_int_idx1'; + gp_segment_id | relname | reltuples +---------------+-----------------------------+----------- + 0 | uaocs_index_stats2_int_idx1 | 1 + 1 | uaocs_index_stats2_int_idx1 | 1 + 2 | uaocs_index_stats2_int_idx1 | 0 +(3 rows) + +SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_index_stats2_int_idx1'; + relname | reltuples +-----------------------------+----------- + uaocs_index_stats2_int_idx1 | 2 +(1 row) + +-- Test correctness 
of index->reltuples in consecutively VACUUM. +CREATE TABLE uaocs_index_stats3( + col_int int, + col_text text, + col_numeric numeric, + col_unq int + ) with(appendonly=true, orientation=column) DISTRIBUTED BY (col_int); +create index uaocs_index_stats3_int_idx1 on uaocs_index_stats3(col_int); +insert into uaocs_index_stats3 values(1,'aa',1001,101),(2,'bb',1002,102); +select reltuples from pg_class where relname='uaocs_index_stats3'; + reltuples +----------- + 0 +(1 row) + +-- inspect the state of the stats on segments +select gp_segment_id, relname, reltuples from gp_dist_random('pg_class') where relname = 'uaocs_index_stats3_int_idx1'; + gp_segment_id | relname | reltuples +---------------+-----------------------------+----------- + 0 | uaocs_index_stats3_int_idx1 | 0 + 1 | uaocs_index_stats3_int_idx1 | 0 + 2 | uaocs_index_stats3_int_idx1 | 0 +(3 rows) + +select reltuples from pg_class where relname='uaocs_index_stats3_int_idx1'; + reltuples +----------- + 0 +(1 row) + +-- 1st VACUUM, expect reltuples = 2 +vacuum uaocs_index_stats3; +select reltuples from pg_class where relname='uaocs_index_stats3'; + reltuples +----------- + 2 +(1 row) + +-- inspect the state of the stats on segments +select gp_segment_id, relname, reltuples from gp_dist_random('pg_class') where relname = 'uaocs_index_stats3_int_idx1'; + gp_segment_id | relname | reltuples +---------------+-----------------------------+----------- + 0 | uaocs_index_stats3_int_idx1 | 1 + 1 | uaocs_index_stats3_int_idx1 | 1 + 2 | uaocs_index_stats3_int_idx1 | 0 +(3 rows) + +select reltuples from pg_class where relname='uaocs_index_stats3_int_idx1'; + reltuples +----------- + 2 +(1 row) + +-- 2nd VACUUM, expect reltuples = 2 +vacuum uaocs_index_stats3; +select reltuples from pg_class where relname='uaocs_index_stats3'; + reltuples +----------- + 2 +(1 row) + +-- inspect the state of the stats on segments +select gp_segment_id, relname, reltuples from gp_dist_random('pg_class') where relname =
'uaocs_index_stats3_int_idx1'; + gp_segment_id | relname | reltuples +---------------+-----------------------------+----------- + 0 | uaocs_index_stats3_int_idx1 | 1 + 1 | uaocs_index_stats3_int_idx1 | 1 + 2 | uaocs_index_stats3_int_idx1 | 0 +(3 rows) + +select reltuples from pg_class where relname='uaocs_index_stats3_int_idx1'; + reltuples +----------- + 2 +(1 row) + +-- Prior to this fix, the case would be failed here. Given the +-- scenario of updating stats during VACUUM: +-- 1) coordinator vacuums and updates stats of its own; +-- 2) then coordinator dispatches vacuum to segments; +-- 3) coordinator combines stats received from segments to overwrite the stats of its own. +-- Because upstream introduced a feature which could skip full index scan during cleanup +-- of B-tree indexes when possible (refer to: +-- https://github.com/postgres/postgres/commit/857f9c36cda520030381bd8c2af20adf0ce0e1d4), +-- there was a case in QD-QEs distributed deployment that some QEs could skip full index scan and +-- stop updating statistics, resulting in QD being unable to collect all QEs' stats and thus overwriting +-- a partial accumulated value to index->reltuples. More interestingly, it usually happened starting +-- from the 3rd time of consecutive VACUUMs after fresh inserts due to above skipping index scan +-- criteria.
+-- 3rd VACUUM, expect reltuples = 2 +vacuum uaocs_index_stats3; +select reltuples from pg_class where relname='uaocs_index_stats3'; + reltuples +----------- + 2 +(1 row) + +-- inspect the state of the stats on segments +select gp_segment_id, relname, reltuples from gp_dist_random('pg_class') where relname = 'uaocs_index_stats3_int_idx1'; + gp_segment_id | relname | reltuples +---------------+-----------------------------+----------- + 0 | uaocs_index_stats3_int_idx1 | 1 + 1 | uaocs_index_stats3_int_idx1 | 1 + 2 | uaocs_index_stats3_int_idx1 | 0 +(3 rows) + +select reltuples from pg_class where relname='uaocs_index_stats3_int_idx1'; + reltuples +----------- + 2 +(1 row) + +drop table uaocs_index_stats; +drop table uaocs_index_stats2; +drop table uaocs_index_stats3; diff --git a/src/test/regress/sql/btree_index.sql b/src/test/regress/sql/btree_index.sql index 64e158fafc5..a771eb62b8b 100644 --- a/src/test/regress/sql/btree_index.sql +++ b/src/test/regress/sql/btree_index.sql @@ -175,16 +175,45 @@ VACUUM delete_test_table; -- need to insert some rows to cause the fast root page to split. INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,1000) i; --- Test unsupported btree opclass parameters -create index on btree_tall_tbl (id int4_ops(foo=1)); - --- Test case of ALTER INDEX with abuse of column names for indexes. --- This grammar is not officially supported, but the parser allows it. -CREATE INDEX btree_tall_idx2 ON btree_tall_tbl (id); -ALTER INDEX btree_tall_idx2 ALTER COLUMN id SET (n_distinct=100); -DROP INDEX btree_tall_idx2; --- Partitioned index -CREATE TABLE btree_part (id int4) PARTITION BY RANGE (id); -CREATE INDEX btree_part_idx ON btree_part(id); -ALTER INDEX btree_part_idx ALTER COLUMN id SET (n_distinct=100); -DROP TABLE btree_part; +-- +-- GPDB: Test correctness of B-tree stats in consecutively VACUUM. 
+-- +CREATE TABLE btree_stats_tbl(col_int int, col_text text, col_numeric numeric, col_unq int) DISTRIBUTED BY (col_int); +CREATE INDEX btree_stats_idx ON btree_stats_tbl(col_int); +INSERT INTO btree_stats_tbl VALUES (1, 'aa', 1001, 101), (2, 'bb', 1002, 102); +SELECT reltuples FROM pg_class WHERE relname='btree_stats_tbl'; +-- inspect the state of the stats on segments +SELECT gp_segment_id, relname, reltuples FROM gp_dist_random('pg_class') WHERE relname = 'btree_stats_idx'; +SELECT reltuples FROM pg_class WHERE relname='btree_stats_idx'; +-- 1st VACUUM, expect reltuples = 2 +vacuum btree_stats_tbl; +SELECT reltuples FROM pg_class WHERE relname='btree_stats_tbl'; +-- inspect the state of the stats on segments +SELECT gp_segment_id, relname, reltuples FROM gp_dist_random('pg_class') WHERE relname = 'btree_stats_idx'; +SELECT reltuples FROM pg_class WHERE relname='btree_stats_idx'; +-- 2nd VACUUM, expect reltuples = 2 +vacuum btree_stats_tbl; +SELECT reltuples FROM pg_class WHERE relname='btree_stats_tbl'; +-- inspect the state of the stats on segments +SELECT gp_segment_id, relname, reltuples FROM gp_dist_random('pg_class') WHERE relname = 'btree_stats_idx'; +SELECT reltuples FROM pg_class WHERE relname='btree_stats_idx'; + +-- Prior to this fix, the case would be failed here. Given the +-- scenario of updating stats during VACUUM: +-- 1) coordinator vacuums and updates stats of its own; +-- 2) then coordinator dispatches vacuum to segments; +-- 3) coordinator combines stats received from segments to overwrite the stats of its own. 
+-- Because upstream introduced a feature which could skip full index scan uring cleanup +-- of B-tree indexes when possible (refer to: +-- https://github.com/postgres/postgres/commit/857f9c36cda520030381bd8c2af20adf0ce0e1d4), +-- there was a case in QD-QEs distributed deployment that some QEs could skip full index scan and +-- stop updating statistics, result in QD being unable to collect all QEs' stats thus overwrote +-- a paritial accumulated value to index->reltuples. More interesting, it usually happened starting +-- from the 3rd time of consecutively VACUUM after fresh inserts due to above skipping index scan +-- criteria. +-- 3rd VACUUM, expect reltuples = 2 +vacuum btree_stats_tbl; +SELECT reltuples FROM pg_class WHERE relname='btree_stats_tbl'; +-- inspect the state of the stats on segments +SELECT gp_segment_id, relname, reltuples FROM gp_dist_random('pg_class') WHERE relname = 'btree_stats_idx'; +SELECT reltuples FROM pg_class WHERE relname='btree_stats_idx'; diff --git a/src/test/regress/sql/uao_compaction/index_stats.sql b/src/test/regress/sql/uao_compaction/index_stats.sql index 05ad79953ef..f88e000e707 100644 --- a/src/test/regress/sql/uao_compaction/index_stats.sql +++ b/src/test/regress/sql/uao_compaction/index_stats.sql @@ -23,4 +23,83 @@ SELECT relname, reltuples FROM pg_class WHERE relname = 'mytab'; -- for reltuples to coordinate with the new behavior. -- start_ignore SELECT relname, reltuples FROM pg_class WHERE relname = 'mytab_int_idx1'; --- end_ignore + +-- Test to ensure that reltuples is updated for an index after lazy vacuum. +-- This is vital as most index AMs that depend on this tuple count (eg btree, bitmap etc) +-- which is passed up from the table AM during lazy vacuum. 
+-- create a fresh table for the test +CREATE TABLE mytab2( + col_int int, + col_text text, + col_numeric numeric, + col_unq int + ) with(appendonly=true) DISTRIBUTED BY (col_int); + +create index mytab2_int_idx1 on mytab2 using bitmap(col_int); + +insert into mytab2 values(1,'aa',1001,101),(2,'bb',1002,102); + +SELECT relname, reltuples FROM pg_class WHERE relname = 'mytab2_int_idx1'; + +-- first vacuum collect table stat on segments +vacuum mytab2; +-- inspect the state of the stats on segments +SELECT gp_segment_id, relname, reltuples FROM gp_dist_random('pg_class') WHERE relname = 'mytab2_int_idx1'; +-- second vacuum update index stat with table stat +vacuum mytab2; +-- inspect the state of the stats on segments +SELECT gp_segment_id, relname, reltuples FROM gp_dist_random('pg_class') WHERE relname = 'mytab2_int_idx1'; +SELECT relname, reltuples FROM pg_class WHERE relname = 'mytab2_int_idx1'; + +-- Test correctness of index->reltuples in consecutively VACUUM. +CREATE TABLE mytab3( + col_int int, + col_text text, + col_numeric numeric, + col_unq int + ) with(appendonly=true) DISTRIBUTED BY (col_int); + +create index mytab3_int_idx1 on mytab3(col_int); + +insert into mytab3 values(1,'aa',1001,101),(2,'bb',1002,102); + +select reltuples from pg_class where relname='mytab3'; +-- inspect the state of the stats on segments +select gp_segment_id, relname, reltuples from gp_dist_random('pg_class') where relname = 'mytab3_int_idx1'; +select reltuples from pg_class where relname='mytab3_int_idx1'; +-- 1st VACUUM, expect reltuples = 2 +vacuum mytab3; +select reltuples from pg_class where relname='mytab3'; +-- inspect the state of the stats on segments +select gp_segment_id, relname, reltuples from gp_dist_random('pg_class') where relname = 'mytab3_int_idx1'; +select reltuples from pg_class where relname='mytab3_int_idx1'; +-- 2nd VACUUM, expect reltuples = 2 +vacuum mytab3; +select reltuples from pg_class where relname='mytab3'; +-- inspect the state of the stats on 
segments +select gp_segment_id, relname, reltuples from gp_dist_random('pg_class') where relname = 'mytab3_int_idx1'; +select reltuples from pg_class where relname='mytab3_int_idx1'; + +-- Prior to this fix, the case would be failed here. Given the +-- scenario of updating stats during VACUUM: +-- 1) coordinator vacuums and updates stats of its own; +-- 2) then coordinator dispatches vacuum to segments; +-- 3) coordinator combines stats received from segments to overwrite the stats of its own. +-- Because upstream introduced a feature which could skip full index scan during cleanup +-- of B-tree indexes when possible (refer to: +-- https://github.com/postgres/postgres/commit/857f9c36cda520030381bd8c2af20adf0ce0e1d4), +-- there was a case in QD-QEs distributed deployment that some QEs could skip full index scan and +-- stop updating statistics, resulting in QD being unable to collect all QEs' stats and thus overwriting +-- a partial accumulated value to index->reltuples. More interestingly, it usually happened starting +-- from the 3rd time of consecutive VACUUMs after fresh inserts due to above skipping index scan +-- criteria. +-- 3rd VACUUM, expect reltuples = 2 +vacuum mytab3; +select reltuples from pg_class where relname='mytab3'; +-- inspect the state of the stats on segments +select gp_segment_id, relname, reltuples from gp_dist_random('pg_class') where relname = 'mytab3_int_idx1'; +select reltuples from pg_class where relname='mytab3_int_idx1'; + +drop table mytab; +drop table mytab2; +drop table mytab3; diff --git a/src/test/regress/sql/uaocs_compaction/index_stats.sql b/src/test/regress/sql/uaocs_compaction/index_stats.sql index c93ee37f686..d87b41a8b45 100644 --- a/src/test/regress/sql/uaocs_compaction/index_stats.sql +++ b/src/test/regress/sql/uaocs_compaction/index_stats.sql @@ -23,4 +23,83 @@ SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_index_stats'; -- for reltuples to coordinate with the new behavior.
-- start_ignore SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_index_stats_int_idx1'; --- end_ignore + +-- Test to ensure that reltuples is updated for an index after lazy vacuum. +-- This is vital as most index AMs that depend on this tuple count (eg btree, bitmap etc) +-- which is passed up from the table AM during lazy vacuum. +-- create a fresh table for the test +CREATE TABLE uaocs_index_stats2( + col_int int, + col_text text, + col_numeric numeric, + col_unq int + ) with(appendonly=true, orientation=column) DISTRIBUTED BY (col_int); + +create index uaocs_index_stats2_int_idx1 on uaocs_index_stats2 using bitmap(col_int); + +insert into uaocs_index_stats2 values(1,'aa',1001,101),(2,'bb',1002,102); + +SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_index_stats2_int_idx1'; + +-- first vacuum collect table stat on segments +vacuum uaocs_index_stats2; +-- inspect the state of the stats on segments +SELECT gp_segment_id, relname, reltuples FROM gp_dist_random('pg_class') WHERE relname = 'uaocs_index_stats2_int_idx1'; +-- second vacuum update index stat with table stat +vacuum uaocs_index_stats2; +-- inspect the state of the stats on segments +SELECT gp_segment_id, relname, reltuples FROM gp_dist_random('pg_class') WHERE relname = 'uaocs_index_stats2_int_idx1'; +SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_index_stats2_int_idx1'; + +-- Test correctness of index->reltuples in consecutively VACUUM. 
+CREATE TABLE uaocs_index_stats3( + col_int int, + col_text text, + col_numeric numeric, + col_unq int + ) with(appendonly=true, orientation=column) DISTRIBUTED BY (col_int); + +create index uaocs_index_stats3_int_idx1 on uaocs_index_stats3(col_int); + +insert into uaocs_index_stats3 values(1,'aa',1001,101),(2,'bb',1002,102); + +select reltuples from pg_class where relname='uaocs_index_stats3'; +-- inspect the state of the stats on segments +select gp_segment_id, relname, reltuples from gp_dist_random('pg_class') where relname = 'uaocs_index_stats3_int_idx1'; +select reltuples from pg_class where relname='uaocs_index_stats3_int_idx1'; +-- 1st VACUUM, expect reltuples = 2 +vacuum uaocs_index_stats3; +select reltuples from pg_class where relname='uaocs_index_stats3'; +-- inspect the state of the stats on segments +select gp_segment_id, relname, reltuples from gp_dist_random('pg_class') where relname = 'uaocs_index_stats3_int_idx1'; +select reltuples from pg_class where relname='uaocs_index_stats3_int_idx1'; +-- 2nd VACUUM, expect reltuples = 2 +vacuum uaocs_index_stats3; +select reltuples from pg_class where relname='uaocs_index_stats3'; +-- inspect the state of the stats on segments +select gp_segment_id, relname, reltuples from gp_dist_random('pg_class') where relname = 'uaocs_index_stats3_int_idx1'; +select reltuples from pg_class where relname='uaocs_index_stats3_int_idx1'; + +-- Prior to this fix, the case would be failed here. Given the +-- scenario of updating stats during VACUUM: +-- 1) coordinator vacuums and updates stats of its own; +-- 2) then coordinator dispatches vacuum to segments; +-- 3) coordinator combines stats received from segments to overwrite the stats of its own.
+-- Because upstream introduced a feature which could skip full index scan during cleanup +-- of B-tree indexes when possible (refer to: +-- https://github.com/postgres/postgres/commit/857f9c36cda520030381bd8c2af20adf0ce0e1d4), +-- there was a case in QD-QEs distributed deployment that some QEs could skip full index scan and +-- stop updating statistics, resulting in QD being unable to collect all QEs' stats and thus overwriting +-- a partial accumulated value to index->reltuples. More interestingly, it usually happened starting +-- from the 3rd time of consecutive VACUUMs after fresh inserts due to above skipping index scan +-- criteria. +-- 3rd VACUUM, expect reltuples = 2 +vacuum uaocs_index_stats3; +select reltuples from pg_class where relname='uaocs_index_stats3'; +-- inspect the state of the stats on segments +select gp_segment_id, relname, reltuples from gp_dist_random('pg_class') where relname = 'uaocs_index_stats3_int_idx1'; +select reltuples from pg_class where relname='uaocs_index_stats3_int_idx1'; + +drop table uaocs_index_stats; +drop table uaocs_index_stats2; +drop table uaocs_index_stats3;