From 760e835751cdaa5ae0610557a7ad2b4662dc8cc9 Mon Sep 17 00:00:00 2001 From: Soumyadeep Chakraborty Date: Mon, 8 Aug 2022 12:13:14 -0700 Subject: [PATCH 01/19] gp_aoblkdir: Block directory inspection function This commit introduces the gp_toolkit.__gp_aoblkdir UDF to print all of the block directory entries for an AO/AOCO relation. It essentially flattens the minipage binary column in pg_aoblkdir_* relations, printing a minipage entry in every output row. This function is in the same spirit as __gp_aovisimap() and can be run in utility mode or with gp_dist_random() to obtain meaningful results. Example: postgres=# create table baz(i int, j int) with (appendonly=true); postgres=# create index on baz(i); postgres=# insert into baz select i, i FROM generate_series(1, 10) i; postgres=# insert into baz select i, i FROM generate_series(1, 10) i; postgres=# SELECT gp_segment_id, (gp_toolkit.__gp_aoblkdir('baz')).* FROM gp_dist_random('gp_id') order by 1, 2, 3, 4, 5, 6, 7, 8; gp_segment_id | tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count ---------------+---------+-------+----------------+----------+--------------+-------------+----------- 0 | (0,2) | 1 | 0 | 0 | 1 | 0 | 100 0 | (0,2) | 1 | 0 | 1 | 101 | 128 | 5 1 | (0,2) | 1 | 0 | 0 | 1 | 0 | 100 1 | (0,2) | 1 | 0 | 1 | 101 | 40 | 1 2 | (0,2) | 1 | 0 | 0 | 1 | 0 | 100 2 | (0,2) | 1 | 0 | 1 | 101 | 104 | 4 (6 rows) PGOPTIONS='-c gp_role=utility' psql postgres -p 7002 psql (12beta2) Type "help" for help. postgres=# select * from gp_toolkit.__gp_aoblkdir('baz'); tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count ---------+-------+----------------+----------+--------------+-------------+----------- (0,2) | 1 | 0 | 0 | 1 | 0 | 100 (0,2) | 1 | 0 | 1 | 101 | 128 | 5 (2 rows) This UDF also respects gp_select_invisible to report block directory entries that are invisible. 
To determine invisible entries we can use the tupleid projected here and tie it to the corresponding pg_aoblkdir tuple's xmax. --- .../gp_internal_tools/gp_ao_co_diagnostics.c | 21 ++ src/backend/access/appendonly/Makefile | 2 +- .../access/appendonly/appendonly_blkdir_udf.c | 210 ++++++++++++++++++ .../appendonly/appendonlyblockdirectory.c | 44 +--- src/backend/catalog/gp_toolkit.sql | 10 + src/include/cdb/cdbappendonlyblockdirectory.h | 41 ++++ .../regress/expected/gp_toolkit_ao_funcs.out | 34 +++ src/test/regress/sql/gp_toolkit_ao_funcs.sql | 10 + 8 files changed, 330 insertions(+), 42 deletions(-) create mode 100644 src/backend/access/appendonly/appendonly_blkdir_udf.c diff --git a/gpcontrib/gp_internal_tools/gp_ao_co_diagnostics.c b/gpcontrib/gp_internal_tools/gp_ao_co_diagnostics.c index 2579075ef96..807e52ceaa2 100644 --- a/gpcontrib/gp_internal_tools/gp_ao_co_diagnostics.c +++ b/gpcontrib/gp_internal_tools/gp_ao_co_diagnostics.c @@ -48,6 +48,9 @@ gp_aoseg(PG_FUNCTION_ARGS); extern Datum gp_aocsseg_history(PG_FUNCTION_ARGS); +extern Datum +gp_aoblkdir(PG_FUNCTION_ARGS); + extern Datum gp_aovisimap(PG_FUNCTION_ARGS); @@ -67,6 +70,7 @@ PG_FUNCTION_INFO_V1(gp_aoseg_history_wrapper); PG_FUNCTION_INFO_V1(gp_aoseg_wrapper); PG_FUNCTION_INFO_V1(gp_aocsseg_wrapper); PG_FUNCTION_INFO_V1(gp_aocsseg_history_wrapper); +PG_FUNCTION_INFO_V1(gp_aoblkdir_wrapper); PG_FUNCTION_INFO_V1(gp_aovisimap_wrapper); PG_FUNCTION_INFO_V1(gp_aovisimap_entry_wrapper); PG_FUNCTION_INFO_V1(gp_aovisimap_hidden_info_wrapper); @@ -84,6 +88,8 @@ gp_aocsseg_wrapper(PG_FUNCTION_ARGS); extern Datum gp_aocsseg_history_wrapper(PG_FUNCTION_ARGS); extern Datum +gp_aoblkdir_wrapper(PG_FUNCTION_ARGS); +extern Datum gp_aovisimap_wrapper(PG_FUNCTION_ARGS); extern Datum gp_aovisimap_entry_wrapper(PG_FUNCTION_ARGS); @@ -228,6 +234,21 @@ gp_aocsseg_history_wrapper(PG_FUNCTION_ARGS) PG_RETURN_DATUM(returnValue); } +/* + * Interface to gp_aoblkdir_wrapper function. 
+ * + * CREATE FUNCTION gp_aoblkdir_wrapper(regclass) RETURNS TABLE + * (segno integer, columngroup_no integer, first_row_no bigint, file_offset bigint, row_count bigint) + * AS '$libdir/gp_ao_co_diagnostics.so', 'gp_aoblkdir_wrapper' LANGUAGE C STRICT; + */ +Datum +gp_aoblkdir_wrapper(PG_FUNCTION_ARGS) +{ + Datum returnValue = gp_aoblkdir(fcinfo); + + PG_RETURN_DATUM(returnValue); +} + /* * Interface to gp_aovisimap_wrapper function. * diff --git a/src/backend/access/appendonly/Makefile b/src/backend/access/appendonly/Makefile index 7b61f42a707..430e397f2fa 100755 --- a/src/backend/access/appendonly/Makefile +++ b/src/backend/access/appendonly/Makefile @@ -15,7 +15,7 @@ OBJS = appendonlyam_handler.o appendonlyam.o aosegfiles.o aomd.o \ appendonlyblockdirectory.o appendonly_visimap.o \ appendonly_visimap_entry.o appendonly_visimap_store.o \ appendonly_compaction.o appendonly_visimap_udf.o \ - aomd_filehandler.o + appendonly_blkdir_udf.o aomd_filehandler.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/appendonly/appendonly_blkdir_udf.c b/src/backend/access/appendonly/appendonly_blkdir_udf.c new file mode 100644 index 00000000000..2c807b58fb0 --- /dev/null +++ b/src/backend/access/appendonly/appendonly_blkdir_udf.c @@ -0,0 +1,210 @@ +/*------------------------------------------------------------------------------ + * + * AppendOnly_Blkdir UDFs + * User-defined functions (UDF) for support of append-only block directory + * + * Copyright (c) 2013-Present VMware, Inc. or its affiliates. 
+ * + * + * IDENTIFICATION + * src/backend/access/appendonly/appendonly_blkdir_udf.c + * + *------------------------------------------------------------------------------ + */ + +#include "postgres.h" + +#include "access/appendonly_visimap.h" +#include "access/table.h" +#include "catalog/aoblkdir.h" +#include "cdb/cdbappendonlyblockdirectory.h" +#include "cdb/cdbvars.h" +#include "funcapi.h" +#include "utils/snapmgr.h" + +Datum gp_aoblkdir(PG_FUNCTION_ARGS); + +/* + * This UDF emits block directory entries for an AO/AOCO relation. It does so + * by flattening the minipage column of ao_blkdir relations, yielding 1 minipage + * entry / output row. + * + * Format: + * tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count + * + * This UDF also respects gp_select_invisible to report block directory entries + * that are invisible. To determine invisible entries we can use the tupleid + * projected here and tie it to the corresponding pg_aoblkdir tuple's xmax. + */ + +Datum +gp_aoblkdir(PG_FUNCTION_ARGS) +{ + Oid aoRelOid = PG_GETARG_OID(0); + HeapTuple tuple; + + typedef struct Context + { + Relation aorel; + SysScanDesc scan; + MinipagePerColumnGroup currMinipage; + bool currMinipageValid; + int currMinipageEntryIdx; + Relation blkdirrel; + } Context; + + FuncCallContext *funcctx; + Context *context; + + if (SRF_IS_FIRSTCALL()) + { + TupleDesc tupdesc; + MemoryContext oldcontext; + Snapshot sst; + Oid blkdirrelid; + + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* + * switch to memory context appropriate for multiple function calls + */ + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* build tupdesc for result tuples */ + tupdesc = CreateTemplateTupleDesc(7); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tupleid", + TIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "segno", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, 
"columngroup_no", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "entry_no", + INT4OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 5, "first_row_no", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 6, "file_offset", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 7, "row_count", + INT8OID, -1, 0); + funcctx->tuple_desc = BlessTupleDesc(tupdesc); + + /* initialize Context for SRF */ + context = (Context *) palloc0(sizeof(Context)); + context->aorel = table_open(aoRelOid, AccessShareLock); + if (!RelationIsAppendOptimized(context->aorel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("function not supported on non append-optimized relation"))); + sst = GetLatestSnapshot(); + GetAppendOnlyEntryAuxOids(aoRelOid, sst, + NULL, &blkdirrelid, NULL, + NULL, NULL); + sst = gp_select_invisible ? SnapshotAny : GetLatestSnapshot(); + if (blkdirrelid == InvalidOid) + ereport(ERROR, + (errmsg("appendoptimized relation doesn't have a block directory"), + errhint("relation must have or must have had an index"))); + context->blkdirrel = table_open(blkdirrelid, AccessShareLock); + context->scan = systable_beginscan(context->blkdirrel, + InvalidOid, + false, + sst, + 0, + NULL); + context->currMinipage.minipage = palloc0(minipage_size(NUM_MINIPAGE_ENTRIES)); + context->currMinipageValid = false; + context->currMinipageEntryIdx = -1; + funcctx->user_fctx = (void *) context; + + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + context = (Context *) funcctx->user_fctx; + + if (!context->currMinipageValid) + { + Datum minipage; + bool minipageNull; + + /* We need to fetch the next tuple from the blkdir relation */ + if (!systable_getnext(context->scan)) + goto srf_done; + + /* deform the tuple and populate slot->values/nulls */ + slot_getallattrs(context->scan->slot); + + minipage = slot_getattr(context->scan->slot, Anum_pg_aoblkdir_minipage, &minipageNull); + /* + * There should not 
really be any NULL values. We opt to report it + * instead of ERRORing out. + */ + context->currMinipageValid = !minipageNull; + if (context->currMinipageValid) + { + /* + * Cache the latest scanned minipage and use it to emit the next + * (context->currMinipage->numMinipageEntries) rows + */ + copy_out_minipage(&context->currMinipage, minipage, false); + context->currMinipageEntryIdx = 0; + } + } + + { + Datum values[7]; + bool nulls[7]; + TupleTableSlot *slot = context->scan->slot; + Datum result; + + values[0] = ItemPointerGetDatum(&slot->tts_tid); + nulls[0] = false; + + values[1] = slot_getattr(slot, Anum_pg_aoblkdir_segno, &nulls[1]); + values[2] = slot_getattr(slot, Anum_pg_aoblkdir_columngroupno, &nulls[2]); + + /* emit minipage entry */ + if (context->currMinipageValid) + { + MinipagePerColumnGroup *currMinipage = &context->currMinipage; + MinipageEntry *minipageEntry; + + Assert(context->currMinipageEntryIdx < currMinipage->numMinipageEntries); + + minipageEntry = &currMinipage->minipage->entry[context->currMinipageEntryIdx]; + + values[3] = context->currMinipageEntryIdx++; + values[4] = Int64GetDatum(minipageEntry->firstRowNum); + values[5] = Int64GetDatum(minipageEntry->fileOffset); + values[6] = Int64GetDatum(minipageEntry->rowCount); + + nulls[3] = false; + nulls[4] = false; + nulls[5] = false; + nulls[6] = false; + + context->currMinipageValid = + (context->currMinipageEntryIdx != currMinipage->numMinipageEntries); + } + else + { + nulls[3] = true; + nulls[4] = true; + nulls[5] = true; + nulls[6] = true; + } + + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + result = HeapTupleGetDatum(tuple); + SRF_RETURN_NEXT(funcctx, result); + } + +srf_done: + table_close(context->aorel, AccessShareLock); + systable_endscan(context->scan); + table_close(context->blkdirrel, AccessShareLock); + pfree(context); + funcctx->user_fctx = NULL; + SRF_RETURN_DONE(funcctx); +} diff --git a/src/backend/access/appendonly/appendonlyblockdirectory.c 
b/src/backend/access/appendonly/appendonlyblockdirectory.c index a97dbd71ef2..13fe635a72a 100644 --- a/src/backend/access/appendonly/appendonlyblockdirectory.c +++ b/src/backend/access/appendonly/appendonlyblockdirectory.c @@ -29,13 +29,6 @@ int gp_blockdirectory_entry_min_range = 0; int gp_blockdirectory_minipage_size = NUM_MINIPAGE_ENTRIES; -static inline uint32 -minipage_size(uint32 nEntry) -{ - return offsetof(Minipage, entry) + - sizeof(MinipageEntry) * nEntry; -} - static void load_last_minipage( AppendOnlyBlockDirectory *blockDirectory, int64 lastSequence, @@ -918,35 +911,6 @@ init_scankeys(TupleDesc tupleDesc, } } -/* - * copy_out_minipage - * - * Copy out the minipage content from a deformed tuple. - */ -static inline void -copy_out_minipage(MinipagePerColumnGroup *minipageInfo, - Datum minipage_value, - bool minipage_isnull) -{ - struct varlena *value; - struct varlena *detoast_value; - - Assert(!minipage_isnull); - - value = (struct varlena *) - DatumGetPointer(minipage_value); - detoast_value = pg_detoast_datum(value); - Assert(VARSIZE(detoast_value) <= minipage_size(NUM_MINIPAGE_ENTRIES)); - - memcpy(minipageInfo->minipage, detoast_value, VARSIZE(detoast_value)); - if (detoast_value != value) - pfree(detoast_value); - - Assert(minipageInfo->minipage->nEntry <= NUM_MINIPAGE_ENTRIES); - - minipageInfo->numMinipageEntries = minipageInfo->minipage->nEntry; -} - /* * extract_minipage @@ -1229,8 +1193,6 @@ write_minipage(AppendOnlyBlockDirectory *blockDirectory, MemoryContextSwitchTo(oldcxt); } - - void AppendOnlyBlockDirectory_End_forInsert( AppendOnlyBlockDirectory *blockDirectory) @@ -1284,8 +1246,7 @@ AppendOnlyBlockDirectory_End_forSearch( { int groupNo; - if (blockDirectory->blkdirRel == NULL || - blockDirectory->blkdirIdx == NULL) + if (blockDirectory->blkdirRel == NULL) return; for (groupNo = 0; groupNo < blockDirectory->numColumnGroups; groupNo++) @@ -1308,7 +1269,8 @@ AppendOnlyBlockDirectory_End_forSearch( pfree(blockDirectory->scanKeys); 
pfree(blockDirectory->strategyNumbers); - index_close(blockDirectory->blkdirIdx, AccessShareLock); + if (blockDirectory->blkdirIdx) + index_close(blockDirectory->blkdirIdx, AccessShareLock); heap_close(blockDirectory->blkdirRel, AccessShareLock); MemoryContextDelete(blockDirectory->memoryContext); diff --git a/src/backend/catalog/gp_toolkit.sql b/src/backend/catalog/gp_toolkit.sql index 3bb54540d63..915c69e55c6 100644 --- a/src/backend/catalog/gp_toolkit.sql +++ b/src/backend/catalog/gp_toolkit.sql @@ -1880,6 +1880,16 @@ AS '$libdir/gp_ao_co_diagnostics' , 'gp_aocsseg_history_wrapper' LANGUAGE C STRICT EXECUTE ON ALL SEGMENTS; GRANT EXECUTE ON FUNCTION gp_toolkit.__gp_aocsseg_history(regclass) TO public; +CREATE FUNCTION gp_toolkit.__gp_aoblkdir(regclass) +RETURNS TABLE (tupleid tid, + segno integer, + columngroup_no integer, + entry_no integer, + first_row_no bigint, + file_offset bigint, + row_count bigint) +AS '$libdir/gp_ao_co_diagnostics.so', 'gp_aoblkdir_wrapper' LANGUAGE C STRICT; + CREATE FUNCTION gp_toolkit.__gp_aovisimap(regclass) RETURNS TABLE (tid tid, segno int, diff --git a/src/include/cdb/cdbappendonlyblockdirectory.h b/src/include/cdb/cdbappendonlyblockdirectory.h index 7a314490105..998dea0ee84 100644 --- a/src/include/cdb/cdbappendonlyblockdirectory.h +++ b/src/include/cdb/cdbappendonlyblockdirectory.h @@ -152,6 +152,11 @@ typedef struct CurrentSegmentFile int64 logicalEof; } CurrentSegmentFile; +typedef struct AppendOnlyBlockDirectorySeqScan { + AppendOnlyBlockDirectory blkdir; + SysScanDesc sysScan; +} AppendOnlyBlockDirectorySeqScan; + extern void AppendOnlyBlockDirectoryEntry_GetBeginRange( AppendOnlyBlockDirectoryEntry *directoryEntry, int64 *fileOffset, @@ -225,4 +230,40 @@ extern void AppendOnlyBlockDirectory_DeleteSegmentFile( Snapshot snapshot, int segno, int columnGroupNo); + +static inline uint32 +minipage_size(uint32 nEntry) +{ + return offsetof(Minipage, entry) + sizeof(MinipageEntry) * nEntry; +} + +/* + * copy_out_minipage + * + * 
Copy out the minipage content from a deformed tuple. + */ +static inline void +copy_out_minipage(MinipagePerColumnGroup *minipageInfo, + Datum minipage_value, + bool minipage_isnull) +{ + struct varlena *value; + struct varlena *detoast_value; + + Assert(!minipage_isnull); + + value = (struct varlena *) + DatumGetPointer(minipage_value); + detoast_value = pg_detoast_datum(value); + Assert(VARSIZE(detoast_value) <= minipage_size(NUM_MINIPAGE_ENTRIES)); + + memcpy(minipageInfo->minipage, detoast_value, VARSIZE(detoast_value)); + if (detoast_value != value) + pfree(detoast_value); + + Assert(minipageInfo->minipage->nEntry <= NUM_MINIPAGE_ENTRIES); + + minipageInfo->numMinipageEntries = minipageInfo->minipage->nEntry; +} + #endif diff --git a/src/test/regress/expected/gp_toolkit_ao_funcs.out b/src/test/regress/expected/gp_toolkit_ao_funcs.out index 156a57f81ba..fe8f26d8218 100644 --- a/src/test/regress/expected/gp_toolkit_ao_funcs.out +++ b/src/test/regress/expected/gp_toolkit_ao_funcs.out @@ -10,12 +10,14 @@ DROP TABLE IF EXISTS toolkit_ao_test; CREATE TABLE toolkit_ao_test (a INT, b INT, c INT) WITH (appendonly=true) DISTRIBUTED BY (c); +CREATE INDEX ON toolkit_ao_test(a); INSERT INTO toolkit_ao_test SELECT i as a, i as b, 1 FROM generate_series(1,20) AS i; UPDATE toolkit_ao_test SET b = 0 WHERE a = 1; DELETE FROM toolkit_ao_test WHERE a = 2; DROP TABLE IF EXISTS toolkit_aocs_test; CREATE TABLE toolkit_aocs_test (a INT, b INT, C INT) WITH (appendonly=true, orientation=column) DISTRIBUTED BY (c); +CREATE INDEX ON toolkit_aocs_test(a); INSERT INTO toolkit_aocs_test SELECT i as a, i as b FROM generate_series(1,20) AS i; UPDATE toolkit_aocs_test SET b = 0 WHERE a = 1; DELETE FROM toolkit_aocs_test WHERE a = 2; @@ -66,6 +68,16 @@ SELECT count(*) FROM gp_toolkit.__gp_aoseg('toolkit_ao_test'); 1 (1 row) +SELECT * FROM gp_toolkit.__gp_aoblkdir('toolkit_ao_test'); + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count 
+---------+-------+----------------+----------+--------------+-------------+----------- +(0 rows) + +SELECT * FROM gp_toolkit.__gp_aoblkdir('toolkit_aocs_test'); + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- +(0 rows) + -- The same, but on the segments. SELECT (t).* FROM ( SELECT gp_toolkit.__gp_aovisimap('toolkit_ao_test') AS t FROM gp_dist_random('gp_id') @@ -84,3 +96,25 @@ SELECT (t).segno, (t).first_row_num, (t).hidden_tupcount >= 1 as hidden_tupcount 1 | 0 | t | t (1 row) +SELECT (t).* FROM ( + SELECT gp_toolkit.__gp_aoblkdir('toolkit_ao_test') AS t FROM gp_dist_random('gp_id') +) AS x; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- + (0,2) | 1 | 0 | 0 | 1 | 0 | 100 + (0,2) | 1 | 0 | 1 | 101 | 392 | 1 +(2 rows) + +SELECT (t).* FROM ( + SELECT gp_toolkit.__gp_aoblkdir('toolkit_aocs_test') AS t FROM gp_dist_random('gp_id') +) AS x; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- + (0,4) | 1 | 0 | 0 | 1 | 0 | 100 + (0,4) | 1 | 0 | 1 | 101 | 120 | 1 + (0,5) | 1 | 1 | 0 | 1 | 0 | 100 + (0,5) | 1 | 1 | 1 | 101 | 120 | 1 + (0,6) | 1 | 2 | 0 | 1 | 0 | 100 + (0,6) | 1 | 2 | 1 | 101 | 48 | 1 +(6 rows) + diff --git a/src/test/regress/sql/gp_toolkit_ao_funcs.sql b/src/test/regress/sql/gp_toolkit_ao_funcs.sql index 545f575dfa3..0df9943ef45 100644 --- a/src/test/regress/sql/gp_toolkit_ao_funcs.sql +++ b/src/test/regress/sql/gp_toolkit_ao_funcs.sql @@ -12,6 +12,7 @@ DROP TABLE IF EXISTS toolkit_ao_test; CREATE TABLE toolkit_ao_test (a INT, b INT, c INT) WITH (appendonly=true) DISTRIBUTED BY (c); +CREATE INDEX ON toolkit_ao_test(a); INSERT INTO toolkit_ao_test SELECT i as a, i as b, 1 FROM 
generate_series(1,20) AS i; UPDATE toolkit_ao_test SET b = 0 WHERE a = 1; DELETE FROM toolkit_ao_test WHERE a = 2; @@ -19,6 +20,7 @@ DELETE FROM toolkit_ao_test WHERE a = 2; DROP TABLE IF EXISTS toolkit_aocs_test; CREATE TABLE toolkit_aocs_test (a INT, b INT, C INT) WITH (appendonly=true, orientation=column) DISTRIBUTED BY (c); +CREATE INDEX ON toolkit_aocs_test(a); INSERT INTO toolkit_aocs_test SELECT i as a, i as b FROM generate_series(1,20) AS i; UPDATE toolkit_aocs_test SET b = 0 WHERE a = 1; DELETE FROM toolkit_aocs_test WHERE a = 2; @@ -32,6 +34,8 @@ SELECT * FROM gp_toolkit.__gp_aovisimap('toolkit_ao_test'); SELECT count(*) FROM gp_toolkit.__gp_aovisimap_hidden_info('toolkit_ao_test'); SELECT * FROM gp_toolkit.__gp_aovisimap_entry('toolkit_ao_test'); SELECT count(*) FROM gp_toolkit.__gp_aoseg('toolkit_ao_test'); +SELECT * FROM gp_toolkit.__gp_aoblkdir('toolkit_ao_test'); +SELECT * FROM gp_toolkit.__gp_aoblkdir('toolkit_aocs_test'); -- The same, but on the segments. SELECT (t).* FROM ( @@ -40,3 +44,9 @@ SELECT (t).* FROM ( SELECT (t).segno, (t).first_row_num, (t).hidden_tupcount >= 1 as hidden_tupcount_nonzero, (t).bitmap like '01%' as bitmap_starts_with_01 FROM ( SELECT gp_toolkit.__gp_aovisimap_entry('toolkit_ao_test') AS t FROM gp_dist_random('gp_id') ) AS x; +SELECT (t).* FROM ( + SELECT gp_toolkit.__gp_aoblkdir('toolkit_ao_test') AS t FROM gp_dist_random('gp_id') +) AS x; +SELECT (t).* FROM ( + SELECT gp_toolkit.__gp_aoblkdir('toolkit_aocs_test') AS t FROM gp_dist_random('gp_id') +) AS x; From 4f686c74b308032bd4fb4bdc5746cb1c3cafc2ca Mon Sep 17 00:00:00 2001 From: Soumyadeep Chakraborty Date: Thu, 8 Sep 2022 09:59:23 -0700 Subject: [PATCH 02/19] aoblkdir: remove hole filling mechanism Background: (1) Block directory rowcount hole filling: The firstRowNum is allocated on the basis of gp_fast_sequence and is not always contiguous with the last minipage entry's (firstRowNum + rowCount) value. 
Before we insert the next minipage entry, we "fill" the rowCount of the previous one so that the range appears contiguous between successive minipage entries. (2) Hole filling in action: insert into foo values(1); -> inserts entry A with (firstRowNum, rowCount) = (1, 1) insert into foo values(1); -> updates previous entry A to (firstRowNum, rowCount) = (1, 100) since firstRowNum = 101 for the next insert, we fill the previous entry to ensure that there are no holes in the rowCount range. Motivation: There is no apparent reason why this mechanism is necessary and it becomes a hindrance for future work to support unique indexes. That work depends on the continuity of a block directory entry's range to determine unique index lookups. Changes: (1) Removing this mechanism establishes the invariant: fetching physical rows in the continuous range of a block directory entry's first and last row numbers will always be successful. (2) We enforce this invariant with suitable ERRORs inside the fetch machinery. However, since hole filling will still exist in older versions, we do an AORelationVersion bump and ERROR out only for the current version and onwards. Note: We use the formatversion attribute of ao(cs)seg rels instead of the unused Minipage->version member. This is because even though it makes more semantic sense and constitutes an equivalent condition for the ERROR, it means more work for banning code in unique index creation: If we were to use Minipage->version, we would have to check every minipage in the block directory, which might be more expensive than the limited number of ao(cs)seg tuples we would have to check on the other hand. 
Co-authored-by: Ashwin Agrawal --- src/backend/access/aocs/aocsam.c | 52 +- src/backend/access/appendonly/appendonlyam.c | 71 +- .../appendonly/appendonlyblockdirectory.c | 46 +- src/include/catalog/pg_appendonly.h | 3 +- src/include/cdb/cdbappendonlyblockdirectory.h | 19 +- src/test/isolation2/expected/ao_blkdir.out | 630 ++++++++++++++++++ .../expected/uao_crash_compaction_row.out | 86 +-- src/test/isolation2/isolation2_schedule | 2 +- .../uao/compaction_utility_insert.source | 2 +- .../output/uao/max_concurrency.source | 254 +++---- .../output/uao/max_concurrency2.source | 254 +++---- .../output/uao/select_after_vacuum.source | 12 +- .../output/uao/vacuum_cleanup.source | 347 +++++++++- .../uao/vacuum_self_serializable.source | 12 +- src/test/isolation2/sql/ao_blkdir.sql | 83 +++ .../regress/expected/alter_table_set_am.out | 0 .../regress/expected/gp_toolkit_ao_funcs.out | 8 +- src/test/regress/output/gp_tablespace.source | 18 +- 18 files changed, 1518 insertions(+), 381 deletions(-) create mode 100644 src/test/isolation2/expected/ao_blkdir.out create mode 100644 src/test/isolation2/sql/ao_blkdir.sql create mode 100644 src/test/regress/expected/alter_table_set_am.out diff --git a/src/backend/access/aocs/aocsam.c b/src/backend/access/aocs/aocsam.c index 267dd837697..72838a1df1c 100644 --- a/src/backend/access/aocs/aocsam.c +++ b/src/backend/access/aocs/aocsam.c @@ -1272,6 +1272,10 @@ positionSkipCurrentBlock(DatumStreamFetchDesc datumStreamFetchDesc) datumStreamFetchDesc->currentBlock.lastRowNum + 1; } +/* + * Fetch the tuple's datum from the block indicated by the block directory entry + * that covers the tuple, given the colno. 
+ */ static void fetchFromCurrentBlock(AOCSFetchDesc aocsFetchDesc, int64 rowNum, @@ -1331,14 +1335,49 @@ scanToFetchValue(AOCSFetchDesc aocsFetchDesc, TupleTableSlot *slot, int colno) { - DatumStreamFetchDesc datumStreamFetchDesc = aocsFetchDesc->datumStreamFetchDesc[colno]; - DatumStreamRead *datumStream = datumStreamFetchDesc->datumStream; - bool found; + DatumStreamFetchDesc datumStreamFetchDesc = aocsFetchDesc->datumStreamFetchDesc[colno]; + DatumStreamRead *datumStream = datumStreamFetchDesc->datumStream; + AOFetchBlockMetadata *currentBlock = &datumStreamFetchDesc->currentBlock; + AppendOnlyBlockDirectoryEntry *entry = ¤tBlock->blockDirectoryEntry; + bool found; found = datumstreamread_find_block(datumStream, datumStreamFetchDesc, rowNum); - if (found) + if (!found) + { + if (AppendOnlyBlockDirectoryEntry_RangeHasRow(entry, rowNum)) + { + /* + * We fell into a hole inside the resolved block directory entry + * we obtained from AppendOnlyBlockDirectory_GetEntry(). + * This should not be happening for versions >= PG12. Scream + * appropriately. See AppendOnlyBlockDirectoryEntry for details. + */ + ereportif(datumStream->ao_read.formatVersion >= AORelationVersion_PG12, + ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("datum with row number %ld and col no %d not found in block directory entry range", rowNum, colno), + errdetail("block directory entry: (fileOffset = %ld, firstRowNum = %ld, " + "afterFileOffset = %ld, lastRowNum = %ld)", + entry->range.fileOffset, + entry->range.firstRowNum, + entry->range.afterFileOffset, + entry->range.lastRowNum))); + } + else + { + /* + * The resolved block directory entry we obtained from + * AppendOnlyBlockDirectory_GetEntry() has range s.t. + * firstRowNum < lastRowNum < rowNum + * This can happen when rowNum maps to an aborted transaction, and + * we find an earlier committed block directory row due to the + * <= scan condition in AppendOnlyBlockDirectory_GetEntry(). 
+ */ + } + } + else fetchFromCurrentBlock(aocsFetchDesc, rowNum, slot, colno); return found; @@ -1412,6 +1451,11 @@ openFetchSegmentFile(AOCSFetchDesc aocsFetchDesc, return true; } +/* + * Note: we don't reset the block directory entry here. This is crucial, so we + * can use the block directory entry later on. See comment in AOFetchBlockMetadata + * FIXME: reset other fields here. + */ static void resetCurrentBlockInfo(CurrentBlock *currentBlock) { diff --git a/src/backend/access/appendonly/appendonlyam.c b/src/backend/access/appendonly/appendonlyam.c index 4d84e957026..8775cf639c9 100755 --- a/src/backend/access/appendonly/appendonlyam.c +++ b/src/backend/access/appendonly/appendonlyam.c @@ -1978,30 +1978,69 @@ fetchNextBlock(AppendOnlyFetchDesc aoFetchDesc) return true; } -static bool +/* + * Fetch the tuple from the block indicated by the block directory entry that + * covers the tuple. + */ +static void fetchFromCurrentBlock(AppendOnlyFetchDesc aoFetchDesc, int64 rowNum, TupleTableSlot *slot) { - Assert(aoFetchDesc->currentBlock.have); - Assert(rowNum >= aoFetchDesc->currentBlock.firstRowNum); - Assert(rowNum <= aoFetchDesc->currentBlock.lastRowNum); + bool fetched; + AOFetchBlockMetadata *currentBlock = &aoFetchDesc->currentBlock; + AppendOnlyExecutorReadBlock *executorReadBlock = &aoFetchDesc->executorReadBlock; + AppendOnlyStorageRead *storageRead = &aoFetchDesc->storageRead; + AppendOnlyBlockDirectoryEntry *entry = ¤tBlock->blockDirectoryEntry; - if (!aoFetchDesc->currentBlock.gotContents) + if (!currentBlock->gotContents) { /* * Do decompression if necessary and get contents. 
*/ - AppendOnlyExecutorReadBlock_GetContents(&aoFetchDesc->executorReadBlock); + AppendOnlyExecutorReadBlock_GetContents(executorReadBlock); - aoFetchDesc->currentBlock.gotContents = true; + currentBlock->gotContents = true; } - return AppendOnlyExecutorReadBlock_FetchTuple(&aoFetchDesc->executorReadBlock, - rowNum, - /* nkeys */ 0, - /* key */ NULL, - slot); + fetched = AppendOnlyExecutorReadBlock_FetchTuple(executorReadBlock, + rowNum, + /* nkeys */ 0, + /* key */ NULL, + slot); + if (!fetched) + { + if (AppendOnlyBlockDirectoryEntry_RangeHasRow(entry, rowNum)) + { + /* + * We fell into a hole inside the resolved block directory entry + * we obtained from AppendOnlyBlockDirectory_GetEntry(). + * This should not be happening for versions >= PG12. Scream + * appropriately. See AppendOnlyBlockDirectoryEntry for details. + */ + ereportif(storageRead->formatVersion >= AORelationVersion_PG12, + ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("tuple with row number %ld not found in block directory entry range", rowNum), + errdetail("block directory entry: (fileOffset = %ld, firstRowNum = %ld, " + "afterFileOffset = %ld, lastRowNum = %ld)", + entry->range.fileOffset, + entry->range.firstRowNum, + entry->range.afterFileOffset, + entry->range.lastRowNum))); + } + else + { + /* + * The resolved block directory entry we obtained from + * AppendOnlyBlockDirectory_GetEntry() has range s.t. + * firstRowNum < lastRowNum < rowNum + * This can happen when rowNum maps to an aborted transaction, and + * we find an earlier committed block directory row due to the + * <= scan condition in AppendOnlyBlockDirectory_GetEntry(). + */ + } + } } static void @@ -2106,7 +2145,10 @@ scanToFetchTuple(AppendOnlyFetchDesc aoFetchDesc, } if (rowNum <= aoFetchDesc->currentBlock.lastRowNum) - return fetchFromCurrentBlock(aoFetchDesc, rowNum, slot); + { + fetchFromCurrentBlock(aoFetchDesc, rowNum, slot); + return true; + } /* * Update information to get next block. 
@@ -2355,7 +2397,8 @@ appendonly_fetch(AppendOnlyFetchDesc aoFetchDesc, } return false; /* row has been deleted or updated. */ } - return fetchFromCurrentBlock(aoFetchDesc, rowNum, slot); + fetchFromCurrentBlock(aoFetchDesc, rowNum, slot); + return true; } /* diff --git a/src/backend/access/appendonly/appendonlyblockdirectory.c b/src/backend/access/appendonly/appendonlyblockdirectory.c index 13fe635a72a..61b493c9a82 100644 --- a/src/backend/access/appendonly/appendonlyblockdirectory.c +++ b/src/backend/access/appendonly/appendonlyblockdirectory.c @@ -578,7 +578,12 @@ AppendOnlyBlockDirectory_GetEntry( /* Ignore columns that are not projected. */ continue; } - /* Setup the scan keys for the scan. */ + /* + * Set up the scan keys values. The keys have already been set up in + * init_internal() with the following strategy: + * (=segmentFileNum, =columnGroupNo, <=rowNum) + * See init_internal(). + */ Assert(scanKeys != NULL); scanKeys[0].sk_argument = Int32GetDatum(segmentFileNum); scanKeys[1].sk_argument = Int32GetDatum(tmpGroupNo); @@ -641,6 +646,15 @@ AppendOnlyBlockDirectory_GetEntry( /* * Since the last few blocks may not be logged in the block * directory, we always use the last entry. + * + * FIXME: If we didn't find a suitable entry, why even use the last + * entry? Currently, as it stands we would most likely return + * true from this function. This will lead to us having to do a + * fetch of the tuple from the physical file in the layer above (see + * scanToFetchTuple()), where we would ultimately find the tuple + * missing. Would it be correct to set the directory entry here to + * be the last one (for caching purposes) and return false, in order + * to avoid this physical file read? 
*/ entry_no = minipageInfo->numMinipageEntries - 1; } @@ -702,7 +716,6 @@ insert_new_entry( MinipageEntry *entry = NULL; MinipagePerColumnGroup *minipageInfo; int minipageIndex; - int lastEntryNo; if (rowCount == 0) return false; @@ -732,35 +745,6 @@ insert_new_entry( minipageInfo = &blockDirectory->minipages[minipageIndex]; Assert(minipageInfo->numMinipageEntries <= (uint32) NUM_MINIPAGE_ENTRIES); - lastEntryNo = minipageInfo->numMinipageEntries - 1; - if (lastEntryNo >= 0) - { - entry = &(minipageInfo->minipage->entry[lastEntryNo]); - - Assert(entry->firstRowNum < firstRowNum); - Assert(entry->fileOffset < fileOffset); - - if (gp_blockdirectory_entry_min_range > 0 && - fileOffset - entry->fileOffset < gp_blockdirectory_entry_min_range) - return true; - - /* Update the rowCount in the latest entry */ - Assert(entry->rowCount <= firstRowNum - entry->firstRowNum); - - ereportif(Debug_appendonly_print_blockdirectory, LOG, - (errmsg("Append-only block directory update entry: " - "(firstRowNum, columnGroupNo, fileOffset, rowCount) = (" INT64_FORMAT - ", %d, " INT64_FORMAT ", " INT64_FORMAT ") at index %d to " - "(firstRowNum, columnGroupNo, fileOffset, rowCount) = (" INT64_FORMAT - ", %d, " INT64_FORMAT ", " INT64_FORMAT ")", - entry->firstRowNum, columnGroupNo, entry->fileOffset, entry->rowCount, - minipageInfo->numMinipageEntries - 1, - entry->firstRowNum, columnGroupNo, entry->fileOffset, - firstRowNum - entry->firstRowNum))); - - entry->rowCount = firstRowNum - entry->firstRowNum; - } - if (minipageInfo->numMinipageEntries >= (uint32) gp_blockdirectory_minipage_size) { write_minipage(blockDirectory, columnGroupNo, minipageInfo); diff --git a/src/include/catalog/pg_appendonly.h b/src/include/catalog/pg_appendonly.h index 1800c33e278..59f07ef0ca5 100644 --- a/src/include/catalog/pg_appendonly.h +++ b/src/include/catalog/pg_appendonly.h @@ -69,10 +69,11 @@ typedef enum AORelationVersion * were introduced, see MPP-7251 and MPP-7372. 
*/ AORelationVersion_PG83 = 3, /* Same as Aligned64bit, but numerics are stored * in the PostgreSQL 8.3 format. */ + AORelationVersion_PG12 = 4, /* version that removed block directory hole filling. */ MaxAORelationVersion /* must always be last */ } AORelationVersion; -#define AORelationVersion_GetLatest() AORelationVersion_PG83 +#define AORelationVersion_GetLatest() AORelationVersion_PG12 #define AORelationVersion_IsValid(version) \ (version > AORelationVersion_None && version < MaxAORelationVersion) diff --git a/src/include/cdb/cdbappendonlyblockdirectory.h b/src/include/cdb/cdbappendonlyblockdirectory.h index 998dea0ee84..0cb8c18dea7 100644 --- a/src/include/cdb/cdbappendonlyblockdirectory.h +++ b/src/include/cdb/cdbappendonlyblockdirectory.h @@ -23,10 +23,21 @@ extern int gp_blockdirectory_entry_min_range; extern int gp_blockdirectory_minipage_size; +/* + * In-memory equivalent of on-disk data structure MinipageEntry, used to + * represent a block directory entry. + */ typedef struct AppendOnlyBlockDirectoryEntry { /* - * The range of blocks covered by the Block Directory entry. + * The range of blocks covered by the Block Directory entry, which is the + * continuous range [firstRowNum, lastRowNum]. There are no gaps (or holes) + * within this range. However, there may be gaps between successive block + * directory entries. For e.g. entry0 could have range [1,50] and entry1 + * could have: [100,150]. The reason gaps arise between successive entries + * is that we allocate row numbers using the gp_fastsequence mechanism, + * which allocates blocks of row numbers of a pre-determined size (that may + * be larger than the number of blocks being inserted) */ struct range { @@ -126,6 +137,12 @@ typedef struct AppendOnlyBlockDirectory typedef struct CurrentBlock { + /* + * Current cached block directory entry. + * FIXME: At times, we rely upon the values in this struct to be valid even + * when AOFetchBlockMetadata->valid = false. 
This indicates that this should + * live elsewhere. + */ AppendOnlyBlockDirectoryEntry blockDirectoryEntry; bool have; diff --git a/src/test/isolation2/expected/ao_blkdir.out b/src/test/isolation2/expected/ao_blkdir.out new file mode 100644 index 00000000000..988ea9d3725 --- /dev/null +++ b/src/test/isolation2/expected/ao_blkdir.out @@ -0,0 +1,630 @@ +-- White-box tests asserting composition of AO/CO block directory entries. +-- All tuples are directed to seg0 and each INSERT has an increasing row count +-- to make their identification easy. + +-------------------------------------------------------------------------------- +-- AO tables +-------------------------------------------------------------------------------- + +CREATE TABLE ao_blkdir_test(i int, j int) USING ao_row DISTRIBUTED BY (j); +CREATE +CREATE INDEX ao_blkdir_test_idx ON ao_blkdir_test(i); +CREATE + +1: INSERT INTO ao_blkdir_test SELECT i, 2 FROM generate_series(1, 10) i; +INSERT 10 +-- There should be 1 block directory row with a single entry covering 10 rows +SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- + (0,1) | 1 | 0 | 0 | 1 | 0 | 10 +(1 row) + +1: INSERT INTO ao_blkdir_test SELECT i, 2 FROM generate_series(11, 30) i; +INSERT 20 +-- There should be 2 block directory entries in a new block directory row, and +-- the row from the previous INSERT should not be visible. The entry from the +-- first INSERT should remain unchanged. 
+SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- + (0,2) | 1 | 0 | 0 | 1 | 0 | 10 + (0,2) | 1 | 0 | 1 | 101 | 216 | 20 +(2 rows) + +1: BEGIN; +BEGIN +1: INSERT INTO ao_blkdir_test SELECT i, 2 FROM generate_series(31, 60) i; +INSERT 30 +2: BEGIN; +BEGIN +2: INSERT INTO ao_blkdir_test SELECT i, 2 FROM generate_series(71, 110) i; +INSERT 40 +1: COMMIT; +COMMIT +2: COMMIT; +COMMIT +-- The second INSERT of 40 rows above would have landed in segfile 1 (unlike +-- segfile 0, like the first INSERT of 30 rows above). This should be reflected +-- in the block directory entries for these rows. +SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- + (0,3) | 1 | 0 | 0 | 1 | 0 | 10 + (0,3) | 1 | 0 | 1 | 101 | 216 | 20 + (0,3) | 1 | 0 | 2 | 201 | 608 | 30 + (0,4) | 2 | 0 | 0 | 1 | 0 | 40 +(4 rows) + +TRUNCATE ao_blkdir_test; +TRUNCATE +-- Insert enough rows to overflow the first block directory minipage by 2. +INSERT INTO ao_blkdir_test SELECT i, 2 FROM generate_series(1, 292700) i; +INSERT 292700 +-- There should be 2 block directory rows, one with 161 entries covering 292698 +-- rows and the other with 1 entry covering the 2 overflow rows. 
+SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- + (0,1) | 1 | 0 | 0 | 1 | 0 | 1818 + (0,1) | 1 | 0 | 1 | 1819 | 32760 | 1818 + (0,1) | 1 | 0 | 2 | 3637 | 65520 | 1818 + (0,1) | 1 | 0 | 3 | 5455 | 98280 | 1818 + (0,1) | 1 | 0 | 4 | 7273 | 131040 | 1818 + (0,1) | 1 | 0 | 5 | 9091 | 163800 | 1818 + (0,1) | 1 | 0 | 6 | 10909 | 196560 | 1818 + (0,1) | 1 | 0 | 7 | 12727 | 229320 | 1818 + (0,1) | 1 | 0 | 8 | 14545 | 262080 | 1818 + (0,1) | 1 | 0 | 9 | 16363 | 294840 | 1818 + (0,1) | 1 | 0 | 10 | 18181 | 327600 | 1818 + (0,1) | 1 | 0 | 11 | 19999 | 360360 | 1818 + (0,1) | 1 | 0 | 12 | 21817 | 393120 | 1818 + (0,1) | 1 | 0 | 13 | 23635 | 425880 | 1818 + (0,1) | 1 | 0 | 14 | 25453 | 458640 | 1818 + (0,1) | 1 | 0 | 15 | 27271 | 491400 | 1818 + (0,1) | 1 | 0 | 16 | 29089 | 524160 | 1818 + (0,1) | 1 | 0 | 17 | 30907 | 556920 | 1818 + (0,1) | 1 | 0 | 18 | 32725 | 589680 | 1818 + (0,1) | 1 | 0 | 19 | 34543 | 622440 | 1818 + (0,1) | 1 | 0 | 20 | 36361 | 655200 | 1818 + (0,1) | 1 | 0 | 21 | 38179 | 687960 | 1818 + (0,1) | 1 | 0 | 22 | 39997 | 720720 | 1818 + (0,1) | 1 | 0 | 23 | 41815 | 753480 | 1818 + (0,1) | 1 | 0 | 24 | 43633 | 786240 | 1818 + (0,1) | 1 | 0 | 25 | 45451 | 819000 | 1818 + (0,1) | 1 | 0 | 26 | 47269 | 851760 | 1818 + (0,1) | 1 | 0 | 27 | 49087 | 884520 | 1818 + (0,1) | 1 | 0 | 28 | 50905 | 917280 | 1818 + (0,1) | 1 | 0 | 29 | 52723 | 950040 | 1818 + (0,1) | 1 | 0 | 30 | 54541 | 982800 | 1818 + (0,1) | 1 | 0 | 31 | 56359 | 1015560 | 1818 + (0,1) | 1 | 0 | 32 | 58177 | 1048320 | 1818 + (0,1) | 1 | 0 | 33 | 59995 | 1081080 | 1818 + (0,1) | 1 | 0 | 34 | 61813 | 1113840 | 1818 + (0,1) | 1 | 0 | 35 | 63631 | 1146600 | 1818 + (0,1) | 1 | 0 | 36 | 65449 | 1179360 | 1818 + (0,1) | 1 | 0 | 37 | 67267 | 1212120 | 1818 + 
(0,1) | 1 | 0 | 38 | 69085 | 1244880 | 1818 + (0,1) | 1 | 0 | 39 | 70903 | 1277640 | 1818 + (0,1) | 1 | 0 | 40 | 72721 | 1310400 | 1818 + (0,1) | 1 | 0 | 41 | 74539 | 1343160 | 1818 + (0,1) | 1 | 0 | 42 | 76357 | 1375920 | 1818 + (0,1) | 1 | 0 | 43 | 78175 | 1408680 | 1818 + (0,1) | 1 | 0 | 44 | 79993 | 1441440 | 1818 + (0,1) | 1 | 0 | 45 | 81811 | 1474200 | 1818 + (0,1) | 1 | 0 | 46 | 83629 | 1506960 | 1818 + (0,1) | 1 | 0 | 47 | 85447 | 1539720 | 1818 + (0,1) | 1 | 0 | 48 | 87265 | 1572480 | 1818 + (0,1) | 1 | 0 | 49 | 89083 | 1605240 | 1818 + (0,1) | 1 | 0 | 50 | 90901 | 1638000 | 1818 + (0,1) | 1 | 0 | 51 | 92719 | 1670760 | 1818 + (0,1) | 1 | 0 | 52 | 94537 | 1703520 | 1818 + (0,1) | 1 | 0 | 53 | 96355 | 1736280 | 1818 + (0,1) | 1 | 0 | 54 | 98173 | 1769040 | 1818 + (0,1) | 1 | 0 | 55 | 99991 | 1801800 | 1818 + (0,1) | 1 | 0 | 56 | 101809 | 1834560 | 1818 + (0,1) | 1 | 0 | 57 | 103627 | 1867320 | 1818 + (0,1) | 1 | 0 | 58 | 105445 | 1900080 | 1818 + (0,1) | 1 | 0 | 59 | 107263 | 1932840 | 1818 + (0,1) | 1 | 0 | 60 | 109081 | 1965600 | 1818 + (0,1) | 1 | 0 | 61 | 110899 | 1998360 | 1818 + (0,1) | 1 | 0 | 62 | 112717 | 2031120 | 1818 + (0,1) | 1 | 0 | 63 | 114535 | 2063880 | 1818 + (0,1) | 1 | 0 | 64 | 116353 | 2096640 | 1818 + (0,1) | 1 | 0 | 65 | 118171 | 2129400 | 1818 + (0,1) | 1 | 0 | 66 | 119989 | 2162160 | 1818 + (0,1) | 1 | 0 | 67 | 121807 | 2194920 | 1818 + (0,1) | 1 | 0 | 68 | 123625 | 2227680 | 1818 + (0,1) | 1 | 0 | 69 | 125443 | 2260440 | 1818 + (0,1) | 1 | 0 | 70 | 127261 | 2293200 | 1818 + (0,1) | 1 | 0 | 71 | 129079 | 2325960 | 1818 + (0,1) | 1 | 0 | 72 | 130897 | 2358720 | 1818 + (0,1) | 1 | 0 | 73 | 132715 | 2391480 | 1818 + (0,1) | 1 | 0 | 74 | 134533 | 2424240 | 1818 + (0,1) | 1 | 0 | 75 | 136351 | 2457000 | 1818 + (0,1) | 1 | 0 | 76 | 138169 | 2489760 | 1818 + (0,1) | 1 | 0 | 77 | 139987 | 2522520 | 1818 + (0,1) | 1 | 0 | 78 | 141805 | 2555280 | 1818 + (0,1) | 1 | 0 | 79 | 143623 | 2588040 | 1818 + (0,1) | 1 | 0 | 80 | 145441 | 2620800 | 
1818 + (0,1) | 1 | 0 | 81 | 147259 | 2653560 | 1818 + (0,1) | 1 | 0 | 82 | 149077 | 2686320 | 1818 + (0,1) | 1 | 0 | 83 | 150895 | 2719080 | 1818 + (0,1) | 1 | 0 | 84 | 152713 | 2751840 | 1818 + (0,1) | 1 | 0 | 85 | 154531 | 2784600 | 1818 + (0,1) | 1 | 0 | 86 | 156349 | 2817360 | 1818 + (0,1) | 1 | 0 | 87 | 158167 | 2850120 | 1818 + (0,1) | 1 | 0 | 88 | 159985 | 2882880 | 1818 + (0,1) | 1 | 0 | 89 | 161803 | 2915640 | 1818 + (0,1) | 1 | 0 | 90 | 163621 | 2948400 | 1818 + (0,1) | 1 | 0 | 91 | 165439 | 2981160 | 1818 + (0,1) | 1 | 0 | 92 | 167257 | 3013920 | 1818 + (0,1) | 1 | 0 | 93 | 169075 | 3046680 | 1818 + (0,1) | 1 | 0 | 94 | 170893 | 3079440 | 1818 + (0,1) | 1 | 0 | 95 | 172711 | 3112200 | 1818 + (0,1) | 1 | 0 | 96 | 174529 | 3144960 | 1818 + (0,1) | 1 | 0 | 97 | 176347 | 3177720 | 1818 + (0,1) | 1 | 0 | 98 | 178165 | 3210480 | 1818 + (0,1) | 1 | 0 | 99 | 179983 | 3243240 | 1818 + (0,1) | 1 | 0 | 100 | 181801 | 3276000 | 1818 + (0,1) | 1 | 0 | 101 | 183619 | 3308760 | 1818 + (0,1) | 1 | 0 | 102 | 185437 | 3341520 | 1818 + (0,1) | 1 | 0 | 103 | 187255 | 3374280 | 1818 + (0,1) | 1 | 0 | 104 | 189073 | 3407040 | 1818 + (0,1) | 1 | 0 | 105 | 190891 | 3439800 | 1818 + (0,1) | 1 | 0 | 106 | 192709 | 3472560 | 1818 + (0,1) | 1 | 0 | 107 | 194527 | 3505320 | 1818 + (0,1) | 1 | 0 | 108 | 196345 | 3538080 | 1818 + (0,1) | 1 | 0 | 109 | 198163 | 3570840 | 1818 + (0,1) | 1 | 0 | 110 | 199981 | 3603600 | 1818 + (0,1) | 1 | 0 | 111 | 201799 | 3636360 | 1818 + (0,1) | 1 | 0 | 112 | 203617 | 3669120 | 1818 + (0,1) | 1 | 0 | 113 | 205435 | 3701880 | 1818 + (0,1) | 1 | 0 | 114 | 207253 | 3734640 | 1818 + (0,1) | 1 | 0 | 115 | 209071 | 3767400 | 1818 + (0,1) | 1 | 0 | 116 | 210889 | 3800160 | 1818 + (0,1) | 1 | 0 | 117 | 212707 | 3832920 | 1818 + (0,1) | 1 | 0 | 118 | 214525 | 3865680 | 1818 + (0,1) | 1 | 0 | 119 | 216343 | 3898440 | 1818 + (0,1) | 1 | 0 | 120 | 218161 | 3931200 | 1818 + (0,1) | 1 | 0 | 121 | 219979 | 3963960 | 1818 + (0,1) | 1 | 0 | 122 | 221797 | 3996720 | 
1818 + (0,1) | 1 | 0 | 123 | 223615 | 4029480 | 1818 + (0,1) | 1 | 0 | 124 | 225433 | 4062240 | 1818 + (0,1) | 1 | 0 | 125 | 227251 | 4095000 | 1818 + (0,1) | 1 | 0 | 126 | 229069 | 4127760 | 1818 + (0,1) | 1 | 0 | 127 | 230887 | 4160520 | 1818 + (0,1) | 1 | 0 | 128 | 232705 | 4193280 | 1818 + (0,1) | 1 | 0 | 129 | 234523 | 4226040 | 1818 + (0,1) | 1 | 0 | 130 | 236341 | 4258800 | 1818 + (0,1) | 1 | 0 | 131 | 238159 | 4291560 | 1818 + (0,1) | 1 | 0 | 132 | 239977 | 4324320 | 1818 + (0,1) | 1 | 0 | 133 | 241795 | 4357080 | 1818 + (0,1) | 1 | 0 | 134 | 243613 | 4389840 | 1818 + (0,1) | 1 | 0 | 135 | 245431 | 4422600 | 1818 + (0,1) | 1 | 0 | 136 | 247249 | 4455360 | 1818 + (0,1) | 1 | 0 | 137 | 249067 | 4488120 | 1818 + (0,1) | 1 | 0 | 138 | 250885 | 4520880 | 1818 + (0,1) | 1 | 0 | 139 | 252703 | 4553640 | 1818 + (0,1) | 1 | 0 | 140 | 254521 | 4586400 | 1818 + (0,1) | 1 | 0 | 141 | 256339 | 4619160 | 1818 + (0,1) | 1 | 0 | 142 | 258157 | 4651920 | 1818 + (0,1) | 1 | 0 | 143 | 259975 | 4684680 | 1818 + (0,1) | 1 | 0 | 144 | 261793 | 4717440 | 1818 + (0,1) | 1 | 0 | 145 | 263611 | 4750200 | 1818 + (0,1) | 1 | 0 | 146 | 265429 | 4782960 | 1818 + (0,1) | 1 | 0 | 147 | 267247 | 4815720 | 1818 + (0,1) | 1 | 0 | 148 | 269065 | 4848480 | 1818 + (0,1) | 1 | 0 | 149 | 270883 | 4881240 | 1818 + (0,1) | 1 | 0 | 150 | 272701 | 4914000 | 1818 + (0,1) | 1 | 0 | 151 | 274519 | 4946760 | 1818 + (0,1) | 1 | 0 | 152 | 276337 | 4979520 | 1818 + (0,1) | 1 | 0 | 153 | 278155 | 5012280 | 1818 + (0,1) | 1 | 0 | 154 | 279973 | 5045040 | 1818 + (0,1) | 1 | 0 | 155 | 281791 | 5077800 | 1818 + (0,1) | 1 | 0 | 156 | 283609 | 5110560 | 1818 + (0,1) | 1 | 0 | 157 | 285427 | 5143320 | 1818 + (0,1) | 1 | 0 | 158 | 287245 | 5176080 | 1818 + (0,1) | 1 | 0 | 159 | 289063 | 5208840 | 1818 + (0,1) | 1 | 0 | 160 | 290881 | 5241600 | 1818 + (0,2) | 1 | 0 | 0 | 292699 | 5274360 | 2 +(162 rows) + +-------------------------------------------------------------------------------- +-- AOCO tables 
+-------------------------------------------------------------------------------- + +CREATE TABLE aoco_blkdir_test(i int, j int) USING ao_column DISTRIBUTED BY (j); +CREATE +CREATE INDEX aoco_blkdir_test_idx ON aoco_blkdir_test(i); +CREATE + +1: INSERT INTO aoco_blkdir_test SELECT i, 2 FROM generate_series(1, 10) i; +INSERT 10 +-- There should be 2 block directory rows with a single entry covering 10 rows, +-- (1 for each column). +SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- + (0,1) | 1 | 0 | 0 | 1 | 0 | 10 + (0,2) | 1 | 1 | 0 | 1 | 0 | 10 +(2 rows) + +1: INSERT INTO aoco_blkdir_test SELECT i, 2 FROM generate_series(11, 30) i; +INSERT 20 +-- There should be 2 block directory rows, carrying 2 entries each. The rows +-- from the previous INSERT should not be visible. The entries from the first +-- INSERT should remain unchanged. +SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- + (0,3) | 1 | 0 | 0 | 1 | 0 | 10 + (0,3) | 1 | 0 | 1 | 101 | 80 | 20 + (0,4) | 1 | 1 | 0 | 1 | 0 | 10 + (0,4) | 1 | 1 | 1 | 101 | 80 | 20 +(4 rows) + +1: BEGIN; +BEGIN +1: INSERT INTO aoco_blkdir_test SELECT i, 2 FROM generate_series(31, 60) i; +INSERT 30 +2: BEGIN; +BEGIN +2: INSERT INTO aoco_blkdir_test SELECT i, 2 FROM generate_series(71, 110) i; +INSERT 40 +1: COMMIT; +COMMIT +2: COMMIT; +COMMIT +-- The second INSERT of 40 rows above would have landed in segfile 1 (unlike +-- segfile 0, like the first INSERT of 30 rows above). 
This should be reflected +-- in the block directory entries for these rows. +SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- + (0,5) | 1 | 0 | 0 | 1 | 0 | 10 + (0,5) | 1 | 0 | 1 | 101 | 80 | 20 + (0,5) | 1 | 0 | 2 | 201 | 200 | 30 + (0,6) | 1 | 1 | 0 | 1 | 0 | 10 + (0,6) | 1 | 1 | 1 | 101 | 80 | 20 + (0,6) | 1 | 1 | 2 | 201 | 200 | 30 + (0,7) | 2 | 0 | 0 | 1 | 0 | 40 + (0,8) | 2 | 1 | 0 | 1 | 0 | 40 +(8 rows) + +TRUNCATE aoco_blkdir_test; +TRUNCATE +-- Insert enough rows to overflow the first block directory minipage by 2. +INSERT INTO aoco_blkdir_test SELECT i, 2 FROM generate_series(1, 1317143) i; +INSERT 1317143 +-- There should be 2 block directory rows, 2 for each column, one with 161 +-- entries covering 1317141 rows and the other with 1 entry covering the 2 +-- overflow rows. 
+SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- + (0,1) | 1 | 0 | 0 | 1 | 0 | 8181 + (0,1) | 1 | 0 | 1 | 8182 | 32768 | 8181 + (0,1) | 1 | 0 | 2 | 16363 | 65536 | 8181 + (0,1) | 1 | 0 | 3 | 24544 | 98304 | 8181 + (0,1) | 1 | 0 | 4 | 32725 | 131072 | 8181 + (0,1) | 1 | 0 | 5 | 40906 | 163840 | 8181 + (0,1) | 1 | 0 | 6 | 49087 | 196608 | 8181 + (0,1) | 1 | 0 | 7 | 57268 | 229376 | 8181 + (0,1) | 1 | 0 | 8 | 65449 | 262144 | 8181 + (0,1) | 1 | 0 | 9 | 73630 | 294912 | 8181 + (0,1) | 1 | 0 | 10 | 81811 | 327680 | 8181 + (0,1) | 1 | 0 | 11 | 89992 | 360448 | 8181 + (0,1) | 1 | 0 | 12 | 98173 | 393216 | 8181 + (0,1) | 1 | 0 | 13 | 106354 | 425984 | 8181 + (0,1) | 1 | 0 | 14 | 114535 | 458752 | 8181 + (0,1) | 1 | 0 | 15 | 122716 | 491520 | 8181 + (0,1) | 1 | 0 | 16 | 130897 | 524288 | 8181 + (0,1) | 1 | 0 | 17 | 139078 | 557056 | 8181 + (0,1) | 1 | 0 | 18 | 147259 | 589824 | 8181 + (0,1) | 1 | 0 | 19 | 155440 | 622592 | 8181 + (0,1) | 1 | 0 | 20 | 163621 | 655360 | 8181 + (0,1) | 1 | 0 | 21 | 171802 | 688128 | 8181 + (0,1) | 1 | 0 | 22 | 179983 | 720896 | 8181 + (0,1) | 1 | 0 | 23 | 188164 | 753664 | 8181 + (0,1) | 1 | 0 | 24 | 196345 | 786432 | 8181 + (0,1) | 1 | 0 | 25 | 204526 | 819200 | 8181 + (0,1) | 1 | 0 | 26 | 212707 | 851968 | 8181 + (0,1) | 1 | 0 | 27 | 220888 | 884736 | 8181 + (0,1) | 1 | 0 | 28 | 229069 | 917504 | 8181 + (0,1) | 1 | 0 | 29 | 237250 | 950272 | 8181 + (0,1) | 1 | 0 | 30 | 245431 | 983040 | 8181 + (0,1) | 1 | 0 | 31 | 253612 | 1015808 | 8181 + (0,1) | 1 | 0 | 32 | 261793 | 1048576 | 8181 + (0,1) | 1 | 0 | 33 | 269974 | 1081344 | 8181 + (0,1) | 1 | 0 | 34 | 278155 | 1114112 | 8181 + (0,1) | 1 | 0 | 35 | 286336 | 1146880 | 8181 + (0,1) | 1 | 0 | 36 | 294517 | 1179648 | 8181 + (0,1) | 1 | 0 | 37 | 
302698 | 1212416 | 8181 + (0,1) | 1 | 0 | 38 | 310879 | 1245184 | 8181 + (0,1) | 1 | 0 | 39 | 319060 | 1277952 | 8181 + (0,1) | 1 | 0 | 40 | 327241 | 1310720 | 8181 + (0,1) | 1 | 0 | 41 | 335422 | 1343488 | 8181 + (0,1) | 1 | 0 | 42 | 343603 | 1376256 | 8181 + (0,1) | 1 | 0 | 43 | 351784 | 1409024 | 8181 + (0,1) | 1 | 0 | 44 | 359965 | 1441792 | 8181 + (0,1) | 1 | 0 | 45 | 368146 | 1474560 | 8181 + (0,1) | 1 | 0 | 46 | 376327 | 1507328 | 8181 + (0,1) | 1 | 0 | 47 | 384508 | 1540096 | 8181 + (0,1) | 1 | 0 | 48 | 392689 | 1572864 | 8181 + (0,1) | 1 | 0 | 49 | 400870 | 1605632 | 8181 + (0,1) | 1 | 0 | 50 | 409051 | 1638400 | 8181 + (0,1) | 1 | 0 | 51 | 417232 | 1671168 | 8181 + (0,1) | 1 | 0 | 52 | 425413 | 1703936 | 8181 + (0,1) | 1 | 0 | 53 | 433594 | 1736704 | 8181 + (0,1) | 1 | 0 | 54 | 441775 | 1769472 | 8181 + (0,1) | 1 | 0 | 55 | 449956 | 1802240 | 8181 + (0,1) | 1 | 0 | 56 | 458137 | 1835008 | 8181 + (0,1) | 1 | 0 | 57 | 466318 | 1867776 | 8181 + (0,1) | 1 | 0 | 58 | 474499 | 1900544 | 8181 + (0,1) | 1 | 0 | 59 | 482680 | 1933312 | 8181 + (0,1) | 1 | 0 | 60 | 490861 | 1966080 | 8181 + (0,1) | 1 | 0 | 61 | 499042 | 1998848 | 8181 + (0,1) | 1 | 0 | 62 | 507223 | 2031616 | 8181 + (0,1) | 1 | 0 | 63 | 515404 | 2064384 | 8181 + (0,1) | 1 | 0 | 64 | 523585 | 2097152 | 8181 + (0,1) | 1 | 0 | 65 | 531766 | 2129920 | 8181 + (0,1) | 1 | 0 | 66 | 539947 | 2162688 | 8181 + (0,1) | 1 | 0 | 67 | 548128 | 2195456 | 8181 + (0,1) | 1 | 0 | 68 | 556309 | 2228224 | 8181 + (0,1) | 1 | 0 | 69 | 564490 | 2260992 | 8181 + (0,1) | 1 | 0 | 70 | 572671 | 2293760 | 8181 + (0,1) | 1 | 0 | 71 | 580852 | 2326528 | 8181 + (0,1) | 1 | 0 | 72 | 589033 | 2359296 | 8181 + (0,1) | 1 | 0 | 73 | 597214 | 2392064 | 8181 + (0,1) | 1 | 0 | 74 | 605395 | 2424832 | 8181 + (0,1) | 1 | 0 | 75 | 613576 | 2457600 | 8181 + (0,1) | 1 | 0 | 76 | 621757 | 2490368 | 8181 + (0,1) | 1 | 0 | 77 | 629938 | 2523136 | 8181 + (0,1) | 1 | 0 | 78 | 638119 | 2555904 | 8181 + (0,1) | 1 | 0 | 79 | 646300 | 2588672 | 8181 + 
(0,1) | 1 | 0 | 80 | 654481 | 2621440 | 8181 + (0,1) | 1 | 0 | 81 | 662662 | 2654208 | 8181 + (0,1) | 1 | 0 | 82 | 670843 | 2686976 | 8181 + (0,1) | 1 | 0 | 83 | 679024 | 2719744 | 8181 + (0,1) | 1 | 0 | 84 | 687205 | 2752512 | 8181 + (0,1) | 1 | 0 | 85 | 695386 | 2785280 | 8181 + (0,1) | 1 | 0 | 86 | 703567 | 2818048 | 8181 + (0,1) | 1 | 0 | 87 | 711748 | 2850816 | 8181 + (0,1) | 1 | 0 | 88 | 719929 | 2883584 | 8181 + (0,1) | 1 | 0 | 89 | 728110 | 2916352 | 8181 + (0,1) | 1 | 0 | 90 | 736291 | 2949120 | 8181 + (0,1) | 1 | 0 | 91 | 744472 | 2981888 | 8181 + (0,1) | 1 | 0 | 92 | 752653 | 3014656 | 8181 + (0,1) | 1 | 0 | 93 | 760834 | 3047424 | 8181 + (0,1) | 1 | 0 | 94 | 769015 | 3080192 | 8181 + (0,1) | 1 | 0 | 95 | 777196 | 3112960 | 8181 + (0,1) | 1 | 0 | 96 | 785377 | 3145728 | 8181 + (0,1) | 1 | 0 | 97 | 793558 | 3178496 | 8181 + (0,1) | 1 | 0 | 98 | 801739 | 3211264 | 8181 + (0,1) | 1 | 0 | 99 | 809920 | 3244032 | 8181 + (0,1) | 1 | 0 | 100 | 818101 | 3276800 | 8181 + (0,1) | 1 | 0 | 101 | 826282 | 3309568 | 8181 + (0,1) | 1 | 0 | 102 | 834463 | 3342336 | 8181 + (0,1) | 1 | 0 | 103 | 842644 | 3375104 | 8181 + (0,1) | 1 | 0 | 104 | 850825 | 3407872 | 8181 + (0,1) | 1 | 0 | 105 | 859006 | 3440640 | 8181 + (0,1) | 1 | 0 | 106 | 867187 | 3473408 | 8181 + (0,1) | 1 | 0 | 107 | 875368 | 3506176 | 8181 + (0,1) | 1 | 0 | 108 | 883549 | 3538944 | 8181 + (0,1) | 1 | 0 | 109 | 891730 | 3571712 | 8181 + (0,1) | 1 | 0 | 110 | 899911 | 3604480 | 8181 + (0,1) | 1 | 0 | 111 | 908092 | 3637248 | 8181 + (0,1) | 1 | 0 | 112 | 916273 | 3670016 | 8181 + (0,1) | 1 | 0 | 113 | 924454 | 3702784 | 8181 + (0,1) | 1 | 0 | 114 | 932635 | 3735552 | 8181 + (0,1) | 1 | 0 | 115 | 940816 | 3768320 | 8181 + (0,1) | 1 | 0 | 116 | 948997 | 3801088 | 8181 + (0,1) | 1 | 0 | 117 | 957178 | 3833856 | 8181 + (0,1) | 1 | 0 | 118 | 965359 | 3866624 | 8181 + (0,1) | 1 | 0 | 119 | 973540 | 3899392 | 8181 + (0,1) | 1 | 0 | 120 | 981721 | 3932160 | 8181 + (0,1) | 1 | 0 | 121 | 989902 | 3964928 | 8181 + 
(0,1) | 1 | 0 | 122 | 998083 | 3997696 | 8181 + (0,1) | 1 | 0 | 123 | 1006264 | 4030464 | 8181 + (0,1) | 1 | 0 | 124 | 1014445 | 4063232 | 8181 + (0,1) | 1 | 0 | 125 | 1022626 | 4096000 | 8181 + (0,1) | 1 | 0 | 126 | 1030807 | 4128768 | 8181 + (0,1) | 1 | 0 | 127 | 1038988 | 4161536 | 8181 + (0,1) | 1 | 0 | 128 | 1047169 | 4194304 | 8181 + (0,1) | 1 | 0 | 129 | 1055350 | 4227072 | 8181 + (0,1) | 1 | 0 | 130 | 1063531 | 4259840 | 8181 + (0,1) | 1 | 0 | 131 | 1071712 | 4292608 | 8181 + (0,1) | 1 | 0 | 132 | 1079893 | 4325376 | 8181 + (0,1) | 1 | 0 | 133 | 1088074 | 4358144 | 8181 + (0,1) | 1 | 0 | 134 | 1096255 | 4390912 | 8181 + (0,1) | 1 | 0 | 135 | 1104436 | 4423680 | 8181 + (0,1) | 1 | 0 | 136 | 1112617 | 4456448 | 8181 + (0,1) | 1 | 0 | 137 | 1120798 | 4489216 | 8181 + (0,1) | 1 | 0 | 138 | 1128979 | 4521984 | 8181 + (0,1) | 1 | 0 | 139 | 1137160 | 4554752 | 8181 + (0,1) | 1 | 0 | 140 | 1145341 | 4587520 | 8181 + (0,1) | 1 | 0 | 141 | 1153522 | 4620288 | 8181 + (0,1) | 1 | 0 | 142 | 1161703 | 4653056 | 8181 + (0,1) | 1 | 0 | 143 | 1169884 | 4685824 | 8181 + (0,1) | 1 | 0 | 144 | 1178065 | 4718592 | 8181 + (0,1) | 1 | 0 | 145 | 1186246 | 4751360 | 8181 + (0,1) | 1 | 0 | 146 | 1194427 | 4784128 | 8181 + (0,1) | 1 | 0 | 147 | 1202608 | 4816896 | 8181 + (0,1) | 1 | 0 | 148 | 1210789 | 4849664 | 8181 + (0,1) | 1 | 0 | 149 | 1218970 | 4882432 | 8181 + (0,1) | 1 | 0 | 150 | 1227151 | 4915200 | 8181 + (0,1) | 1 | 0 | 151 | 1235332 | 4947968 | 8181 + (0,1) | 1 | 0 | 152 | 1243513 | 4980736 | 8181 + (0,1) | 1 | 0 | 153 | 1251694 | 5013504 | 8181 + (0,1) | 1 | 0 | 154 | 1259875 | 5046272 | 8181 + (0,1) | 1 | 0 | 155 | 1268056 | 5079040 | 8181 + (0,1) | 1 | 0 | 156 | 1276237 | 5111808 | 8181 + (0,1) | 1 | 0 | 157 | 1284418 | 5144576 | 8181 + (0,1) | 1 | 0 | 158 | 1292599 | 5177344 | 8181 + (0,1) | 1 | 0 | 159 | 1300780 | 5210112 | 8181 + (0,1) | 1 | 0 | 160 | 1308961 | 5242880 | 8181 + (0,2) | 1 | 1 | 0 | 1 | 0 | 8181 + (0,2) | 1 | 1 | 1 | 8182 | 32768 | 8181 + (0,2) | 1 | 
1 | 2 | 16363 | 65536 | 8181 + (0,2) | 1 | 1 | 3 | 24544 | 98304 | 8181 + (0,2) | 1 | 1 | 4 | 32725 | 131072 | 8181 + (0,2) | 1 | 1 | 5 | 40906 | 163840 | 8181 + (0,2) | 1 | 1 | 6 | 49087 | 196608 | 8181 + (0,2) | 1 | 1 | 7 | 57268 | 229376 | 8181 + (0,2) | 1 | 1 | 8 | 65449 | 262144 | 8181 + (0,2) | 1 | 1 | 9 | 73630 | 294912 | 8181 + (0,2) | 1 | 1 | 10 | 81811 | 327680 | 8181 + (0,2) | 1 | 1 | 11 | 89992 | 360448 | 8181 + (0,2) | 1 | 1 | 12 | 98173 | 393216 | 8181 + (0,2) | 1 | 1 | 13 | 106354 | 425984 | 8181 + (0,2) | 1 | 1 | 14 | 114535 | 458752 | 8181 + (0,2) | 1 | 1 | 15 | 122716 | 491520 | 8181 + (0,2) | 1 | 1 | 16 | 130897 | 524288 | 8181 + (0,2) | 1 | 1 | 17 | 139078 | 557056 | 8181 + (0,2) | 1 | 1 | 18 | 147259 | 589824 | 8181 + (0,2) | 1 | 1 | 19 | 155440 | 622592 | 8181 + (0,2) | 1 | 1 | 20 | 163621 | 655360 | 8181 + (0,2) | 1 | 1 | 21 | 171802 | 688128 | 8181 + (0,2) | 1 | 1 | 22 | 179983 | 720896 | 8181 + (0,2) | 1 | 1 | 23 | 188164 | 753664 | 8181 + (0,2) | 1 | 1 | 24 | 196345 | 786432 | 8181 + (0,2) | 1 | 1 | 25 | 204526 | 819200 | 8181 + (0,2) | 1 | 1 | 26 | 212707 | 851968 | 8181 + (0,2) | 1 | 1 | 27 | 220888 | 884736 | 8181 + (0,2) | 1 | 1 | 28 | 229069 | 917504 | 8181 + (0,2) | 1 | 1 | 29 | 237250 | 950272 | 8181 + (0,2) | 1 | 1 | 30 | 245431 | 983040 | 8181 + (0,2) | 1 | 1 | 31 | 253612 | 1015808 | 8181 + (0,2) | 1 | 1 | 32 | 261793 | 1048576 | 8181 + (0,2) | 1 | 1 | 33 | 269974 | 1081344 | 8181 + (0,2) | 1 | 1 | 34 | 278155 | 1114112 | 8181 + (0,2) | 1 | 1 | 35 | 286336 | 1146880 | 8181 + (0,2) | 1 | 1 | 36 | 294517 | 1179648 | 8181 + (0,2) | 1 | 1 | 37 | 302698 | 1212416 | 8181 + (0,2) | 1 | 1 | 38 | 310879 | 1245184 | 8181 + (0,2) | 1 | 1 | 39 | 319060 | 1277952 | 8181 + (0,2) | 1 | 1 | 40 | 327241 | 1310720 | 8181 + (0,2) | 1 | 1 | 41 | 335422 | 1343488 | 8181 + (0,2) | 1 | 1 | 42 | 343603 | 1376256 | 8181 + (0,2) | 1 | 1 | 43 | 351784 | 1409024 | 8181 + (0,2) | 1 | 1 | 44 | 359965 | 1441792 | 8181 + (0,2) | 1 | 1 | 45 | 368146 | 1474560 | 
8181 + (0,2) | 1 | 1 | 46 | 376327 | 1507328 | 8181 + (0,2) | 1 | 1 | 47 | 384508 | 1540096 | 8181 + (0,2) | 1 | 1 | 48 | 392689 | 1572864 | 8181 + (0,2) | 1 | 1 | 49 | 400870 | 1605632 | 8181 + (0,2) | 1 | 1 | 50 | 409051 | 1638400 | 8181 + (0,2) | 1 | 1 | 51 | 417232 | 1671168 | 8181 + (0,2) | 1 | 1 | 52 | 425413 | 1703936 | 8181 + (0,2) | 1 | 1 | 53 | 433594 | 1736704 | 8181 + (0,2) | 1 | 1 | 54 | 441775 | 1769472 | 8181 + (0,2) | 1 | 1 | 55 | 449956 | 1802240 | 8181 + (0,2) | 1 | 1 | 56 | 458137 | 1835008 | 8181 + (0,2) | 1 | 1 | 57 | 466318 | 1867776 | 8181 + (0,2) | 1 | 1 | 58 | 474499 | 1900544 | 8181 + (0,2) | 1 | 1 | 59 | 482680 | 1933312 | 8181 + (0,2) | 1 | 1 | 60 | 490861 | 1966080 | 8181 + (0,2) | 1 | 1 | 61 | 499042 | 1998848 | 8181 + (0,2) | 1 | 1 | 62 | 507223 | 2031616 | 8181 + (0,2) | 1 | 1 | 63 | 515404 | 2064384 | 8181 + (0,2) | 1 | 1 | 64 | 523585 | 2097152 | 8181 + (0,2) | 1 | 1 | 65 | 531766 | 2129920 | 8181 + (0,2) | 1 | 1 | 66 | 539947 | 2162688 | 8181 + (0,2) | 1 | 1 | 67 | 548128 | 2195456 | 8181 + (0,2) | 1 | 1 | 68 | 556309 | 2228224 | 8181 + (0,2) | 1 | 1 | 69 | 564490 | 2260992 | 8181 + (0,2) | 1 | 1 | 70 | 572671 | 2293760 | 8181 + (0,2) | 1 | 1 | 71 | 580852 | 2326528 | 8181 + (0,2) | 1 | 1 | 72 | 589033 | 2359296 | 8181 + (0,2) | 1 | 1 | 73 | 597214 | 2392064 | 8181 + (0,2) | 1 | 1 | 74 | 605395 | 2424832 | 8181 + (0,2) | 1 | 1 | 75 | 613576 | 2457600 | 8181 + (0,2) | 1 | 1 | 76 | 621757 | 2490368 | 8181 + (0,2) | 1 | 1 | 77 | 629938 | 2523136 | 8181 + (0,2) | 1 | 1 | 78 | 638119 | 2555904 | 8181 + (0,2) | 1 | 1 | 79 | 646300 | 2588672 | 8181 + (0,2) | 1 | 1 | 80 | 654481 | 2621440 | 8181 + (0,2) | 1 | 1 | 81 | 662662 | 2654208 | 8181 + (0,2) | 1 | 1 | 82 | 670843 | 2686976 | 8181 + (0,2) | 1 | 1 | 83 | 679024 | 2719744 | 8181 + (0,2) | 1 | 1 | 84 | 687205 | 2752512 | 8181 + (0,2) | 1 | 1 | 85 | 695386 | 2785280 | 8181 + (0,2) | 1 | 1 | 86 | 703567 | 2818048 | 8181 + (0,2) | 1 | 1 | 87 | 711748 | 2850816 | 8181 + (0,2) | 1 | 1 | 88 
| 719929 | 2883584 | 8181 + (0,2) | 1 | 1 | 89 | 728110 | 2916352 | 8181 + (0,2) | 1 | 1 | 90 | 736291 | 2949120 | 8181 + (0,2) | 1 | 1 | 91 | 744472 | 2981888 | 8181 + (0,2) | 1 | 1 | 92 | 752653 | 3014656 | 8181 + (0,2) | 1 | 1 | 93 | 760834 | 3047424 | 8181 + (0,2) | 1 | 1 | 94 | 769015 | 3080192 | 8181 + (0,2) | 1 | 1 | 95 | 777196 | 3112960 | 8181 + (0,2) | 1 | 1 | 96 | 785377 | 3145728 | 8181 + (0,2) | 1 | 1 | 97 | 793558 | 3178496 | 8181 + (0,2) | 1 | 1 | 98 | 801739 | 3211264 | 8181 + (0,2) | 1 | 1 | 99 | 809920 | 3244032 | 8181 + (0,2) | 1 | 1 | 100 | 818101 | 3276800 | 8181 + (0,2) | 1 | 1 | 101 | 826282 | 3309568 | 8181 + (0,2) | 1 | 1 | 102 | 834463 | 3342336 | 8181 + (0,2) | 1 | 1 | 103 | 842644 | 3375104 | 8181 + (0,2) | 1 | 1 | 104 | 850825 | 3407872 | 8181 + (0,2) | 1 | 1 | 105 | 859006 | 3440640 | 8181 + (0,2) | 1 | 1 | 106 | 867187 | 3473408 | 8181 + (0,2) | 1 | 1 | 107 | 875368 | 3506176 | 8181 + (0,2) | 1 | 1 | 108 | 883549 | 3538944 | 8181 + (0,2) | 1 | 1 | 109 | 891730 | 3571712 | 8181 + (0,2) | 1 | 1 | 110 | 899911 | 3604480 | 8181 + (0,2) | 1 | 1 | 111 | 908092 | 3637248 | 8181 + (0,2) | 1 | 1 | 112 | 916273 | 3670016 | 8181 + (0,2) | 1 | 1 | 113 | 924454 | 3702784 | 8181 + (0,2) | 1 | 1 | 114 | 932635 | 3735552 | 8181 + (0,2) | 1 | 1 | 115 | 940816 | 3768320 | 8181 + (0,2) | 1 | 1 | 116 | 948997 | 3801088 | 8181 + (0,2) | 1 | 1 | 117 | 957178 | 3833856 | 8181 + (0,2) | 1 | 1 | 118 | 965359 | 3866624 | 8181 + (0,2) | 1 | 1 | 119 | 973540 | 3899392 | 8181 + (0,2) | 1 | 1 | 120 | 981721 | 3932160 | 8181 + (0,2) | 1 | 1 | 121 | 989902 | 3964928 | 8181 + (0,2) | 1 | 1 | 122 | 998083 | 3997696 | 8181 + (0,2) | 1 | 1 | 123 | 1006264 | 4030464 | 8181 + (0,2) | 1 | 1 | 124 | 1014445 | 4063232 | 8181 + (0,2) | 1 | 1 | 125 | 1022626 | 4096000 | 8181 + (0,2) | 1 | 1 | 126 | 1030807 | 4128768 | 8181 + (0,2) | 1 | 1 | 127 | 1038988 | 4161536 | 8181 + (0,2) | 1 | 1 | 128 | 1047169 | 4194304 | 8181 + (0,2) | 1 | 1 | 129 | 1055350 | 4227072 | 8181 + (0,2) | 
1 | 1 | 130 | 1063531 | 4259840 | 8181 + (0,2) | 1 | 1 | 131 | 1071712 | 4292608 | 8181 + (0,2) | 1 | 1 | 132 | 1079893 | 4325376 | 8181 + (0,2) | 1 | 1 | 133 | 1088074 | 4358144 | 8181 + (0,2) | 1 | 1 | 134 | 1096255 | 4390912 | 8181 + (0,2) | 1 | 1 | 135 | 1104436 | 4423680 | 8181 + (0,2) | 1 | 1 | 136 | 1112617 | 4456448 | 8181 + (0,2) | 1 | 1 | 137 | 1120798 | 4489216 | 8181 + (0,2) | 1 | 1 | 138 | 1128979 | 4521984 | 8181 + (0,2) | 1 | 1 | 139 | 1137160 | 4554752 | 8181 + (0,2) | 1 | 1 | 140 | 1145341 | 4587520 | 8181 + (0,2) | 1 | 1 | 141 | 1153522 | 4620288 | 8181 + (0,2) | 1 | 1 | 142 | 1161703 | 4653056 | 8181 + (0,2) | 1 | 1 | 143 | 1169884 | 4685824 | 8181 + (0,2) | 1 | 1 | 144 | 1178065 | 4718592 | 8181 + (0,2) | 1 | 1 | 145 | 1186246 | 4751360 | 8181 + (0,2) | 1 | 1 | 146 | 1194427 | 4784128 | 8181 + (0,2) | 1 | 1 | 147 | 1202608 | 4816896 | 8181 + (0,2) | 1 | 1 | 148 | 1210789 | 4849664 | 8181 + (0,2) | 1 | 1 | 149 | 1218970 | 4882432 | 8181 + (0,2) | 1 | 1 | 150 | 1227151 | 4915200 | 8181 + (0,2) | 1 | 1 | 151 | 1235332 | 4947968 | 8181 + (0,2) | 1 | 1 | 152 | 1243513 | 4980736 | 8181 + (0,2) | 1 | 1 | 153 | 1251694 | 5013504 | 8181 + (0,2) | 1 | 1 | 154 | 1259875 | 5046272 | 8181 + (0,2) | 1 | 1 | 155 | 1268056 | 5079040 | 8181 + (0,2) | 1 | 1 | 156 | 1276237 | 5111808 | 8181 + (0,2) | 1 | 1 | 157 | 1284418 | 5144576 | 8181 + (0,2) | 1 | 1 | 158 | 1292599 | 5177344 | 8181 + (0,2) | 1 | 1 | 159 | 1300780 | 5210112 | 8181 + (0,2) | 1 | 1 | 160 | 1308961 | 5242880 | 8181 + (0,3) | 1 | 0 | 0 | 1317142 | 5275648 | 2 + (0,4) | 1 | 1 | 0 | 1317142 | 5275648 | 2 +(324 rows) diff --git a/src/test/isolation2/expected/uao_crash_compaction_row.out b/src/test/isolation2/expected/uao_crash_compaction_row.out index 5dd3bc113f0..3d34cbeebbf 100644 --- a/src/test/isolation2/expected/uao_crash_compaction_row.out +++ b/src/test/isolation2/expected/uao_crash_compaction_row.out @@ -107,8 +107,8 @@ ERROR: Error on receive from seg0 127.0.0.1:7002 pid=15584: server closed 
the c 1:SELECT * FROM gp_toolkit.__gp_aoseg('crash_before_cleanup_phase') where segment_id = 0; segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state ------------+-------+-----+----------+---------------+------------------+----------+---------------+------- - 0 | 1 | 248 | 5 | 1 | 248 | 2 | 3 | 2 - 0 | 2 | 160 | 3 | 1 | 160 | 0 | 3 | 1 + 0 | 1 | 248 | 5 | 1 | 248 | 2 | 4 | 2 + 0 | 2 | 160 | 3 | 1 | 160 | 0 | 4 | 1 (2 rows) -- do vacuum again, there should be no await-dropping segment files, no concurrent -- transactions exist this time when the VACUUM is performed. @@ -117,11 +117,11 @@ VACUUM 1:SELECT * FROM gp_toolkit.__gp_aoseg('crash_before_cleanup_phase'); segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state ------------+-------+-----+----------+---------------+------------------+----------+---------------+------- - 0 | 1 | 0 | 0 | 0 | 0 | 2 | 3 | 1 - 0 | 2 | 160 | 3 | 1 | 160 | 0 | 3 | 1 - 1 | 1 | 0 | 0 | 0 | 0 | 2 | 3 | 1 - 1 | 2 | 0 | 0 | 0 | 0 | 0 | 3 | 1 - 2 | 1 | 200 | 4 | 1 | 200 | 1 | 3 | 1 + 0 | 1 | 0 | 0 | 0 | 0 | 2 | 4 | 1 + 0 | 2 | 160 | 3 | 1 | 160 | 0 | 4 | 1 + 1 | 1 | 0 | 0 | 0 | 0 | 2 | 4 | 1 + 1 | 2 | 0 | 0 | 0 | 0 | 0 | 4 | 1 + 2 | 1 | 200 | 4 | 1 | 200 | 1 | 4 | 1 (5 rows) 1:INSERT INTO crash_before_cleanup_phase VALUES(1, 1, 'c'), (25, 6, 'c'); INSERT 2 @@ -143,23 +143,23 @@ UPDATE 1 1:SELECT * FROM gp_toolkit.__gp_aoseg('crash_before_cleanup_phase'); segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state ------------+-------+-----+----------+---------------+------------------+----------+---------------+------- - 0 | 1 | 0 | 0 | 0 | 0 | 2 | 3 | 1 - 0 | 2 | 160 | 3 | 1 | 160 | 0 | 3 | 1 - 1 | 1 | 64 | 1 | 1 | 64 | 3 | 3 | 1 - 1 | 2 | 0 | 0 | 0 | 0 | 0 | 3 | 1 - 2 | 1 | 328 | 6 | 3 | 328 | 3 | 3 | 1 + 0 | 1 | 0 | 0 | 0 | 0 | 2 | 4 | 1 + 0 | 2 | 160 | 3 | 1 | 160 | 0 | 4 | 1 + 1 | 1 | 64 | 
1 | 1 | 64 | 3 | 4 | 1 + 1 | 2 | 0 | 0 | 0 | 0 | 0 | 4 | 1 + 2 | 1 | 328 | 6 | 3 | 328 | 3 | 4 | 1 (5 rows) 1:VACUUM crash_before_cleanup_phase; VACUUM 1:SELECT * FROM gp_toolkit.__gp_aoseg('crash_before_cleanup_phase'); segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state ------------+-------+-----+----------+---------------+------------------+----------+---------------+------- - 0 | 1 | 0 | 0 | 0 | 0 | 2 | 3 | 1 - 0 | 2 | 160 | 3 | 1 | 160 | 0 | 3 | 1 - 1 | 1 | 64 | 1 | 1 | 64 | 3 | 3 | 1 - 1 | 2 | 0 | 0 | 0 | 0 | 0 | 3 | 1 - 2 | 1 | 0 | 0 | 0 | 0 | 3 | 3 | 1 - 2 | 2 | 248 | 5 | 1 | 248 | 0 | 3 | 1 + 0 | 1 | 0 | 0 | 0 | 0 | 2 | 4 | 1 + 0 | 2 | 160 | 3 | 1 | 160 | 0 | 4 | 1 + 1 | 1 | 64 | 1 | 1 | 64 | 3 | 4 | 1 + 1 | 2 | 0 | 0 | 0 | 0 | 0 | 4 | 1 + 2 | 1 | 0 | 0 | 0 | 0 | 3 | 4 | 1 + 2 | 2 | 248 | 5 | 1 | 248 | 0 | 4 | 1 (6 rows) 1:INSERT INTO crash_before_cleanup_phase VALUES(21, 1, 'c'), (26, 1, 'c'); INSERT 2 @@ -185,9 +185,9 @@ UPDATE 1 1:SELECT * FROM gp_toolkit.__gp_aoseg('crash_vacuum_in_appendonly_insert') where segno = 1; segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state ------------+-------+-----+----------+---------------+------------------+----------+---------------+------- - 0 | 1 | 496 | 10 | 2 | 496 | 2 | 3 | 1 - 1 | 1 | 128 | 2 | 2 | 128 | 2 | 3 | 1 - 2 | 1 | 400 | 8 | 2 | 400 | 2 | 3 | 1 + 0 | 1 | 496 | 10 | 2 | 496 | 2 | 4 | 1 + 1 | 1 | 128 | 2 | 2 | 128 | 2 | 4 | 1 + 2 | 1 | 400 | 8 | 2 | 400 | 2 | 4 | 1 (3 rows) -- verify the new segment files contain no tuples. 
1:SELECT sum(tupcount) FROM gp_toolkit.__gp_aoseg('crash_vacuum_in_appendonly_insert') where segno = 2; @@ -200,12 +200,12 @@ VACUUM 1:SELECT * FROM gp_toolkit.__gp_aoseg('crash_vacuum_in_appendonly_insert'); segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state ------------+-------+-----+----------+---------------+------------------+----------+---------------+------- - 0 | 1 | 0 | 0 | 0 | 0 | 2 | 3 | 1 - 0 | 2 | 248 | 5 | 1 | 248 | 0 | 3 | 1 - 1 | 1 | 0 | 0 | 0 | 0 | 2 | 3 | 1 - 1 | 2 | 64 | 1 | 1 | 64 | 0 | 3 | 1 - 2 | 1 | 0 | 0 | 0 | 0 | 2 | 3 | 1 - 2 | 2 | 200 | 4 | 1 | 200 | 0 | 3 | 1 + 0 | 1 | 0 | 0 | 0 | 0 | 2 | 4 | 1 + 0 | 2 | 248 | 5 | 1 | 248 | 0 | 4 | 1 + 1 | 1 | 0 | 0 | 0 | 0 | 2 | 4 | 1 + 1 | 2 | 64 | 1 | 1 | 64 | 0 | 4 | 1 + 2 | 1 | 0 | 0 | 0 | 0 | 2 | 4 | 1 + 2 | 2 | 200 | 4 | 1 | 200 | 0 | 4 | 1 (6 rows) 1:INSERT INTO crash_vacuum_in_appendonly_insert VALUES(21, 1, 'c'), (26, 1, 'c'); INSERT 2 @@ -274,11 +274,11 @@ server closed the connection unexpectedly 4:SELECT * FROM gp_toolkit.__gp_aoseg('crash_master_before_cleanup_phase'); segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state ------------+-------+-----+----------+---------------+------------------+----------+---------------+------- - 0 | 1 | 248 | 5 | 1 | 248 | 2 | 3 | 2 - 0 | 2 | 160 | 3 | 1 | 160 | 0 | 3 | 1 - 1 | 1 | 64 | 1 | 1 | 64 | 2 | 3 | 2 - 1 | 2 | 0 | 0 | 0 | 0 | 0 | 3 | 1 - 2 | 1 | 200 | 4 | 1 | 200 | 1 | 3 | 1 + 0 | 1 | 248 | 5 | 1 | 248 | 2 | 4 | 2 + 0 | 2 | 160 | 3 | 1 | 160 | 0 | 4 | 1 + 1 | 1 | 64 | 1 | 1 | 64 | 2 | 4 | 2 + 1 | 2 | 0 | 0 | 0 | 0 | 0 | 4 | 1 + 2 | 1 | 200 | 4 | 1 | 200 | 1 | 4 | 1 (5 rows) 4:INSERT INTO crash_master_before_cleanup_phase VALUES(1, 1, 'c'), (25, 6, 'c'); INSERT 2 @@ -300,23 +300,23 @@ UPDATE 1 4:SELECT * FROM gp_toolkit.__gp_aoseg('crash_master_before_cleanup_phase'); segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | 
modcount | formatversion | state ------------+-------+-----+----------+---------------+------------------+----------+---------------+------- - 0 | 1 | 248 | 5 | 1 | 248 | 2 | 3 | 2 - 0 | 2 | 160 | 3 | 1 | 160 | 0 | 3 | 1 - 1 | 1 | 64 | 1 | 1 | 64 | 2 | 3 | 2 - 1 | 2 | 64 | 1 | 1 | 64 | 1 | 3 | 1 - 2 | 1 | 328 | 6 | 3 | 328 | 3 | 3 | 1 + 0 | 1 | 248 | 5 | 1 | 248 | 2 | 4 | 2 + 0 | 2 | 160 | 3 | 1 | 160 | 0 | 4 | 1 + 1 | 1 | 64 | 1 | 1 | 64 | 2 | 4 | 2 + 1 | 2 | 64 | 1 | 1 | 64 | 1 | 4 | 1 + 2 | 1 | 328 | 6 | 3 | 328 | 3 | 4 | 1 (5 rows) 4:VACUUM crash_master_before_cleanup_phase; VACUUM 4:SELECT * FROM gp_toolkit.__gp_aoseg('crash_master_before_cleanup_phase'); segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state ------------+-------+-----+----------+---------------+------------------+----------+---------------+------- - 0 | 1 | 0 | 0 | 0 | 0 | 2 | 3 | 1 - 0 | 2 | 160 | 3 | 1 | 160 | 0 | 3 | 1 - 1 | 1 | 0 | 0 | 0 | 0 | 2 | 3 | 1 - 1 | 2 | 64 | 1 | 1 | 64 | 1 | 3 | 1 - 2 | 1 | 0 | 0 | 0 | 0 | 3 | 3 | 1 - 2 | 2 | 248 | 5 | 1 | 248 | 0 | 3 | 1 + 0 | 1 | 0 | 0 | 0 | 0 | 2 | 4 | 1 + 0 | 2 | 160 | 3 | 1 | 160 | 0 | 4 | 1 + 1 | 1 | 0 | 0 | 0 | 0 | 2 | 4 | 1 + 1 | 2 | 64 | 1 | 1 | 64 | 1 | 4 | 1 + 2 | 1 | 0 | 0 | 0 | 0 | 3 | 4 | 1 + 2 | 2 | 248 | 5 | 1 | 248 | 0 | 4 | 1 (6 rows) 4:INSERT INTO crash_master_before_cleanup_phase VALUES(21, 1, 'c'), (26, 1, 'c'); INSERT 2 diff --git a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule index e89566e9d67..3437378f9a0 100644 --- a/src/test/isolation2/isolation2_schedule +++ b/src/test/isolation2/isolation2_schedule @@ -89,7 +89,7 @@ test: distributedlog-bug test: invalidated_toast_index test: distributed_snapshot test: gp_collation -test: ao_upgrade +test: ao_upgrade ao_blkdir test: bitmap_index_concurrent test: bitmap_index_crash test: bitmap_update_words_backup_block diff --git a/src/test/isolation2/output/uao/compaction_utility_insert.source 
b/src/test/isolation2/output/uao/compaction_utility_insert.source index ba037d2ee6c..8b40bc4763e 100644 --- a/src/test/isolation2/output/uao/compaction_utility_insert.source +++ b/src/test/isolation2/output/uao/compaction_utility_insert.source @@ -14,7 +14,7 @@ INSERT 1 SELECT *, segno, tupcount, state FROM gp_ao_or_aocs_seg('foo'); segment_id | segno | tupcount | modcount | formatversion | state | segno | tupcount | state ------------+-------+----------+----------+---------------+-------+-------+----------+------- - 0 | 0 | 2 | 2 | 3 | 1 | 0 | 2 | 1 + 0 | 0 | 2 | 2 | 4 | 1 | 0 | 2 | 1 (1 row) DELETE FROM foo WHERE a = 2; DELETE 1 diff --git a/src/test/isolation2/output/uao/max_concurrency.source b/src/test/isolation2/output/uao/max_concurrency.source index 9423f38f5e6..bead35a6647 100644 --- a/src/test/isolation2/output/uao/max_concurrency.source +++ b/src/test/isolation2/output/uao/max_concurrency.source @@ -904,133 +904,133 @@ SELECT * FROM ao; SELECT * FROM gp_ao_or_aocs_seg('ao') ORDER BY segno; segment_id | segno | tupcount | modcount | formatversion | state ------------+-------+----------+----------+---------------+------- - 1 | 1 | 1 | 1 | 3 | 1 - 1 | 2 | 1 | 1 | 3 | 1 - 1 | 3 | 1 | 1 | 3 | 1 - 1 | 4 | 1 | 1 | 3 | 1 - 1 | 5 | 1 | 1 | 3 | 1 - 1 | 6 | 1 | 1 | 3 | 1 - 1 | 7 | 1 | 1 | 3 | 1 - 1 | 8 | 1 | 1 | 3 | 1 - 1 | 9 | 1 | 1 | 3 | 1 - 1 | 10 | 1 | 1 | 3 | 1 - 1 | 11 | 1 | 1 | 3 | 1 - 1 | 12 | 1 | 1 | 3 | 1 - 1 | 13 | 1 | 1 | 3 | 1 - 1 | 14 | 1 | 1 | 3 | 1 - 1 | 15 | 1 | 1 | 3 | 1 - 1 | 16 | 1 | 1 | 3 | 1 - 1 | 17 | 1 | 1 | 3 | 1 - 1 | 18 | 1 | 1 | 3 | 1 - 1 | 19 | 1 | 1 | 3 | 1 - 1 | 20 | 1 | 1 | 3 | 1 - 1 | 21 | 1 | 1 | 3 | 1 - 1 | 22 | 1 | 1 | 3 | 1 - 1 | 23 | 1 | 1 | 3 | 1 - 1 | 24 | 1 | 1 | 3 | 1 - 1 | 25 | 1 | 1 | 3 | 1 - 1 | 26 | 1 | 1 | 3 | 1 - 1 | 27 | 1 | 1 | 3 | 1 - 1 | 28 | 1 | 1 | 3 | 1 - 1 | 29 | 1 | 1 | 3 | 1 - 1 | 30 | 1 | 1 | 3 | 1 - 1 | 31 | 1 | 1 | 3 | 1 - 1 | 32 | 1 | 1 | 3 | 1 - 1 | 33 | 1 | 1 | 3 | 1 - 1 | 34 | 1 | 1 | 3 | 1 - 1 | 35 | 
1 | 1 | 3 | 1 - 1 | 36 | 1 | 1 | 3 | 1 - 1 | 37 | 1 | 1 | 3 | 1 - 1 | 38 | 1 | 1 | 3 | 1 - 1 | 39 | 1 | 1 | 3 | 1 - 1 | 40 | 1 | 1 | 3 | 1 - 1 | 41 | 1 | 1 | 3 | 1 - 1 | 42 | 1 | 1 | 3 | 1 - 1 | 43 | 1 | 1 | 3 | 1 - 1 | 44 | 1 | 1 | 3 | 1 - 1 | 45 | 1 | 1 | 3 | 1 - 1 | 46 | 1 | 1 | 3 | 1 - 1 | 47 | 1 | 1 | 3 | 1 - 1 | 48 | 1 | 1 | 3 | 1 - 1 | 49 | 1 | 1 | 3 | 1 - 1 | 50 | 1 | 1 | 3 | 1 - 1 | 51 | 1 | 1 | 3 | 1 - 1 | 52 | 1 | 1 | 3 | 1 - 1 | 53 | 1 | 1 | 3 | 1 - 1 | 54 | 1 | 1 | 3 | 1 - 1 | 55 | 1 | 1 | 3 | 1 - 1 | 56 | 1 | 1 | 3 | 1 - 1 | 57 | 1 | 1 | 3 | 1 - 1 | 58 | 1 | 1 | 3 | 1 - 1 | 59 | 1 | 1 | 3 | 1 - 1 | 60 | 1 | 1 | 3 | 1 - 1 | 61 | 1 | 1 | 3 | 1 - 1 | 62 | 1 | 1 | 3 | 1 - 1 | 63 | 1 | 1 | 3 | 1 - 1 | 64 | 1 | 1 | 3 | 1 - 1 | 65 | 1 | 1 | 3 | 1 - 1 | 66 | 1 | 1 | 3 | 1 - 1 | 67 | 1 | 1 | 3 | 1 - 1 | 68 | 1 | 1 | 3 | 1 - 1 | 69 | 1 | 1 | 3 | 1 - 1 | 70 | 1 | 1 | 3 | 1 - 1 | 71 | 1 | 1 | 3 | 1 - 1 | 72 | 1 | 1 | 3 | 1 - 1 | 73 | 1 | 1 | 3 | 1 - 1 | 74 | 1 | 1 | 3 | 1 - 1 | 75 | 1 | 1 | 3 | 1 - 1 | 76 | 1 | 1 | 3 | 1 - 1 | 77 | 1 | 1 | 3 | 1 - 1 | 78 | 1 | 1 | 3 | 1 - 1 | 79 | 1 | 1 | 3 | 1 - 1 | 80 | 1 | 1 | 3 | 1 - 1 | 81 | 1 | 1 | 3 | 1 - 1 | 82 | 1 | 1 | 3 | 1 - 1 | 83 | 1 | 1 | 3 | 1 - 1 | 84 | 1 | 1 | 3 | 1 - 1 | 85 | 1 | 1 | 3 | 1 - 1 | 86 | 1 | 1 | 3 | 1 - 1 | 87 | 1 | 1 | 3 | 1 - 1 | 88 | 1 | 1 | 3 | 1 - 1 | 89 | 1 | 1 | 3 | 1 - 1 | 90 | 1 | 1 | 3 | 1 - 1 | 91 | 1 | 1 | 3 | 1 - 1 | 92 | 1 | 1 | 3 | 1 - 1 | 93 | 1 | 1 | 3 | 1 - 1 | 94 | 1 | 1 | 3 | 1 - 1 | 95 | 1 | 1 | 3 | 1 - 1 | 96 | 1 | 1 | 3 | 1 - 1 | 97 | 1 | 1 | 3 | 1 - 1 | 98 | 1 | 1 | 3 | 1 - 1 | 99 | 1 | 1 | 3 | 1 - 1 | 100 | 1 | 1 | 3 | 1 - 1 | 101 | 1 | 1 | 3 | 1 - 1 | 102 | 1 | 1 | 3 | 1 - 1 | 103 | 1 | 1 | 3 | 1 - 1 | 104 | 1 | 1 | 3 | 1 - 1 | 105 | 1 | 1 | 3 | 1 - 1 | 106 | 1 | 1 | 3 | 1 - 1 | 107 | 1 | 1 | 3 | 1 - 1 | 108 | 1 | 1 | 3 | 1 - 1 | 109 | 1 | 1 | 3 | 1 - 1 | 110 | 1 | 1 | 3 | 1 - 1 | 111 | 1 | 1 | 3 | 1 - 1 | 112 | 1 | 1 | 3 | 1 - 1 | 113 | 1 | 1 | 3 | 1 - 1 | 114 | 1 | 1 | 3 
| 1 - 1 | 115 | 1 | 1 | 3 | 1 - 1 | 116 | 1 | 1 | 3 | 1 - 1 | 117 | 1 | 1 | 3 | 1 - 1 | 118 | 1 | 1 | 3 | 1 - 1 | 119 | 1 | 1 | 3 | 1 - 1 | 120 | 1 | 1 | 3 | 1 - 1 | 121 | 1 | 1 | 3 | 1 - 1 | 122 | 1 | 1 | 3 | 1 - 1 | 123 | 1 | 1 | 3 | 1 - 1 | 124 | 1 | 1 | 3 | 1 - 1 | 125 | 1 | 1 | 3 | 1 - 1 | 126 | 1 | 1 | 3 | 1 - 1 | 127 | 1 | 1 | 3 | 1 + 1 | 1 | 1 | 1 | 4 | 1 + 1 | 2 | 1 | 1 | 4 | 1 + 1 | 3 | 1 | 1 | 4 | 1 + 1 | 4 | 1 | 1 | 4 | 1 + 1 | 5 | 1 | 1 | 4 | 1 + 1 | 6 | 1 | 1 | 4 | 1 + 1 | 7 | 1 | 1 | 4 | 1 + 1 | 8 | 1 | 1 | 4 | 1 + 1 | 9 | 1 | 1 | 4 | 1 + 1 | 10 | 1 | 1 | 4 | 1 + 1 | 11 | 1 | 1 | 4 | 1 + 1 | 12 | 1 | 1 | 4 | 1 + 1 | 13 | 1 | 1 | 4 | 1 + 1 | 14 | 1 | 1 | 4 | 1 + 1 | 15 | 1 | 1 | 4 | 1 + 1 | 16 | 1 | 1 | 4 | 1 + 1 | 17 | 1 | 1 | 4 | 1 + 1 | 18 | 1 | 1 | 4 | 1 + 1 | 19 | 1 | 1 | 4 | 1 + 1 | 20 | 1 | 1 | 4 | 1 + 1 | 21 | 1 | 1 | 4 | 1 + 1 | 22 | 1 | 1 | 4 | 1 + 1 | 23 | 1 | 1 | 4 | 1 + 1 | 24 | 1 | 1 | 4 | 1 + 1 | 25 | 1 | 1 | 4 | 1 + 1 | 26 | 1 | 1 | 4 | 1 + 1 | 27 | 1 | 1 | 4 | 1 + 1 | 28 | 1 | 1 | 4 | 1 + 1 | 29 | 1 | 1 | 4 | 1 + 1 | 30 | 1 | 1 | 4 | 1 + 1 | 31 | 1 | 1 | 4 | 1 + 1 | 32 | 1 | 1 | 4 | 1 + 1 | 33 | 1 | 1 | 4 | 1 + 1 | 34 | 1 | 1 | 4 | 1 + 1 | 35 | 1 | 1 | 4 | 1 + 1 | 36 | 1 | 1 | 4 | 1 + 1 | 37 | 1 | 1 | 4 | 1 + 1 | 38 | 1 | 1 | 4 | 1 + 1 | 39 | 1 | 1 | 4 | 1 + 1 | 40 | 1 | 1 | 4 | 1 + 1 | 41 | 1 | 1 | 4 | 1 + 1 | 42 | 1 | 1 | 4 | 1 + 1 | 43 | 1 | 1 | 4 | 1 + 1 | 44 | 1 | 1 | 4 | 1 + 1 | 45 | 1 | 1 | 4 | 1 + 1 | 46 | 1 | 1 | 4 | 1 + 1 | 47 | 1 | 1 | 4 | 1 + 1 | 48 | 1 | 1 | 4 | 1 + 1 | 49 | 1 | 1 | 4 | 1 + 1 | 50 | 1 | 1 | 4 | 1 + 1 | 51 | 1 | 1 | 4 | 1 + 1 | 52 | 1 | 1 | 4 | 1 + 1 | 53 | 1 | 1 | 4 | 1 + 1 | 54 | 1 | 1 | 4 | 1 + 1 | 55 | 1 | 1 | 4 | 1 + 1 | 56 | 1 | 1 | 4 | 1 + 1 | 57 | 1 | 1 | 4 | 1 + 1 | 58 | 1 | 1 | 4 | 1 + 1 | 59 | 1 | 1 | 4 | 1 + 1 | 60 | 1 | 1 | 4 | 1 + 1 | 61 | 1 | 1 | 4 | 1 + 1 | 62 | 1 | 1 | 4 | 1 + 1 | 63 | 1 | 1 | 4 | 1 + 1 | 64 | 1 | 1 | 4 | 1 + 1 | 65 | 1 | 1 | 4 | 1 + 1 | 66 | 1 | 1 | 4 | 1 + 1 | 67 | 1 | 1 
| 4 | 1 + 1 | 68 | 1 | 1 | 4 | 1 + 1 | 69 | 1 | 1 | 4 | 1 + 1 | 70 | 1 | 1 | 4 | 1 + 1 | 71 | 1 | 1 | 4 | 1 + 1 | 72 | 1 | 1 | 4 | 1 + 1 | 73 | 1 | 1 | 4 | 1 + 1 | 74 | 1 | 1 | 4 | 1 + 1 | 75 | 1 | 1 | 4 | 1 + 1 | 76 | 1 | 1 | 4 | 1 + 1 | 77 | 1 | 1 | 4 | 1 + 1 | 78 | 1 | 1 | 4 | 1 + 1 | 79 | 1 | 1 | 4 | 1 + 1 | 80 | 1 | 1 | 4 | 1 + 1 | 81 | 1 | 1 | 4 | 1 + 1 | 82 | 1 | 1 | 4 | 1 + 1 | 83 | 1 | 1 | 4 | 1 + 1 | 84 | 1 | 1 | 4 | 1 + 1 | 85 | 1 | 1 | 4 | 1 + 1 | 86 | 1 | 1 | 4 | 1 + 1 | 87 | 1 | 1 | 4 | 1 + 1 | 88 | 1 | 1 | 4 | 1 + 1 | 89 | 1 | 1 | 4 | 1 + 1 | 90 | 1 | 1 | 4 | 1 + 1 | 91 | 1 | 1 | 4 | 1 + 1 | 92 | 1 | 1 | 4 | 1 + 1 | 93 | 1 | 1 | 4 | 1 + 1 | 94 | 1 | 1 | 4 | 1 + 1 | 95 | 1 | 1 | 4 | 1 + 1 | 96 | 1 | 1 | 4 | 1 + 1 | 97 | 1 | 1 | 4 | 1 + 1 | 98 | 1 | 1 | 4 | 1 + 1 | 99 | 1 | 1 | 4 | 1 + 1 | 100 | 1 | 1 | 4 | 1 + 1 | 101 | 1 | 1 | 4 | 1 + 1 | 102 | 1 | 1 | 4 | 1 + 1 | 103 | 1 | 1 | 4 | 1 + 1 | 104 | 1 | 1 | 4 | 1 + 1 | 105 | 1 | 1 | 4 | 1 + 1 | 106 | 1 | 1 | 4 | 1 + 1 | 107 | 1 | 1 | 4 | 1 + 1 | 108 | 1 | 1 | 4 | 1 + 1 | 109 | 1 | 1 | 4 | 1 + 1 | 110 | 1 | 1 | 4 | 1 + 1 | 111 | 1 | 1 | 4 | 1 + 1 | 112 | 1 | 1 | 4 | 1 + 1 | 113 | 1 | 1 | 4 | 1 + 1 | 114 | 1 | 1 | 4 | 1 + 1 | 115 | 1 | 1 | 4 | 1 + 1 | 116 | 1 | 1 | 4 | 1 + 1 | 117 | 1 | 1 | 4 | 1 + 1 | 118 | 1 | 1 | 4 | 1 + 1 | 119 | 1 | 1 | 4 | 1 + 1 | 120 | 1 | 1 | 4 | 1 + 1 | 121 | 1 | 1 | 4 | 1 + 1 | 122 | 1 | 1 | 4 | 1 + 1 | 123 | 1 | 1 | 4 | 1 + 1 | 124 | 1 | 1 | 4 | 1 + 1 | 125 | 1 | 1 | 4 | 1 + 1 | 126 | 1 | 1 | 4 | 1 + 1 | 127 | 1 | 1 | 4 | 1 (127 rows) ALTER RESOURCE GROUP admin_group SET CONCURRENCY 20; diff --git a/src/test/isolation2/output/uao/max_concurrency2.source b/src/test/isolation2/output/uao/max_concurrency2.source index a83f9de1bdc..49592999be2 100644 --- a/src/test/isolation2/output/uao/max_concurrency2.source +++ b/src/test/isolation2/output/uao/max_concurrency2.source @@ -918,133 +918,133 @@ SELECT * FROM ao; SELECT * FROM gp_ao_or_aocs_seg('ao') ORDER BY segno; segment_id | segno 
| tupcount | modcount | formatversion | state ------------+-------+----------+----------+---------------+------- - 1 | 1 | 1 | 1 | 3 | 1 - 1 | 2 | 1 | 1 | 3 | 1 - 1 | 3 | 1 | 1 | 3 | 1 - 1 | 4 | 1 | 1 | 3 | 1 - 1 | 5 | 1 | 1 | 3 | 1 - 1 | 6 | 1 | 1 | 3 | 1 - 1 | 7 | 1 | 1 | 3 | 1 - 1 | 8 | 1 | 1 | 3 | 1 - 1 | 9 | 1 | 1 | 3 | 1 - 1 | 10 | 1 | 1 | 3 | 1 - 1 | 11 | 1 | 1 | 3 | 1 - 1 | 12 | 1 | 1 | 3 | 1 - 1 | 13 | 1 | 1 | 3 | 1 - 1 | 14 | 1 | 1 | 3 | 1 - 1 | 15 | 1 | 1 | 3 | 1 - 1 | 16 | 1 | 1 | 3 | 1 - 1 | 17 | 1 | 1 | 3 | 1 - 1 | 18 | 1 | 1 | 3 | 1 - 1 | 19 | 1 | 1 | 3 | 1 - 1 | 20 | 1 | 1 | 3 | 1 - 1 | 21 | 1 | 1 | 3 | 1 - 1 | 22 | 1 | 1 | 3 | 1 - 1 | 23 | 1 | 1 | 3 | 1 - 1 | 24 | 1 | 1 | 3 | 1 - 1 | 25 | 1 | 1 | 3 | 1 - 1 | 26 | 1 | 1 | 3 | 1 - 1 | 27 | 1 | 1 | 3 | 1 - 1 | 28 | 1 | 1 | 3 | 1 - 1 | 29 | 1 | 1 | 3 | 1 - 1 | 30 | 1 | 1 | 3 | 1 - 1 | 31 | 1 | 1 | 3 | 1 - 1 | 32 | 1 | 1 | 3 | 1 - 1 | 33 | 1 | 1 | 3 | 1 - 1 | 34 | 1 | 1 | 3 | 1 - 1 | 35 | 1 | 1 | 3 | 1 - 1 | 36 | 1 | 1 | 3 | 1 - 1 | 37 | 1 | 1 | 3 | 1 - 1 | 38 | 1 | 1 | 3 | 1 - 1 | 39 | 1 | 1 | 3 | 1 - 1 | 40 | 1 | 1 | 3 | 1 - 1 | 41 | 1 | 1 | 3 | 1 - 1 | 42 | 1 | 1 | 3 | 1 - 1 | 43 | 1 | 1 | 3 | 1 - 1 | 44 | 1 | 1 | 3 | 1 - 1 | 45 | 1 | 1 | 3 | 1 - 1 | 46 | 1 | 1 | 3 | 1 - 1 | 47 | 1 | 1 | 3 | 1 - 1 | 48 | 1 | 1 | 3 | 1 - 1 | 49 | 1 | 1 | 3 | 1 - 1 | 50 | 1 | 1 | 3 | 1 - 1 | 51 | 1 | 1 | 3 | 1 - 1 | 52 | 1 | 1 | 3 | 1 - 1 | 53 | 1 | 1 | 3 | 1 - 1 | 54 | 1 | 1 | 3 | 1 - 1 | 55 | 1 | 1 | 3 | 1 - 1 | 56 | 1 | 1 | 3 | 1 - 1 | 57 | 1 | 1 | 3 | 1 - 1 | 58 | 1 | 1 | 3 | 1 - 1 | 59 | 1 | 1 | 3 | 1 - 1 | 60 | 1 | 1 | 3 | 1 - 1 | 61 | 1 | 1 | 3 | 1 - 1 | 62 | 1 | 1 | 3 | 1 - 1 | 63 | 1 | 1 | 3 | 1 - 1 | 64 | 1 | 1 | 3 | 1 - 1 | 65 | 1 | 1 | 3 | 1 - 1 | 66 | 1 | 1 | 3 | 1 - 1 | 67 | 1 | 1 | 3 | 1 - 1 | 68 | 1 | 1 | 3 | 1 - 1 | 69 | 1 | 1 | 3 | 1 - 1 | 70 | 1 | 1 | 3 | 1 - 1 | 71 | 1 | 1 | 3 | 1 - 1 | 72 | 1 | 1 | 3 | 1 - 1 | 73 | 1 | 1 | 3 | 1 - 1 | 74 | 1 | 1 | 3 | 1 - 1 | 75 | 1 | 1 | 3 | 1 - 1 | 76 | 1 | 1 | 3 
| 1 - 1 | 77 | 1 | 1 | 3 | 1 - 1 | 78 | 1 | 1 | 3 | 1 - 1 | 79 | 1 | 1 | 3 | 1 - 1 | 80 | 1 | 1 | 3 | 1 - 1 | 81 | 1 | 1 | 3 | 1 - 1 | 82 | 1 | 1 | 3 | 1 - 1 | 83 | 1 | 1 | 3 | 1 - 1 | 84 | 1 | 1 | 3 | 1 - 1 | 85 | 1 | 1 | 3 | 1 - 1 | 86 | 1 | 1 | 3 | 1 - 1 | 87 | 1 | 1 | 3 | 1 - 1 | 88 | 1 | 1 | 3 | 1 - 1 | 89 | 1 | 1 | 3 | 1 - 1 | 90 | 1 | 1 | 3 | 1 - 1 | 91 | 1 | 1 | 3 | 1 - 1 | 92 | 1 | 1 | 3 | 1 - 1 | 93 | 1 | 1 | 3 | 1 - 1 | 94 | 1 | 1 | 3 | 1 - 1 | 95 | 1 | 1 | 3 | 1 - 1 | 96 | 1 | 1 | 3 | 1 - 1 | 97 | 1 | 1 | 3 | 1 - 1 | 98 | 1 | 1 | 3 | 1 - 1 | 99 | 1 | 1 | 3 | 1 - 1 | 100 | 1 | 1 | 3 | 1 - 1 | 101 | 1 | 1 | 3 | 1 - 1 | 102 | 1 | 1 | 3 | 1 - 1 | 103 | 1 | 1 | 3 | 1 - 1 | 104 | 1 | 1 | 3 | 1 - 1 | 105 | 1 | 1 | 3 | 1 - 1 | 106 | 1 | 1 | 3 | 1 - 1 | 107 | 1 | 1 | 3 | 1 - 1 | 108 | 1 | 1 | 3 | 1 - 1 | 109 | 1 | 1 | 3 | 1 - 1 | 110 | 1 | 1 | 3 | 1 - 1 | 111 | 1 | 1 | 3 | 1 - 1 | 112 | 1 | 1 | 3 | 1 - 1 | 113 | 1 | 1 | 3 | 1 - 1 | 114 | 1 | 1 | 3 | 1 - 1 | 115 | 1 | 1 | 3 | 1 - 1 | 116 | 1 | 1 | 3 | 1 - 1 | 117 | 1 | 1 | 3 | 1 - 1 | 118 | 1 | 1 | 3 | 1 - 1 | 119 | 1 | 1 | 3 | 1 - 1 | 120 | 1 | 1 | 3 | 1 - 1 | 121 | 1 | 1 | 3 | 1 - 1 | 122 | 1 | 1 | 3 | 1 - 1 | 123 | 1 | 1 | 3 | 1 - 1 | 124 | 1 | 1 | 3 | 1 - 1 | 125 | 1 | 1 | 3 | 1 - 1 | 126 | 1 | 1 | 3 | 1 - 1 | 127 | 1 | 1 | 3 | 1 + 1 | 1 | 1 | 1 | 4 | 1 + 1 | 2 | 1 | 1 | 4 | 1 + 1 | 3 | 1 | 1 | 4 | 1 + 1 | 4 | 1 | 1 | 4 | 1 + 1 | 5 | 1 | 1 | 4 | 1 + 1 | 6 | 1 | 1 | 4 | 1 + 1 | 7 | 1 | 1 | 4 | 1 + 1 | 8 | 1 | 1 | 4 | 1 + 1 | 9 | 1 | 1 | 4 | 1 + 1 | 10 | 1 | 1 | 4 | 1 + 1 | 11 | 1 | 1 | 4 | 1 + 1 | 12 | 1 | 1 | 4 | 1 + 1 | 13 | 1 | 1 | 4 | 1 + 1 | 14 | 1 | 1 | 4 | 1 + 1 | 15 | 1 | 1 | 4 | 1 + 1 | 16 | 1 | 1 | 4 | 1 + 1 | 17 | 1 | 1 | 4 | 1 + 1 | 18 | 1 | 1 | 4 | 1 + 1 | 19 | 1 | 1 | 4 | 1 + 1 | 20 | 1 | 1 | 4 | 1 + 1 | 21 | 1 | 1 | 4 | 1 + 1 | 22 | 1 | 1 | 4 | 1 + 1 | 23 | 1 | 1 | 4 | 1 + 1 | 24 | 1 | 1 | 4 | 1 + 1 | 25 | 1 | 1 | 4 | 1 + 1 | 26 | 1 | 1 | 4 | 1 + 1 | 27 | 1 | 1 | 4 | 1 + 1 | 28 | 1 | 1 | 4 | 1 + 
1 | 29 | 1 | 1 | 4 | 1 + 1 | 30 | 1 | 1 | 4 | 1 + 1 | 31 | 1 | 1 | 4 | 1 + 1 | 32 | 1 | 1 | 4 | 1 + 1 | 33 | 1 | 1 | 4 | 1 + 1 | 34 | 1 | 1 | 4 | 1 + 1 | 35 | 1 | 1 | 4 | 1 + 1 | 36 | 1 | 1 | 4 | 1 + 1 | 37 | 1 | 1 | 4 | 1 + 1 | 38 | 1 | 1 | 4 | 1 + 1 | 39 | 1 | 1 | 4 | 1 + 1 | 40 | 1 | 1 | 4 | 1 + 1 | 41 | 1 | 1 | 4 | 1 + 1 | 42 | 1 | 1 | 4 | 1 + 1 | 43 | 1 | 1 | 4 | 1 + 1 | 44 | 1 | 1 | 4 | 1 + 1 | 45 | 1 | 1 | 4 | 1 + 1 | 46 | 1 | 1 | 4 | 1 + 1 | 47 | 1 | 1 | 4 | 1 + 1 | 48 | 1 | 1 | 4 | 1 + 1 | 49 | 1 | 1 | 4 | 1 + 1 | 50 | 1 | 1 | 4 | 1 + 1 | 51 | 1 | 1 | 4 | 1 + 1 | 52 | 1 | 1 | 4 | 1 + 1 | 53 | 1 | 1 | 4 | 1 + 1 | 54 | 1 | 1 | 4 | 1 + 1 | 55 | 1 | 1 | 4 | 1 + 1 | 56 | 1 | 1 | 4 | 1 + 1 | 57 | 1 | 1 | 4 | 1 + 1 | 58 | 1 | 1 | 4 | 1 + 1 | 59 | 1 | 1 | 4 | 1 + 1 | 60 | 1 | 1 | 4 | 1 + 1 | 61 | 1 | 1 | 4 | 1 + 1 | 62 | 1 | 1 | 4 | 1 + 1 | 63 | 1 | 1 | 4 | 1 + 1 | 64 | 1 | 1 | 4 | 1 + 1 | 65 | 1 | 1 | 4 | 1 + 1 | 66 | 1 | 1 | 4 | 1 + 1 | 67 | 1 | 1 | 4 | 1 + 1 | 68 | 1 | 1 | 4 | 1 + 1 | 69 | 1 | 1 | 4 | 1 + 1 | 70 | 1 | 1 | 4 | 1 + 1 | 71 | 1 | 1 | 4 | 1 + 1 | 72 | 1 | 1 | 4 | 1 + 1 | 73 | 1 | 1 | 4 | 1 + 1 | 74 | 1 | 1 | 4 | 1 + 1 | 75 | 1 | 1 | 4 | 1 + 1 | 76 | 1 | 1 | 4 | 1 + 1 | 77 | 1 | 1 | 4 | 1 + 1 | 78 | 1 | 1 | 4 | 1 + 1 | 79 | 1 | 1 | 4 | 1 + 1 | 80 | 1 | 1 | 4 | 1 + 1 | 81 | 1 | 1 | 4 | 1 + 1 | 82 | 1 | 1 | 4 | 1 + 1 | 83 | 1 | 1 | 4 | 1 + 1 | 84 | 1 | 1 | 4 | 1 + 1 | 85 | 1 | 1 | 4 | 1 + 1 | 86 | 1 | 1 | 4 | 1 + 1 | 87 | 1 | 1 | 4 | 1 + 1 | 88 | 1 | 1 | 4 | 1 + 1 | 89 | 1 | 1 | 4 | 1 + 1 | 90 | 1 | 1 | 4 | 1 + 1 | 91 | 1 | 1 | 4 | 1 + 1 | 92 | 1 | 1 | 4 | 1 + 1 | 93 | 1 | 1 | 4 | 1 + 1 | 94 | 1 | 1 | 4 | 1 + 1 | 95 | 1 | 1 | 4 | 1 + 1 | 96 | 1 | 1 | 4 | 1 + 1 | 97 | 1 | 1 | 4 | 1 + 1 | 98 | 1 | 1 | 4 | 1 + 1 | 99 | 1 | 1 | 4 | 1 + 1 | 100 | 1 | 1 | 4 | 1 + 1 | 101 | 1 | 1 | 4 | 1 + 1 | 102 | 1 | 1 | 4 | 1 + 1 | 103 | 1 | 1 | 4 | 1 + 1 | 104 | 1 | 1 | 4 | 1 + 1 | 105 | 1 | 1 | 4 | 1 + 1 | 106 | 1 | 1 | 4 | 1 + 1 | 107 | 1 | 1 | 4 | 1 + 1 | 108 | 1 | 1 
| 4 | 1 + 1 | 109 | 1 | 1 | 4 | 1 + 1 | 110 | 1 | 1 | 4 | 1 + 1 | 111 | 1 | 1 | 4 | 1 + 1 | 112 | 1 | 1 | 4 | 1 + 1 | 113 | 1 | 1 | 4 | 1 + 1 | 114 | 1 | 1 | 4 | 1 + 1 | 115 | 1 | 1 | 4 | 1 + 1 | 116 | 1 | 1 | 4 | 1 + 1 | 117 | 1 | 1 | 4 | 1 + 1 | 118 | 1 | 1 | 4 | 1 + 1 | 119 | 1 | 1 | 4 | 1 + 1 | 120 | 1 | 1 | 4 | 1 + 1 | 121 | 1 | 1 | 4 | 1 + 1 | 122 | 1 | 1 | 4 | 1 + 1 | 123 | 1 | 1 | 4 | 1 + 1 | 124 | 1 | 1 | 4 | 1 + 1 | 125 | 1 | 1 | 4 | 1 + 1 | 126 | 1 | 1 | 4 | 1 + 1 | 127 | 1 | 1 | 4 | 1 (127 rows) ALTER RESOURCE GROUP admin_group SET CONCURRENCY 20; diff --git a/src/test/isolation2/output/uao/select_after_vacuum.source b/src/test/isolation2/output/uao/select_after_vacuum.source index 5227d1fe7c1..3a01ec94561 100644 --- a/src/test/isolation2/output/uao/select_after_vacuum.source +++ b/src/test/isolation2/output/uao/select_after_vacuum.source @@ -109,10 +109,10 @@ INSERT 1 0: SELECT * FROM gp_ao_or_aocs_seg('ao'); segment_id | segno | tupcount | modcount | formatversion | state ------------+-------+----------+----------+---------------+------- - 0 | 1 | 7098 | 22 | 3 | 2 - 0 | 2 | 6069 | 0 | 3 | 1 - 1 | 1 | 6762 | 22 | 3 | 2 - 1 | 2 | 5923 | 1 | 3 | 1 - 2 | 1 | 7140 | 22 | 3 | 2 - 2 | 2 | 6342 | 0 | 3 | 1 + 0 | 1 | 7098 | 22 | 4 | 2 + 0 | 2 | 6069 | 0 | 4 | 1 + 1 | 1 | 6762 | 22 | 4 | 2 + 1 | 2 | 5923 | 1 | 4 | 1 + 2 | 1 | 7140 | 22 | 4 | 2 + 2 | 2 | 6342 | 0 | 4 | 1 (6 rows) diff --git a/src/test/isolation2/output/uao/vacuum_cleanup.source b/src/test/isolation2/output/uao/vacuum_cleanup.source index 920ddfaf530..430853cc420 100644 --- a/src/test/isolation2/output/uao/vacuum_cleanup.source +++ b/src/test/isolation2/output/uao/vacuum_cleanup.source @@ -114,13 +114,348 @@ VACUUM 1: SELECT * FROM gp_ao_or_aocs_seg('ao_@amname@_vacuum_cleanup3'); segment_id | segno | tupcount | modcount | formatversion | state ------------+-------+----------+----------+---------------+------- - 2 | 1 | 25 | 2 | 3 | 2 - 2 | 2 | 0 | 0 | 3 | 1 - 1 | 1 | 37 | 2 | 3 | 2 - 1 | 2 | 0 
| 0 | 3 | 1 - 0 | 1 | 38 | 2 | 3 | 2 - 0 | 2 | 0 | 0 | 3 | 1 + 0 | 1 | 0 | 2 | 4 | 1 + 0 | 2 | 0 | 0 | 4 | 1 + 2 | 1 | 0 | 2 | 4 | 1 + 2 | 2 | 0 | 0 | 4 | 1 + 1 | 1 | 0 | 2 | 4 | 1 + 1 | 2 | 0 | 0 | 4 | 1 (6 rows) 2: commit; COMMIT + +1q: ... +2q: ... + +-- Test VACUUM with concurrent readers: +-- a) if reader transaction started before VACUUM, VACUUM should not drop the AWAITING_DROP segment +-- which was accessing by the reader; +-- b) if reader transaction started after VACUUM, VACUUM should drop the AWAITING_DROP segments. + +create or replace function show_aoseg(tabname text) returns table (segno int, tupcount bigint, modcount bigint, formatversion smallint, state smallint) as $$ declare tabrelid oid; /* in func */ tabsegrelid oid; /* in func */ tabsegrelname text; /* in func */ begin select tabname::regclass::oid into tabrelid; /* in func */ select segrelid from pg_appendonly where relid = tabrelid into tabsegrelid; /* in func */ select relname from pg_class where oid = tabsegrelid into tabsegrelname; /* in func */ +return query execute 'select segno,tupcount,modcount,formatversion,state from pg_aoseg.' 
|| tabsegrelname; /* in func */ end; /* in func */ $$ language plpgsql; +CREATE + +create table vacuum_concurrent_test_@amname@ (a int, b int, c int) using @amname@; +CREATE +insert into vacuum_concurrent_test_@amname@ select 2, b, b from generate_series(1, 11) b; +INSERT 11 +create index i_b_vacuum_concurrent_reader_@amname@ on vacuum_concurrent_test_@amname@(b); +CREATE +update vacuum_concurrent_test_@amname@ set b = b + 1; +UPDATE 11 + +-- expect segment state is DEFAULT (state == 1) +0U: select * from show_aoseg('vacuum_concurrent_test_@amname@') order by segno; + segno | tupcount | modcount | formatversion | state +-------+----------+----------+---------------+------- + 1 | 22 | 2 | 4 | 1 +(1 row) + +-- start a reader before VACUUM +1: begin; +BEGIN +1: select * from vacuum_concurrent_test_@amname@ where b = 10; + a | b | c +---+----+--- + 2 | 10 | 9 +(1 row) + +vacuum vacuum_concurrent_test_@amname@; +VACUUM + +-- expect to see AWAITING_DROP(state == 2) tuple +0U: select * from show_aoseg('vacuum_concurrent_test_@amname@') order by segno; + segno | tupcount | modcount | formatversion | state +-------+----------+----------+---------------+------- + 1 | 22 | 2 | 4 | 2 + 2 | 11 | 0 | 4 | 1 +(2 rows) + +1: select * from vacuum_concurrent_test_@amname@ where b = 5; + a | b | c +---+---+--- + 2 | 5 | 4 +(1 row) +1: select * from vacuum_concurrent_test_@amname@ order by c; + a | b | c +---+----+---- + 2 | 2 | 1 + 2 | 3 | 2 + 2 | 4 | 3 + 2 | 5 | 4 + 2 | 6 | 5 + 2 | 7 | 6 + 2 | 8 | 7 + 2 | 9 | 8 + 2 | 10 | 9 + 2 | 11 | 10 + 2 | 12 | 11 +(11 rows) +1: end; +END + +-- start another reader after VACUUM +2: begin; +BEGIN +2: select * from vacuum_concurrent_test_@amname@ where c = 2; + a | b | c +---+---+--- + 2 | 3 | 2 +(1 row) + +-- expect to see AWAITING_DROP(state == 2) tuple for unable to drop the dead segment by the first VACUUM +0U: select * from show_aoseg('vacuum_concurrent_test_@amname@') order by segno; + segno | tupcount | modcount | formatversion | state 
+-------+----------+----------+---------------+------- + 1 | 22 | 2 | 4 | 2 + 2 | 11 | 0 | 4 | 1 +(2 rows) + +vacuum vacuum_concurrent_test_@amname@; +VACUUM + +-- expect no AWAITING_DROP(state == 2) tuple because it was dropped by the last VACUUM +0U: select * from show_aoseg('vacuum_concurrent_test_@amname@') order by segno; + segno | tupcount | modcount | formatversion | state +-------+----------+----------+---------------+------- + 1 | 0 | 2 | 4 | 1 + 2 | 11 | 0 | 4 | 1 +(2 rows) + +2: select * from vacuum_concurrent_test_@amname@ where b = 7; + a | b | c +---+---+--- + 2 | 7 | 6 +(1 row) +2: select * from vacuum_concurrent_test_@amname@ order by c; + a | b | c +---+----+---- + 2 | 2 | 1 + 2 | 3 | 2 + 2 | 4 | 3 + 2 | 5 | 4 + 2 | 6 | 5 + 2 | 7 | 6 + 2 | 8 | 7 + 2 | 9 | 8 + 2 | 10 | 9 + 2 | 11 | 10 + 2 | 12 | 11 +(11 rows) +2: end; +END + +1q: ... +2q: ... + +-- Test VACUUM with concurrent writer. +-- There was a concurrent insert transaction started prior to VACUUM, VACUUM should not mark +-- the target segment which was also writting by the concurrent writer to AWAITING_DROP, and +-- the corresponding index entries should not be deleted. 
+ +truncate table vacuum_concurrent_test_@amname@; +TRUNCATE +insert into vacuum_concurrent_test_@amname@ select 2, b, b from generate_series(1, 5) b; +INSERT 5 +delete from vacuum_concurrent_test_@amname@; +DELETE 5 + +1: begin; +BEGIN +1: insert into vacuum_concurrent_test_@amname@ select 2, b, b from generate_series(6, 10) b; +INSERT 5 + +2: vacuum vacuum_concurrent_test_@amname@; +VACUUM + +1: commit; +COMMIT + +set enable_seqscan = on; +SET +-- start_ignore +explain (costs off) select * from vacuum_concurrent_test_@amname@ order by b; + QUERY PLAN +------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: b + -> Sort + Sort Key: b + -> Seq Scan on vacuum_concurrent_test_@amname@ + Optimizer: Postgres query optimizer +(6 rows) +-- end_ignore +select * from vacuum_concurrent_test_@amname@ order by b; + a | b | c +---+----+---- + 2 | 6 | 6 + 2 | 7 | 7 + 2 | 8 | 8 + 2 | 9 | 9 + 2 | 10 | 10 +(5 rows) + +-- expect all bitmapindexscan results are consistent with above seqscan +set enable_seqscan = off; +SET +-- start_ignore +explain (costs off) select * from vacuum_concurrent_test_@amname@ where b = b order by b; + QUERY PLAN +---------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: b + -> Sort + Sort Key: b + -> Bitmap Heap Scan on vacuum_concurrent_test_@amname@ + Recheck Cond: (b IS NOT NULL) + -> Bitmap Index Scan on i_b_vacuum_concurrent_reader_@amname@ + Index Cond: (b IS NOT NULL) + Optimizer: Postgres query optimizer +(9 rows) +-- end_ignore +select * from vacuum_concurrent_test_@amname@ where b = b order by b; + a | b | c +---+----+---- + 2 | 6 | 6 + 2 | 7 | 7 + 2 | 8 | 8 + 2 | 9 | 9 + 2 | 10 | 10 +(5 rows) + +-- expose dead tuples +set gp_select_invisible = true; +SET + +set enable_seqscan = on; +SET +-- start_ignore +explain (costs off) select * from vacuum_concurrent_test_@amname@ order by b; + QUERY PLAN 
+------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: b + -> Sort + Sort Key: b + -> Seq Scan on vacuum_concurrent_test_@amname@ + Optimizer: Postgres query optimizer +(6 rows) +-- end_ignore +select * from vacuum_concurrent_test_@amname@ order by b; + a | b | c +---+----+---- + 2 | 1 | 1 + 2 | 2 | 2 + 2 | 3 | 3 + 2 | 4 | 4 + 2 | 5 | 5 + 2 | 6 | 6 + 2 | 7 | 7 + 2 | 8 | 8 + 2 | 9 | 9 + 2 | 10 | 10 +(10 rows) + +-- expect all bitmapindexscan results are same as above seqscan +set enable_seqscan = off; +SET +-- start_ignore +explain (costs off) select * from vacuum_concurrent_test_@amname@ where b = b order by b; + QUERY PLAN +---------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: b + -> Sort + Sort Key: b + -> Bitmap Heap Scan on vacuum_concurrent_test_@amname@ + Recheck Cond: (b IS NOT NULL) + -> Bitmap Index Scan on i_b_vacuum_concurrent_reader_@amname@ + Index Cond: (b IS NOT NULL) + Optimizer: Postgres query optimizer +(9 rows) +-- end_ignore +select * from vacuum_concurrent_test_@amname@ where b = b order by b; + a | b | c +---+----+---- + 2 | 1 | 1 + 2 | 2 | 2 + 2 | 3 | 3 + 2 | 4 | 4 + 2 | 5 | 5 + 2 | 6 | 6 + 2 | 7 | 7 + 2 | 8 | 8 + 2 | 9 | 9 + 2 | 10 | 10 +(10 rows) + +-- vacuum again without concurrent reader/writer, expect above dead tuples could be removed +2: vacuum vacuum_concurrent_test_@amname@; +VACUUM + +set enable_seqscan = on; +SET +-- start_ignore +explain (costs off) select * from vacuum_concurrent_test_@amname@ order by b; + QUERY PLAN +------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: b + -> Sort + Sort Key: b + -> Seq Scan on vacuum_concurrent_test_@amname@ + Optimizer: Postgres query optimizer +(6 rows) +-- end_ignore +select * from vacuum_concurrent_test_@amname@ order by b; + a | b | c +---+----+---- + 2 | 6 | 6 + 2 | 7 | 7 + 2 | 8 | 8 + 2 | 9 | 9 + 
2 | 10 | 10 +(5 rows) + +-- expect all bitmapindexscan results are same as seqscan +set enable_seqscan = off; +SET +-- start_ignore +explain (costs off) select * from vacuum_concurrent_test_@amname@ where b = b order by b; + QUERY PLAN +---------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: b + -> Sort + Sort Key: b + -> Bitmap Heap Scan on vacuum_concurrent_test_@amname@ + Recheck Cond: (b IS NOT NULL) + -> Bitmap Index Scan on i_b_vacuum_concurrent_reader_@amname@ + Index Cond: (b IS NOT NULL) + Optimizer: Postgres query optimizer +(9 rows) +-- end_ignore +select * from vacuum_concurrent_test_@amname@ where b = b order by b; + a | b | c +---+----+---- + 2 | 6 | 6 + 2 | 7 | 7 + 2 | 8 | 8 + 2 | 9 | 9 + 2 | 10 | 10 +(5 rows) + +1q: ... +2q: ... + +reset enable_seqscan; +RESET +reset gp_select_invisible; +RESET + +drop table vacuum_concurrent_test_@amname@; +DROP +drop function show_aoseg; +DROP diff --git a/src/test/isolation2/output/uao/vacuum_self_serializable.source b/src/test/isolation2/output/uao/vacuum_self_serializable.source index fd70b62d4a1..cf109c338e5 100644 --- a/src/test/isolation2/output/uao/vacuum_self_serializable.source +++ b/src/test/isolation2/output/uao/vacuum_self_serializable.source @@ -19,9 +19,9 @@ SELECT COUNT(*) FROM ao; SELECT *, segno, tupcount FROM gp_ao_or_aocs_seg('ao'); segment_id | segno | tupcount | modcount | formatversion | state | segno | tupcount ------------+-------+----------+----------+---------------+-------+-------+---------- - 0 | 1 | 38 | 2 | 3 | 1 | 1 | 38 - 1 | 1 | 37 | 2 | 3 | 1 | 1 | 37 - 2 | 1 | 25 | 2 | 3 | 1 | 1 | 25 + 0 | 1 | 38 | 2 | 4 | 1 | 1 | 38 + 1 | 1 | 37 | 2 | 4 | 1 | 1 | 37 + 2 | 1 | 25 | 2 | 4 | 1 | 1 | 25 (3 rows) VACUUM ao; VACUUM @@ -29,9 +29,9 @@ VACUUM SELECT *, segno, tupcount FROM gp_ao_or_aocs_seg('ao') where state = 1 and tupcount > 0; segment_id | segno | tupcount | modcount | formatversion | state | segno | tupcount 
------------+-------+----------+----------+---------------+-------+-------+---------- - 0 | 2 | 26 | 0 | 3 | 1 | 2 | 26 - 1 | 2 | 30 | 0 | 3 | 1 | 2 | 30 - 2 | 2 | 14 | 0 | 3 | 1 | 2 | 14 + 0 | 2 | 26 | 0 | 4 | 1 | 2 | 26 + 1 | 2 | 30 | 0 | 4 | 1 | 2 | 30 + 2 | 2 | 14 | 0 | 4 | 1 | 2 | 14 (3 rows) SELECT COUNT(*) FROM ao; count diff --git a/src/test/isolation2/sql/ao_blkdir.sql b/src/test/isolation2/sql/ao_blkdir.sql new file mode 100644 index 00000000000..73935d06800 --- /dev/null +++ b/src/test/isolation2/sql/ao_blkdir.sql @@ -0,0 +1,83 @@ +-- White-box tests asserting composition of AO/CO block directory entries. +-- All tuples are directed to seg0 and each INSERT has an increasing row count +-- to make their identification easy. + +-------------------------------------------------------------------------------- +-- AO tables +-------------------------------------------------------------------------------- + +CREATE TABLE ao_blkdir_test(i int, j int) USING ao_row DISTRIBUTED BY (j); +CREATE INDEX ao_blkdir_test_idx ON ao_blkdir_test(i); + +1: INSERT INTO ao_blkdir_test SELECT i, 2 FROM generate_series(1, 10) i; +-- There should be 1 block directory row with a single entry covering 10 rows +SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') + WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + +1: INSERT INTO ao_blkdir_test SELECT i, 2 FROM generate_series(11, 30) i; +-- There should be 2 block directory entries in a new block directory row, and +-- the row from the previous INSERT should not be visible. The entry from the +-- first INSERT should remain unchanged. 
+SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') +WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + +1: BEGIN; +1: INSERT INTO ao_blkdir_test SELECT i, 2 FROM generate_series(31, 60) i; +2: BEGIN; +2: INSERT INTO ao_blkdir_test SELECT i, 2 FROM generate_series(71, 110) i; +1: COMMIT; +2: COMMIT; +-- The second INSERT of 40 rows above would have landed in segfile 1 (unlike +-- segfile 0, like the first INSERT of 30 rows above). This should be reflected +-- in the block directory entries for these rows. +SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') +WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + +TRUNCATE ao_blkdir_test; +-- Insert enough rows to overflow the first block directory minipage by 2. +INSERT INTO ao_blkdir_test SELECT i, 2 FROM generate_series(1, 292700) i; +-- There should be 2 block directory rows, one with 161 entries covering 292698 +-- rows and the other with 1 entry covering the 2 overflow rows. +SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') +WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + +-------------------------------------------------------------------------------- +-- AOCO tables +-------------------------------------------------------------------------------- + +CREATE TABLE aoco_blkdir_test(i int, j int) USING ao_column DISTRIBUTED BY (j); +CREATE INDEX aoco_blkdir_test_idx ON aoco_blkdir_test(i); + +1: INSERT INTO aoco_blkdir_test SELECT i, 2 FROM generate_series(1, 10) i; +-- There should be 2 block directory rows with a single entry covering 10 rows, +-- (1 for each column). +SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_id') +WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + +1: INSERT INTO aoco_blkdir_test SELECT i, 2 FROM generate_series(11, 30) i; +-- There should be 2 block directory rows, carrying 2 entries each. The rows +-- from the previous INSERT should not be visible. 
The entries from the first +-- INSERT should remain unchanged. +SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_id') +WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + +1: BEGIN; +1: INSERT INTO aoco_blkdir_test SELECT i, 2 FROM generate_series(31, 60) i; +2: BEGIN; +2: INSERT INTO aoco_blkdir_test SELECT i, 2 FROM generate_series(71, 110) i; +1: COMMIT; +2: COMMIT; +-- The second INSERT of 40 rows above would have landed in segfile 1 (unlike +-- segfile 0, like the first INSERT of 30 rows above). This should be reflected +-- in the block directory entries for these rows. +SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_id') +WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + +TRUNCATE aoco_blkdir_test; +-- Insert enough rows to overflow the first block directory minipage by 2. +INSERT INTO aoco_blkdir_test SELECT i, 2 FROM generate_series(1, 1317143) i; +-- There should be 4 block directory rows, 2 for each column, one with 161 +-- entries covering 1317141 rows and the other with 1 entry covering the 2 +-- overflow rows. 
+SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_id') +WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; diff --git a/src/test/regress/expected/alter_table_set_am.out b/src/test/regress/expected/alter_table_set_am.out new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/test/regress/expected/gp_toolkit_ao_funcs.out b/src/test/regress/expected/gp_toolkit_ao_funcs.out index fe8f26d8218..91ca5182049 100644 --- a/src/test/regress/expected/gp_toolkit_ao_funcs.out +++ b/src/test/regress/expected/gp_toolkit_ao_funcs.out @@ -101,7 +101,7 @@ SELECT (t).* FROM ( ) AS x; tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count ---------+-------+----------------+----------+--------------+-------------+----------- - (0,2) | 1 | 0 | 0 | 1 | 0 | 100 + (0,2) | 1 | 0 | 0 | 1 | 0 | 20 (0,2) | 1 | 0 | 1 | 101 | 392 | 1 (2 rows) @@ -110,11 +110,11 @@ SELECT (t).* FROM ( ) AS x; tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count ---------+-------+----------------+----------+--------------+-------------+----------- - (0,4) | 1 | 0 | 0 | 1 | 0 | 100 + (0,4) | 1 | 0 | 0 | 1 | 0 | 20 (0,4) | 1 | 0 | 1 | 101 | 120 | 1 - (0,5) | 1 | 1 | 0 | 1 | 0 | 100 + (0,5) | 1 | 1 | 0 | 1 | 0 | 20 (0,5) | 1 | 1 | 1 | 101 | 120 | 1 - (0,6) | 1 | 2 | 0 | 1 | 0 | 100 + (0,6) | 1 | 2 | 0 | 1 | 0 | 20 (0,6) | 1 | 2 | 1 | 101 | 48 | 1 (6 rows) diff --git a/src/test/regress/output/gp_tablespace.source b/src/test/regress/output/gp_tablespace.source index e8678813e70..fbad4a38b11 100644 --- a/src/test/regress/output/gp_tablespace.source +++ b/src/test/regress/output/gp_tablespace.source @@ -166,9 +166,9 @@ select count(*) from ao_ul_ctas; select * from gp_toolkit.__gp_aoseg('ao_ul_ctas'); segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state ------------+-------+-------+----------+---------------+------------------+----------+---------------+------- - 2 | 0 | 
84336 | 3247 | 3 | 84336 | 1 | 3 | 1 - 1 | 0 | 87816 | 3385 | 3 | 87816 | 1 | 3 | 1 - 0 | 0 | 87368 | 3368 | 3 | 87368 | 1 | 3 | 1 + 2 | 0 | 84336 | 3247 | 3 | 84336 | 1 | 4 | 1 + 1 | 0 | 87816 | 3385 | 3 | 87816 | 1 | 4 | 1 + 0 | 0 | 87368 | 3368 | 3 | 87368 | 1 | 4 | 1 (3 rows) select count(*) from aoco_ul_ctas; @@ -180,12 +180,12 @@ select count(*) from aoco_ul_ctas; select * from gp_toolkit.__gp_aocsseg('aoco_ul_ctas'); segment_id | segno | column_num | physical_segno | tupcount | eof | eof_uncompressed | modcount | formatversion | state ------------+-------+------------+----------------+----------+-------+------------------+----------+---------------+------- - 0 | 0 | 0 | 0 | 3368 | 13512 | 13512 | 1 | 3 | 1 - 0 | 0 | 1 | 128 | 3368 | 26608 | 26608 | 1 | 3 | 1 - 1 | 0 | 0 | 0 | 3385 | 13584 | 13584 | 1 | 3 | 1 - 1 | 0 | 1 | 128 | 3385 | 26760 | 26760 | 1 | 3 | 1 - 2 | 0 | 0 | 0 | 3247 | 13032 | 13032 | 1 | 3 | 1 - 2 | 0 | 1 | 128 | 3247 | 25656 | 25656 | 1 | 3 | 1 + 0 | 0 | 0 | 0 | 3368 | 13512 | 13512 | 1 | 4 | 1 + 0 | 0 | 1 | 128 | 3368 | 26608 | 26608 | 1 | 4 | 1 + 1 | 0 | 0 | 0 | 3385 | 13584 | 13584 | 1 | 4 | 1 + 1 | 0 | 1 | 128 | 3385 | 26760 | 26760 | 1 | 4 | 1 + 2 | 0 | 0 | 0 | 3247 | 13032 | 13032 | 1 | 4 | 1 + 2 | 0 | 1 | 128 | 3247 | 25656 | 25656 | 1 | 4 | 1 (6 rows) -- Check that init fork exists on master From 25fd24af07525ddd20315a4097c5068c1b4e6e62 Mon Sep 17 00:00:00 2001 From: Soumyadeep Chakraborty Date: Mon, 10 Oct 2022 16:23:37 -0700 Subject: [PATCH 03/19] aoblkdir: Remove reset logic for minipage entries We discovered code in extract_minipage() which "resets" supposedly spurious block directory entries from crashed/cancelled inserts. This code is vestigial as the entries we are trying to fix can never be seen with an MVCC/dirty snapshot, as their block directory row(s) would be invisible (due to tx abort processing). 
Also, the assertions here such as Assert(fsinfo != NULL) get in the way of implementing unique index lookups for AO/CO, where we need to look up block directory entries which don't back actual on-disk tuples (where the fsinfo is not set etc), in a later commit. Co-authored-by: Ashwin Agrawal --- .../appendonly/appendonlyblockdirectory.c | 45 +------------------ 1 file changed, 1 insertion(+), 44 deletions(-) diff --git a/src/backend/access/appendonly/appendonlyblockdirectory.c b/src/backend/access/appendonly/appendonlyblockdirectory.c index 61b493c9a82..a39eeefef8e 100644 --- a/src/backend/access/appendonly/appendonlyblockdirectory.c +++ b/src/backend/access/appendonly/appendonlyblockdirectory.c @@ -910,14 +910,7 @@ extract_minipage(AppendOnlyBlockDirectory *blockDirectory, { Datum *values = blockDirectory->values; bool *nulls = blockDirectory->nulls; - MinipagePerColumnGroup *minipageInfo = - &blockDirectory->minipages[columnGroupNo]; - FileSegInfo *fsInfo = blockDirectory->currentSegmentFileInfo; - int64 eof; - int start, - end, - mid = 0; - bool found = false; + MinipagePerColumnGroup *minipageInfo = &blockDirectory->minipages[columnGroupNo]; heap_deform_tuple(tuple, tupleDesc, values, nulls); @@ -932,42 +925,6 @@ extract_minipage(AppendOnlyBlockDirectory *blockDirectory, nulls[Anum_pg_aoblkdir_minipage - 1]); ItemPointerCopy(&tuple->t_self, &minipageInfo->tupleTid); - - /* - * When crashes during inserts, or cancellation during inserts, there are - * out-of-date minipage entries in the block directory. We reset those - * entries here. 
- */ - Assert(fsInfo != NULL); - if (!blockDirectory->isAOCol) - eof = fsInfo->eof; - else - eof = ((AOCSFileSegInfo *) fsInfo)->vpinfo.entry[columnGroupNo].eof; - - start = 0; - end = minipageInfo->numMinipageEntries - 1; - while (start <= end) - { - mid = (end - start + 1) / 2 + start; - if (minipageInfo->minipage->entry[mid].fileOffset > eof) - end = mid - 1; - else if (minipageInfo->minipage->entry[mid].fileOffset < eof) - start = mid + 1; - else - { - found = true; - break; - } - } - - minipageInfo->numMinipageEntries = 0; - if (found) - minipageInfo->numMinipageEntries = mid; - else if (start > 0) - { - minipageInfo->numMinipageEntries = start; - Assert(minipageInfo->minipage->entry[start - 1].fileOffset < eof); - } } /* From f21f7c960e52912000c0527e5779d4f4e0141717 Mon Sep 17 00:00:00 2001 From: Soumyadeep Chakraborty Date: Mon, 10 Oct 2022 19:31:47 -0700 Subject: [PATCH 04/19] INSERT/COPY on AO/CO tables with unique indexes This commit introduces support for INSERT/COPY on AO and AOCO tables with unique indexes. Mechanism: (*) To answer uniqueness checks for AO/AOCO tables, we have a complication. Unlike heap, in AO/CO we don't store the xmin/xmax fields in the tuples. So, we have to rely on block directory rows that "cover" the data rows to satisfy index lookups. The xmin/xmax of the block directory row(s) help determine tuple visibility for uniqueness checks. (*) Since block directory rows are written usually much after the data row has been inserted, there are windows in which there is no block directory row on disk for a given data row - a problem for concurrent unique index checks. So during INSERT/COPY, at the beginning of the insertion operation, we insert a placeholder block directory row to cover ALL future tuples going to the current segment file for this command. (*) To answer unique index lookups, we don't have to physically fetch the tuple from the table. 
This is key to answering unique index lookups against placeholder rows which predate their corresponding data rows. We simply perform a sysscan of the block directory, and if we have a visible entry that encompasses the rowNum being looked up, we report success. (*) Tableam changes: Since there is a lot of overhead (leads to ~20x performance degradation in the worst case) in setting up and tearing down scan descriptors for AO/CO tables, we avoid the scanbegin..fetch..scanend construct in table_index_fetch_tuple_check(). So, we introduce a new tableam API index_fetch_tuple_exists(), which is implemented only for AO/CO tables. Here, we fetch a UniqueCheckDesc, which stores all the in-memory state to help us perform a unique index check. This descriptor is attached to the DMLState structs. Currently, the descriptor holds only a block directory struct. It will be modified later on to hold a visimap reference to help implement DELETEs/UPDATEs. Furthermore, we initialize this struct on the first unique index check performed, akin to how we initialize descriptors for insert and delete. (*) There is NO change to non-unique index related code paths. Notes: * This only gets INSERT/COPY to work. Other operations may yield unpredictable behavior. So we mark the feature as experimental, and can only be turned on by setting the gp_appendonly_enable_unique_index GUC * We also ban unique index creation if the AO table has a relation version <= AORelationVersion_PG83 and if it already has a block directory. This is because unique index checks need continuity inside a directory entry's range for correct behavior - something that legacy hole filling violates (PS: legacy hole filling was removed in 258ec966b26). 
Co-authored-by: Ashwin Agrawal --- src/backend/access/aocs/aocsam_handler.c | 161 ++++++- src/backend/access/appendonly/README.md | 36 ++ .../access/appendonly/appendonlyam_handler.c | 135 ++++++ .../appendonly/appendonlyblockdirectory.c | 292 ++++++++++++- src/backend/access/table/tableam.c | 8 + src/backend/catalog/aoblkdir.c | 57 +++ src/backend/commands/indexcmds.c | 21 +- src/backend/utils/misc/guc_gp.c | 12 + src/include/access/tableam.h | 23 + src/include/catalog/aoblkdir.h | 1 + src/include/cdb/cdbaocsam.h | 7 + src/include/cdb/cdbappendonlyam.h | 8 + src/include/cdb/cdbappendonlyblockdirectory.h | 11 + src/include/utils/guc.h | 1 + src/test/isolation2/expected/ao_blkdir.out | 197 +++++++++ .../isolation2/expected/ao_unique_index.out | 412 ++++++++++++++++++ .../isolation2/expected/aocs_unique_index.out | 412 ++++++++++++++++++ src/test/isolation2/isolation2_schedule | 7 +- src/test/isolation2/sql/ao_blkdir.sql | 122 ++++++ src/test/isolation2/sql/ao_unique_index.sql | 275 ++++++++++++ src/test/isolation2/sql/aocs_unique_index.sql | 275 ++++++++++++ 21 files changed, 2454 insertions(+), 19 deletions(-) create mode 100644 src/test/isolation2/expected/ao_unique_index.out create mode 100644 src/test/isolation2/expected/aocs_unique_index.out create mode 100644 src/test/isolation2/sql/ao_unique_index.sql create mode 100644 src/test/isolation2/sql/aocs_unique_index.sql diff --git a/src/backend/access/aocs/aocsam_handler.c b/src/backend/access/aocs/aocsam_handler.c index 5e20b685d20..10a1e50d207 100644 --- a/src/backend/access/aocs/aocsam_handler.c +++ b/src/backend/access/aocs/aocsam_handler.c @@ -90,6 +90,7 @@ typedef struct AOCODMLState AOCSInsertDesc insertDesc; dlist_head head; // Head of multiple segment files insertion list. 
AOCSDeleteDesc deleteDesc; + AOCSUniqueCheckDesc uniqueCheckDesc; } AOCODMLState; static void reset_state_cb(void *arg); @@ -189,6 +190,7 @@ enter_dml_state(const Oid relationOid) state->insertDesc = NULL; state->deleteDesc = NULL; + state->uniqueCheckDesc = NULL; dlist_init(&state->head); Assert(!found); @@ -298,6 +300,15 @@ aoco_dml_finish(Relation relation, CmdType operation) state->insertDesc = NULL; } + if (state->uniqueCheckDesc) + { + AppendOnlyBlockDirectory_End_forSearch(state->uniqueCheckDesc->blockDirectory); + pfree(state->uniqueCheckDesc->blockDirectory); + state->uniqueCheckDesc->blockDirectory = NULL; + pfree(state->uniqueCheckDesc); + state->uniqueCheckDesc = NULL; + } + } /* @@ -315,8 +326,50 @@ get_insert_descriptor(const Relation relation) { List *segments = NIL; MemoryContext oldcxt; + AOCSInsertDesc insertDesc; oldcxt = MemoryContextSwitchTo(aocoLocal.stateCxt); + insertDesc = aocs_insert_init(relation, + ChooseSegnoForWrite(relation), + num_rows); + /* + * If we have a unique index, insert a placeholder block directory row to + * entertain uniqueness checks from concurrent inserts. See + * AppendOnlyBlockDirectory_InsertPlaceholder() for details. + * + * Note: For AOCO tables, we need to only insert a placeholder block + * directory row for the 1st non-dropped column. This is because + * during a uniqueness check, only the first non-dropped column's block + * directory entry is consulted. 
(See AppendOnlyBlockDirectory_CoversTuple()) + */ + if (relationHasUniqueIndex(relation)) + { + int firstNonDroppedColumn = -1; + int64 firstRowNum; + DatumStreamWrite *dsw; + BufferedAppend *bufferedAppend; + int64 fileOffset; + + for(int i = 0; i < relation->rd_att->natts; i++) + { + if (!relation->rd_att->attrs[i].attisdropped) { + firstNonDroppedColumn = i; + break; + } + } + Assert(firstNonDroppedColumn != -1); + + dsw = insertDesc->ds[firstNonDroppedColumn]; + firstRowNum = dsw->blockFirstRowNum; + bufferedAppend = &dsw->ao_write.bufferedAppend; + fileOffset = BufferedAppendNextBufferPosition(bufferedAppend); + + AppendOnlyBlockDirectory_InsertPlaceholder(&insertDesc->blockDirectory, + firstRowNum, + fileOffset, + firstNonDroppedColumn); + } + state->insertDesc = insertDesc; state->insertDesc = aocs_insert_init(relation, ChooseSegnoForWrite(relation)); dlist_init(&state->head); @@ -392,6 +445,29 @@ get_delete_descriptor(const Relation relation, bool forUpdate) return state->deleteDesc; } +static AOCSUniqueCheckDesc +get_or_create_unique_check_desc(Relation relation, Snapshot snapshot) +{ + AOCODMLState *state = find_dml_state(RelationGetRelid(relation)); + + if (!state->uniqueCheckDesc) + { + MemoryContext oldcxt; + AOCSUniqueCheckDesc uniqueCheckDesc; + + oldcxt = MemoryContextSwitchTo(aocoLocal.stateCxt); + uniqueCheckDesc = palloc0(sizeof(AOCSUniqueCheckDescData)); + uniqueCheckDesc->blockDirectory = palloc0(sizeof(AppendOnlyBlockDirectory)); + AppendOnlyBlockDirectory_Init_forSearch(uniqueCheckDesc->blockDirectory, + snapshot, NULL, -1, relation, + relation->rd_att->natts, false, NULL); + state->uniqueCheckDesc = uniqueCheckDesc; + MemoryContextSwitchTo(oldcxt); + } + + return state->uniqueCheckDesc; +} + /* * AO_COLUMN access method uses virtual tuples */ @@ -745,10 +821,90 @@ aoco_index_fetch_tuple(struct IndexFetchTableData *scan, if (aocs_fetch(aocoscan->aocofetch, (AOTupleId *) tid, slot)) { ExecStoreVirtualTuple(slot); - return true; + found = true; 
} - return false; + /* + * Currently, we don't determine this parameter. By contract, it is to be + * set to true iff we can determine that this row is dead to all + * transactions. Failure to set this will lead to use of a garbage value + * in certain code, such as that for unique index checks. + * This is typically used for HOT chains, which we don't support. + */ + if (all_dead) + *all_dead = false; + + /* Currently, we don't determine this parameter. By contract, it is to be + * set to true iff there is another tuple for the tid, so that we can prompt + * the caller to call index_fetch_tuple() again for the same tid. + * This is typically used for HOT chains, which we don't support. + */ + if (call_again) + *call_again = false; + + return found; +} + +/* + * Check if a visible tuple exists given the tid and a snapshot. This is + * currently used to determine uniqueness checks. + * + * We determine existence simply by checking if a *visible* block directory + * entry covers the given tid. + * + * There is no need to fetch the tuple (we actually can't reliably do so as + * we might encounter a placeholder row in the block directory) + */ +static bool +aoco_index_fetch_tuple_exists(Relation rel, + ItemPointer tid, + Snapshot snapshot, + bool *all_dead) +{ + AOCSUniqueCheckDesc uniqueCheckDesc; + AppendOnlyBlockDirectory *blockDirectory; + AOTupleId *aoTupleId = (AOTupleId *) tid; + +#ifdef USE_ASSERT_CHECKING + int segmentFileNum = AOTupleIdGet_segmentFileNum(aoTupleId); + int64 rowNum = AOTupleIdGet_rowNum(aoTupleId); + + Assert(segmentFileNum != InvalidFileSegNumber); + Assert(rowNum != InvalidAORowNum); + /* + * Since this can only be called in the context of a unique index check, the + * snapshots that are supplied can only be non-MVCC snapshots: SELF and DIRTY. + */ + Assert(snapshot->snapshot_type == SNAPSHOT_SELF || + snapshot->snapshot_type == SNAPSHOT_DIRTY); +#endif + + /* + * Currently, we don't determine this parameter. 
By contract, it is to be + * set to true iff we can determine that this row is dead to all + * transactions. Failure to set this will lead to use of a garbage value + * in certain code, such as that for unique index checks. + * This is typically used for HOT chains, which we don't support. + */ + if (all_dead) + *all_dead = false; + + /* + * FIXME: for when we want CREATE UNIQUE INDEX CONCURRENTLY to work + * Unique constraint violation checks with SNAPSHOT_SELF are currently + * required to support CREATE UNIQUE INDEX CONCURRENTLY. Currently, the + * sole placeholder row inserted at first insert might not be visible to + * the snapshot, if it was already updated by its actual first row. So, + * we would need to flush a placeholder row at the beginning of each new + * in-memory minipage. Currently, CREATE INDEX CONCURRENTLY isn't + * supported, so we assume such a check satisfies SNAPSHOT_SELF. + */ + if (snapshot->snapshot_type == SNAPSHOT_SELF) + return true; + + uniqueCheckDesc = get_or_create_unique_check_desc(rel, snapshot); + blockDirectory = uniqueCheckDesc->blockDirectory; + return AppendOnlyBlockDirectory_CoversTuple(blockDirectory, aoTupleId); } static void @@ -1964,6 +2120,7 @@ static TableAmRoutine ao_column_methods = { .index_fetch_reset = aoco_index_fetch_reset, .index_fetch_end = aoco_index_fetch_end, .index_fetch_tuple = aoco_index_fetch_tuple, + .index_fetch_tuple_exists = aoco_index_fetch_tuple_exists, .tuple_insert = aoco_tuple_insert, .tuple_insert_speculative = aoco_tuple_insert_speculative, diff --git a/src/backend/access/appendonly/README.md b/src/backend/access/appendonly/README.md index 6d43acf0dfa..798b7b0b23e 100644 --- a/src/backend/access/appendonly/README.md +++ b/src/backend/access/appendonly/README.md @@ -178,3 +178,39 @@ Vacuum drop phase, to recycle segments that have been compacted, checks the xmin of each AWAITING_DROP segment. If it's visible to everyone, the segfile is recycled. 
It uses the relation extension lock to protect the scan over pg_aoseg. + + +# Unique indexes + +To answer uniqueness checks for AO/AOCO tables, we have a complication. Unlike +heap, in AO/CO we don't store the xmin/xmax fields in the tuples. So, we have to +rely on block directory rows that "cover" the data rows to satisfy index lookups. +The xmin/xmax of the block directory row(s) help determine tuple visibility for +uniqueness checks. + +Since block directory rows are written usually much after the data row has been +inserted, there are windows in which there is no block directory row on disk +for a given data row - a problem for concurrent unique index checks. So during +INSERT/COPY, at the beginning of the insertion operation, we insert a +placeholder block directory row to cover ALL future tuples going to the current +segment file for this command. + +To answer unique index lookups, we don't have to physically fetch the tuple from +the table. This is key to answering unique index lookups against placeholder +rows which predate their corresponding data rows. We simply perform a sysscan of +the block directory, and if we have a visible entry that encompasses the rowNum +being looked up, we report success. + +Tableam changes: Since there is a lot of overhead (leads to ~20x performance +degradation in the worst case) in setting up and tearing down scan descriptors +for AO/CO tables, we avoid the scanbegin..fetch..scanend construct in +table_index_fetch_tuple_check(). + +So, a new tableam API index_fetch_tuple_exists() is used, which is implemented +only for AO/CO tables. Here, we fetch a UniqueCheckDesc, which stores all the +in-memory state to help us perform a unique index check. This descriptor is +attached to the DMLState structs. Currently, the descriptor holds only a block +directory struct. It will be modified later on to hold a visimap reference to +help implement DELETEs/UPDATEs. 
Furthermore, we initialize this struct on the +first unique index check performed, akin to how we initialize descriptors for +insert and delete. diff --git a/src/backend/access/appendonly/appendonlyam_handler.c b/src/backend/access/appendonly/appendonlyam_handler.c index 12dce83d488..c11ac90a300 100644 --- a/src/backend/access/appendonly/appendonlyam_handler.c +++ b/src/backend/access/appendonly/appendonlyam_handler.c @@ -61,6 +61,7 @@ typedef struct AppendOnlyDMLState AppendOnlyInsertDesc insertDesc; dlist_head head; // Head of multiple segment files insertion list. AppendOnlyDeleteDesc deleteDesc; + AppendOnlyUniqueCheckDesc uniqueCheckDesc; } AppendOnlyDMLState; @@ -160,6 +161,7 @@ enter_dml_state(const Oid relationOid) state->insertDesc = NULL; state->deleteDesc = NULL; + state->uniqueCheckDesc = NULL; dlist_init(&state->head); Assert(!found); @@ -268,6 +270,15 @@ appendonly_dml_finish(Relation relation, CmdType operation) appendonly_insert_finish(state->insertDesc, &state->head); state->insertDesc = NULL; } + + if (state->uniqueCheckDesc) + { + AppendOnlyBlockDirectory_End_forSearch(state->uniqueCheckDesc->blockDirectory); + pfree(state->uniqueCheckDesc->blockDirectory); + state->uniqueCheckDesc->blockDirectory = NULL; + pfree(state->uniqueCheckDesc); + state->uniqueCheckDesc = NULL; + } } /* @@ -300,8 +311,29 @@ get_insert_descriptor(const Relation relation) { List *segments = NIL; MemoryContext oldcxt; + AppendOnlyInsertDesc insertDesc; oldcxt = MemoryContextSwitchTo(appendOnlyLocal.stateCxt); + insertDesc = appendonly_insert_init(relation, + ChooseSegnoForWrite(relation), + num_rows); + /* + * If we have a unique index, insert a placeholder block directory row + * to entertain uniqueness checks from concurrent inserts. See + * AppendOnlyBlockDirectory_InsertPlaceholder() for details. 
+ */ + if (relationHasUniqueIndex(relation)) + { + int64 firstRowNum = insertDesc->lastSequence + 1; + BufferedAppend *bufferedAppend = &insertDesc->storageWrite.bufferedAppend; + int64 fileOffset = BufferedAppendNextBufferPosition(bufferedAppend); + + AppendOnlyBlockDirectory_InsertPlaceholder(&insertDesc->blockDirectory, + firstRowNum, + fileOffset, + 0); + } + state->insertDesc = insertDesc; state->insertDesc = appendonly_insert_init(relation, ChooseSegnoForWrite(relation)); @@ -376,6 +408,28 @@ get_delete_descriptor(const Relation relation, bool forUpdate) return state->deleteDesc; } +static AppendOnlyUniqueCheckDesc +get_or_create_unique_check_desc(Relation relation, Snapshot snapshot) +{ + AppendOnlyDMLState *state = find_dml_state(RelationGetRelid(relation)); + + if (!state->uniqueCheckDesc) + { + MemoryContext oldcxt; + AppendOnlyUniqueCheckDesc uniqueCheckDesc; + + oldcxt = MemoryContextSwitchTo(appendOnlyLocal.stateCxt); + uniqueCheckDesc = palloc0(sizeof(AppendOnlyUniqueCheckDescData)); + uniqueCheckDesc->blockDirectory = palloc0(sizeof(AppendOnlyBlockDirectory)); + AppendOnlyBlockDirectory_Init_forSearch(uniqueCheckDesc->blockDirectory, + snapshot, NULL, -1, relation, + 1, false, NULL); + state->uniqueCheckDesc = uniqueCheckDesc; + MemoryContextSwitchTo(oldcxt); + } + + return state->uniqueCheckDesc; +} /* ------------------------------------------------------------------------ * Slot related callbacks for appendonly AM @@ -547,9 +601,89 @@ appendonly_index_fetch_tuple(struct IndexFetchTableData *scan, appendonly_fetch(aoscan->aofetch, (AOTupleId *) tid, slot); + /* + * Currently, we don't determine this parameter. By contract, it is to be + * set to true iff we can determine that this row is dead to all + * transactions. Failure to set this will lead to use of a garbage value + * in certain code, such as that for unique index checks. + * This is typically used for HOT chains, which we don't support. 
+ */ + if (all_dead) + *all_dead = false; + + /* Currently, we don't determine this parameter. By contract, it is to be + * set to true iff there is another tuple for the tid, so that we can prompt + * the caller to call index_fetch_tuple() again for the same tid. + * This is typically used for HOT chains, which we don't support. + */ + if (call_again) + *call_again = false; + return !TupIsNull(slot); } +/* + * Check if a visible tuple exists given the tid and a snapshot. This is + * currently used to determine uniqueness checks. + * + * We determine existence simply by checking if a *visible* block directory + * entry covers the given tid. + * + * There is no need to fetch the tuple (we actually can't reliably do so as + * we might encounter a placeholder row in the block directory) + */ +static bool +appendonly_index_fetch_tuple_exists(Relation rel, + ItemPointer tid, + Snapshot snapshot, + bool *all_dead) +{ + AppendOnlyUniqueCheckDesc uniqueCheckDesc; + AppendOnlyBlockDirectory *blockDirectory; + AOTupleId *aoTupleId = (AOTupleId *) tid; + +#ifdef USE_ASSERT_CHECKING + int segmentFileNum = AOTupleIdGet_segmentFileNum(aoTupleId); + int64 rowNum = AOTupleIdGet_rowNum(aoTupleId); + + Assert(segmentFileNum != InvalidFileSegNumber); + Assert(rowNum != InvalidAORowNum); + /* + * Since this can only be called in the context of a unique index check, the + * snapshots that are supplied can only be non-MVCC snapshots: SELF and DIRTY. + */ + Assert(snapshot->snapshot_type == SNAPSHOT_SELF || + snapshot->snapshot_type == SNAPSHOT_DIRTY); +#endif + + /* + * Currently, we don't determine this parameter. By contract, it is to be + * set to true iff we can determine that this row is dead to all + * transactions. Failure to set this will lead to use of a garbage value + * in certain code, such as that for unique index checks. + * This is typically used for HOT chains, which we don't support. 
+ */ + if (all_dead) + *all_dead = false; + + /* + * FIXME: for when we want CREATE UNIQUE INDEX CONCURRENTLY to work + * Unique constraint violation checks with SNAPSHOT_SELF are currently + * required to support CREATE UNIQUE INDEX CONCURRENTLY. Currently, the + * sole placeholder row inserted at first insert might not be visible to + * the snapshot, if it was already updated by its actual first row. So, + * we would need to flush a placeholder row at the beginning of each new + * in-memory minipage. Currently, CREATE INDEX CONCURRENTLY isn't + * supported, so we assume such a check satisfies SNAPSHOT_SELF. + */ + if (snapshot->snapshot_type == SNAPSHOT_SELF) + return true; + + uniqueCheckDesc = get_or_create_unique_check_desc(rel, snapshot); + blockDirectory = uniqueCheckDesc->blockDirectory; + return AppendOnlyBlockDirectory_CoversTuple(blockDirectory, aoTupleId); +} + /* ------------------------------------------------------------------------ * Callbacks for non-modifying operations on individual tuples for @@ -2110,6 +2244,7 @@ static const TableAmRoutine ao_row_methods = { .index_fetch_reset = appendonly_index_fetch_reset, .index_fetch_end = appendonly_index_fetch_end, .index_fetch_tuple = appendonly_index_fetch_tuple, + .index_fetch_tuple_exists = appendonly_index_fetch_tuple_exists, .tuple_insert = appendonly_tuple_insert, .tuple_insert_speculative = appendonly_tuple_insert_speculative, diff --git a/src/backend/access/appendonly/appendonlyblockdirectory.c b/src/backend/access/appendonly/appendonlyblockdirectory.c index a39eeefef8e..b465a47a940 100644 --- a/src/backend/access/appendonly/appendonlyblockdirectory.c +++ b/src/backend/access/appendonly/appendonlyblockdirectory.c @@ -14,6 +14,7 @@ */ #include "postgres.h" +#include "access/xact.h" #include "cdb/cdbappendonlyblockdirectory.h" #include "catalog/aoblkdir.h" #include "catalog/pg_appendonly.h" @@ -22,6 +23,7 @@ #include "parser/parse_oper.h" #include "utils/lsyscache.h" #include "utils/memutils.h" 
+#include "utils/faultinjector.h" #include "utils/guc.h" #include "utils/fmgroids.h" #include "cdb/cdbappendonlyam.h" @@ -55,6 +57,10 @@ static bool insert_new_entry(AppendOnlyBlockDirectory *blockDirectory, int64 fileOffset, int64 rowCount, bool addColAction); +static void clear_minipage(MinipagePerColumnGroup *minipagePerColumnGroup); +static bool blkdir_entry_exists(AppendOnlyBlockDirectory *blockDirectory, + AOTupleId *aoTupleId, + int columnGroupNo); void AppendOnlyBlockDirectoryEntry_GetBeginRange( @@ -145,6 +151,7 @@ init_internal(AppendOnlyBlockDirectory *blockDirectory) minipageInfo->minipage = palloc0(minipage_size(NUM_MINIPAGE_ENTRIES)); minipageInfo->numMinipageEntries = 0; + ItemPointerSetInvalid(&minipageInfo->tupleTid); } MemoryContextSwitchTo(oldcxt); @@ -667,6 +674,170 @@ AppendOnlyBlockDirectory_GetEntry( return false; } +/* + * AppendOnlyBlockDirectory_CoversTuple + * + * Check if there exists a visible block directory entry that represents a range + * in which this tid resides. + * + * Currently used by index fetches to perform unique constraint validation. A + * sysscan of the block directory relation is performed to determine the result. + * (see blkdir_entry_exists()) + * + * Performing a sysscan also has the distinct advantage of setting the xmin/xmax + * of the snapshot used to scan, which is a requirement when SNAPSHOT_DIRTY is + * used. See _bt_check_unique() and SNAPSHOT_DIRTY for details. + * + * Note about AOCO tables: + * For AOCO tables, there are multiple block directory entries for each tid. + * However, it is currently sufficient to check the block directory entry for + * just one of these columns. We do so for the 1st non-dropped column. 
Note that + * if we write a placeholder row for the 1st non-dropped column i, there is a + * guarantee that if there is a conflict on the placeholder row, the covering + * block directory entry will be based on the same column i (as columnar DDL + * changes need exclusive locks and placeholder rows can't be seen after tx end) + * (We could just have checked the covers condition for column 0, as block + * directory entries are inserted even for dropped columns. But, this may change + * one day, and we want our code to be future-proof) + */ +bool +AppendOnlyBlockDirectory_CoversTuple( + AppendOnlyBlockDirectory *blockDirectory, + AOTupleId *aoTupleId) +{ + Relation aoRel = blockDirectory->aoRel; + int firstNonDroppedColumn = -1; + + Assert(RelationIsValid(aoRel)); + + if (RelationIsAoRows(aoRel)) + return blkdir_entry_exists(blockDirectory, aoTupleId, 0); + else + { + for(int i = 0; i < aoRel->rd_att->natts; i++) + { + if (!aoRel->rd_att->attrs[i].attisdropped) { + firstNonDroppedColumn = i; + break; + } + } + Assert(firstNonDroppedColumn != -1); + + return blkdir_entry_exists(blockDirectory, + aoTupleId, + firstNonDroppedColumn); + } +} + +/* + * Does a visible block directory entry exist for a given aotid and column no? + * Currently used to satisfy unique constraint checks. 
+ */ +static bool +blkdir_entry_exists(AppendOnlyBlockDirectory *blockDirectory, + AOTupleId *aoTupleId, + int columnGroupNo) +{ + int segmentFileNum = AOTupleIdGet_segmentFileNum(aoTupleId); + int64 rowNum = AOTupleIdGet_rowNum(aoTupleId); + Relation blkdirRel = blockDirectory->blkdirRel; + Relation blkdirIdx = blockDirectory->blkdirIdx; + ScanKey scanKeys = blockDirectory->scanKeys; + HeapTuple tuple; + SysScanDesc idxScanDesc; + bool found = false; + TupleDesc blkdirTupleDesc; + + Assert(RelationIsValid(blkdirRel)); + + ereportif(Debug_appendonly_print_blockdirectory, LOG, + (errmsg("Append-only block directory covers tuple check: " + "(columnGroupNo, segmentFileNum, rowNum) = " + "(%d, %d, " INT64_FORMAT ")", + 0, segmentFileNum, rowNum))); + + blkdirTupleDesc = RelationGetDescr(blkdirRel); + + /* + * Set up the scan keys values. The keys have already been set up in + * init_internal() with the following strategy: + * (=segmentFileNum, =columnGroupNo, <=rowNum) + * See init_internal(). + */ + Assert(scanKeys != NULL); + Assert(blockDirectory->numScanKeys == 3); + scanKeys[0].sk_argument = Int32GetDatum(segmentFileNum); + scanKeys[1].sk_argument = Int32GetDatum(columnGroupNo); + scanKeys[2].sk_argument = Int64GetDatum(rowNum); + idxScanDesc = systable_beginscan_ordered(blkdirRel, blkdirIdx, + blockDirectory->appendOnlyMetaDataSnapshot, + blockDirectory->numScanKeys, + scanKeys); + + /* + * + * Loop until: + * + * (1) No rows are returned from the sysscan, as there is no visible row + * satisfying the criteria. This is what happens when there is no uniqueness + * conflict, when we call this in the context of a uniqueness check. + * + * (2) We find a row such that: rowNum ∈ [firstRowNum, firstRowNum + rowCount) + * (a) The row is a regular block directory row covering the rowNum. 
+ * (b) The row is a placeholder block directory row, inserted by + * AppendOnlyBlockDirectory_InsertPlaceholder(), which will always + * cover the rowNum by virtue of its rowCount = AOTupleId_MaxRowNum. + */ + while (HeapTupleIsValid(tuple = systable_getnext_ordered(idxScanDesc, BackwardScanDirection))) + { + /* + * Once we have found a matching row, we must also ensure that we check + * for a block directory entry, in this row's minipage, that has a range + * that covers the rowNum. + * + * This is necessary for aborted transactions where the index entry + * might still be live. In such a case, since our search criteria lacks + * a lastRowNum, we will match rows where: + * firstRowNum < lastRowNum < rowNum + * Such rows will obviously not cover the rowNum, thus making inspection + * of the row's minipage a necessity. + */ + MinipagePerColumnGroup *minipageInfo; + int entry_no; + + BlockNumber blockNumber = ItemPointerGetBlockNumberNoCheck(&tuple->t_self); + OffsetNumber offsetNumber = ItemPointerGetOffsetNumberNoCheck(&tuple->t_self); + elogif(Debug_appendonly_print_blockdirectory, LOG, + "For segno = %d, rownum = %ld, tid returned: (%u,%u) " + "tuple (xmin, xmax) = (%lu, %lu), snaptype = %d", + segmentFileNum, rowNum, blockNumber, offsetNumber, + (unsigned long) HeapTupleHeaderGetRawXmin(tuple->t_data), + (unsigned long) HeapTupleHeaderGetRawXmax(tuple->t_data), + blockDirectory->appendOnlyMetaDataSnapshot->snapshot_type); + + /* Set this so that we don't blow up in the assert in extract_minipage */ + blockDirectory->currentSegmentFileNum = segmentFileNum; + extract_minipage(blockDirectory, + tuple, + blkdirTupleDesc, + columnGroupNo); + + minipageInfo = &blockDirectory->minipages[columnGroupNo]; + entry_no = find_minipage_entry(minipageInfo->minipage, + minipageInfo->numMinipageEntries, + rowNum); + if (entry_no != -1) + { + found = true; + break; + } + } + + systable_endscan_ordered(idxScanDesc); + + return found; +} + /* * 
AppendOnlyBlockDirectory_InsertEntry * * @@ -703,6 +874,14 @@ AppendOnlyBlockDirectory_InsertEntry( * Helper method used to insert a new minipage entry in the block * directory relation. Refer to AppendOnlyBlockDirectory_InsertEntry() * for more details. + * + * 1. Checks if the current minipage is full. If yes, it writes the current + * minipage to the block directory relation and empties the in-memory area. This + * could mean a new block directory tuple is inserted OR an old tuple is updated. + * + * 2. "Inserts" the new entry in the current in-mem minipage -> just sets the + * in-memory area with the supplied function args. + * */ static bool insert_new_entry( @@ -745,23 +924,22 @@ insert_new_entry( minipageInfo = &blockDirectory->minipages[minipageIndex]; Assert(minipageInfo->numMinipageEntries <= (uint32) NUM_MINIPAGE_ENTRIES); - if (minipageInfo->numMinipageEntries >= (uint32) gp_blockdirectory_minipage_size) + /* + * Before we insert the new entry into the current minipage, we should + * check if the current minipage is full. If so, we write out the current + * minipage to the block directory relation and clear out the last minipage + * in-mem, making the current in-mem minipage empty and ready to hold the + * new entry (and beyond). + */ + if (IsMinipageFull(minipageInfo)) { write_minipage(blockDirectory, columnGroupNo, minipageInfo); - - /* Set tupleTid to invalid */ - ItemPointerSetInvalid(&minipageInfo->tupleTid); - - /* - * Clear out the entries. 
- */ - MemSet(minipageInfo->minipage->entry, 0, - minipageInfo->numMinipageEntries * sizeof(MinipageEntry)); - minipageInfo->numMinipageEntries = 0; + clear_minipage(minipageInfo); + SIMPLE_FAULT_INJECTOR("insert_new_entry_curr_minipage_full"); } + /* Now insert the new entry */ Assert(minipageInfo->numMinipageEntries < (uint32) gp_blockdirectory_minipage_size); - entry = &(minipageInfo->minipage->entry[minipageInfo->numMinipageEntries]); entry->firstRowNum = firstRowNum; entry->fileOffset = fileOffset; @@ -1129,11 +1307,101 @@ write_minipage(AppendOnlyBlockDirectory *blockDirectory, CatalogTupleInsertWithInfo(blkdirRel, tuple, indinfo); } + /* memorize updated/inserted tuple header info */ + ItemPointerCopy(&tuple->t_self, &minipageInfo->tupleTid); + heap_freetuple(tuple); MemoryContextSwitchTo(oldcxt); } +static void +clear_minipage(MinipagePerColumnGroup *minipagePerColumnGroup) +{ + MemSet(minipagePerColumnGroup->minipage->entry, 0, + minipagePerColumnGroup->numMinipageEntries * sizeof(MinipageEntry)); + minipagePerColumnGroup->numMinipageEntries = 0; + ItemPointerSetInvalid(&minipagePerColumnGroup->tupleTid); +} + +/* + * AppendOnlyBlockDirectory_InsertPlaceholder + * + * We perform uniqueness checks by looking up block directory rows that cover + * the rowNum indicated by the aotid obtained from the index. See + * AppendOnlyBlockDirectory_CoversTuple() for details. + * + * However, there are multiple time windows in which there are no covering block + * directory entries in the table for already inserted data rows. Such time + * windows start from when a data row is inserted and lasts till the block + * directory row covering it is written to the block directory table (see + * write_minipage()). Block directory rows are written only when: + * (i) the current in-memory minipage is full + * (ii) at end of command. 
+ * + * So we insert a placeholder entry in the current block directory row and + * persist the row before the first insert to cover rows in the range: + * [firstRowNum, lastRowNum], starting at firstOffset in the relfile + * corresponding to columnGroupNo. + * + * firstRowNum is the rowNum assigned to the 1st insert of the insert command. + * lastRowNum is the last rowNum that will be entered by the insert command, + * which is something unknown to us. So, to cover all such windows during the + * insert command's execution, we insert an entry with a placeholder + * rowcount = AOTupleId_MaxRowNum into the current minipage and write it to the + * relation (by reusing the machinery in write_minipage()). Such a row whose + * last entry is a placeholder entry is called a placeholder row. This entry + * will cover up to lastRowNum, whatever its value may be, for all such time + * windows during the insert command. + * + * Safety: + * (1) The placeholder upper bound is not a concern as this row will be consulted + * ONLY by SNAPSHOT_DIRTY (for uniqueness checks) and will be ignored by regular + * MVCC processing (for index scans). Eventually, it will be rendered invisible + * as it will be updated by a subsequent write_minipage() or by virtue of abort. + * + * (2) There is no way a placeholder row will detect spurious conflicts due to + * its loose upper bound, in the same segment file, to which it maps. This is + * because there can be no other rows inserted into a segment file other than + * the insert operation that is currently in progress on the file. 
+ */ +void +AppendOnlyBlockDirectory_InsertPlaceholder(AppendOnlyBlockDirectory *blockDirectory, + int64 firstRowNum, + int64 fileOffset, + int columnGroupNo) +{ + MinipagePerColumnGroup *minipagePerColumnGroup; + + Assert(firstRowNum > 0); + Assert(fileOffset >= 0); + Assert(RelationIsValid(blockDirectory->blkdirRel)); + Assert(columnGroupNo >= 0 && + columnGroupNo < blockDirectory->aoRel->rd_att->natts); + + minipagePerColumnGroup = &blockDirectory->minipages[columnGroupNo]; + + /* insert placeholder entry with a max row count */ + insert_new_entry(blockDirectory, columnGroupNo, firstRowNum, fileOffset, + AOTupleId_MaxRowNum, false); + /* insert placeholder row containing placeholder entry */ + write_minipage(blockDirectory, columnGroupNo, minipagePerColumnGroup); + /* + * Delete the placeholder entry as it has no business being in memory. + * Removing it from the current minipage will make the rest of the processing + * for the current command behave as if it never existed. The absence of + * this entry will help effectively "update" it once its replacement entry + * is created in memory, in a subsequent call to insert_new_entry(), + * followed by a write_minipage() which will make this "update" persistent. + */ + minipagePerColumnGroup->numMinipageEntries--; + /* + * Increment the command counter, as we will be updating this temp row later + * on in write_minipage(). + */ + CommandCounterIncrement(); +} + void AppendOnlyBlockDirectory_End_forInsert( AppendOnlyBlockDirectory *blockDirectory) diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index 5dfc3d659c2..3f5fa7439f0 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -283,6 +283,14 @@ table_index_fetch_tuple_check(Relation rel, bool call_again = false; bool found; + /* + * Optimized path for AO/CO relations as the aforementioned per-tuple + * overhead is significant for AO/CO relations. 
For details, please refer to + * table_index_fetch_tuple_exists(). + */ + if (RelationIsAppendOptimized(rel)) + return table_index_fetch_tuple_exists(rel, tid, snapshot, all_dead); + slot = table_slot_create(rel, NULL); scan = table_index_fetch_begin(rel); found = table_index_fetch_tuple(scan, tid, snapshot, slot, &call_again, diff --git a/src/backend/catalog/aoblkdir.c b/src/backend/catalog/aoblkdir.c index d7c8e07f46b..112848b9693 100644 --- a/src/backend/catalog/aoblkdir.c +++ b/src/backend/catalog/aoblkdir.c @@ -16,6 +16,8 @@ */ #include "postgres.h" +#include "access/aosegfiles.h" +#include "access/aocssegfiles.h" #include "access/table.h" #include "catalog/pg_am.h" #include "catalog/pg_opclass.h" @@ -119,3 +121,58 @@ AlterTableCreateAoBlkdirTable(Oid relOid) table_close(rel, NoLock); } +/* + * In relation versions older than AORelationVersion_PG12, block directory + * entries can lie about the continuity of rows *within* their range, due to + * legacy hole filling logic. Since unique index checks rely on this continuity, + * such indexes cannot be created on these relations. + * + * Called only when rel has a block directory. 
+ */ +void +ValidateRelationVersionForUniqueIndex(Relation rel) +{ + bool error = false; + int errsegno; + int errversion; + int totalsegs; + + Assert(RelationIsAppendOptimized(rel)); + + if (RelationIsAoRows(rel)) + { + FileSegInfo **fsInfo = GetAllFileSegInfo(rel, NULL, &totalsegs, NULL); + for (int i = 0; i < totalsegs; i++) + { + if (fsInfo[i]->formatversion < AORelationVersion_PG12) + { + error = true; + errsegno = fsInfo[i]->segno; + errversion = fsInfo[i]->formatversion; + break; + } + } + } + else + { + AOCSFileSegInfo **aocsFsInfo = GetAllAOCSFileSegInfo(rel, NULL, &totalsegs, NULL); + for (int i = 0; i < totalsegs; i++) + { + if (aocsFsInfo[i]->formatversion < AORelationVersion_PG12) + { + error = true; + errsegno = aocsFsInfo[i]->segno; + errversion = aocsFsInfo[i]->formatversion; + break; + } + } + } + + if (error) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("append-only tables with older relation versions do not support unique indexes"), + errdetail("in segno = %d: version found = %d, minimum version required = %d", + errsegno, errversion, AORelationVersion_PG12), + errhint("truncate and reload the table data before creating the unique index"))); +} diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 71c85687345..c2b7ea583bc 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -701,6 +701,7 @@ DefineIndex(Oid relationId, int root_save_nestlevel; int i; bool shouldDispatch; + Oid blkdirrelid = InvalidOid; shouldDispatch = (Gp_role == GP_ROLE_DISPATCH && ENABLE_DISPATCH() && @@ -819,7 +820,6 @@ DefineIndex(Oid relationId, rel = table_open(relationId, NoLock); if (RelationIsAppendOptimized(rel)) { - Oid blkdirrelid = InvalidOid; GetAppendOnlyEntryAuxOids(relationId, NULL, NULL, &blkdirrelid, NULL, NULL, NULL); if (!OidIsValid(blkdirrelid)) @@ -1071,9 +1071,22 @@ DefineIndex(Oid relationId, accessMethodName))); if (stmt->unique && RelationIsAppendOptimized(rel)) 
- ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("append-only tables do not support unique indexes"))); + { + /* XXX: Remove when unique indexes are fully supported on AO/CO tables. */ + if (!gp_appendonly_enable_unique_index) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("append-only tables do not support unique indexes"))); + + if (stmt->concurrent) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("append-only tables do not support unique indexes built concurrently"))); + + /* Additional version checks needed if block directory already exists */ + if (OidIsValid(blkdirrelid)) + ValidateRelationVersionForUniqueIndex(rel); + } /* * The TableAmRoutine of AO/AOCS does not implement the index_validate_scan method, diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index 2e39cea261c..77add09bb54 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -138,6 +138,7 @@ bool Test_print_prefetch_joinqual = false; bool Test_copy_qd_qe_split = false; bool gp_permit_relation_node_change = false; int gp_max_local_distributed_cache = 1024; +bool gp_appendonly_enable_unique_index = false; bool gp_appendonly_verify_block_checksums = true; bool gp_appendonly_verify_write_block = false; bool gp_appendonly_compaction = true; @@ -882,6 +883,17 @@ struct config_bool ConfigureNamesBool_gp[] = NULL, NULL, NULL }, + { + {"gp_appendonly_enable_unique_index", PGC_USERSET, DEVELOPER_OPTIONS, + gettext_noop("Enable unique indexes on AO/CO tables (experimental)."), + NULL, + GUC_NOT_IN_SAMPLE | GUC_NO_SHOW_ALL + }, + &gp_appendonly_enable_unique_index, + false, + NULL, NULL, NULL + }, + { {"gp_appendonly_verify_block_checksums", PGC_USERSET, DEVELOPER_OPTIONS, gettext_noop("Verify the append-only block checksum when reading."), diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index ff0bf69a670..f5433ddaa2b 100644 --- a/src/include/access/tableam.h 
+++ b/src/include/access/tableam.h @@ -458,6 +458,11 @@ typedef struct TableAmRoutine TupleTableSlot *slot, bool *call_again, bool *all_dead); + /* See table_index_fetch_tuple_exists() for details */ + bool (*index_fetch_tuple_exists) (Relation rel, + ItemPointer tid, + Snapshot snapshot, + bool *all_dead); /* ------------------------------------------------------------------------ * Callbacks for non-modifying operations on individual tuples @@ -1325,6 +1330,24 @@ extern bool table_index_fetch_tuple_check(Relation rel, Snapshot snapshot, bool *all_dead); +/* + * GPDB: Check if a tuple exists for a given tid obtained from an index. + * This is used to entertain unique index checks on AO/CO tables. For heap + * tables, the regular method of beginindexscan..fetchtuple..endindexscan + * can be used. Creating/destroying scan descriptors for AO/CO tables are + * too expensive to be done on a per-tuple basis. + * + * This has to have an identical signature to table_index_fetch_tuple_check(). + */ +static inline bool +table_index_fetch_tuple_exists(Relation rel, + ItemPointer tid, + Snapshot snapshot, + bool *all_dead) +{ + return rel->rd_tableam->index_fetch_tuple_exists(rel, tid, snapshot, + all_dead); +} /* ------------------------------------------------------------------------ * Functions for non-modifying operations on individual tuples diff --git a/src/include/catalog/aoblkdir.h b/src/include/catalog/aoblkdir.h index 1b94ba90927..dc4648acf05 100644 --- a/src/include/catalog/aoblkdir.h +++ b/src/include/catalog/aoblkdir.h @@ -26,5 +26,6 @@ #define Anum_pg_aoblkdir_minipage 4 extern void AlterTableCreateAoBlkdirTable(Oid relOid); +extern void ValidateRelationVersionForUniqueIndex(Relation rel); #endif diff --git a/src/include/cdb/cdbaocsam.h b/src/include/cdb/cdbaocsam.h index 1cf80410190..a1c55bcba00 100644 --- a/src/include/cdb/cdbaocsam.h +++ b/src/include/cdb/cdbaocsam.h @@ -261,6 +261,13 @@ typedef AOCSFetchDescData *AOCSFetchDesc; typedef struct 
AOCSUpdateDescData *AOCSUpdateDesc; typedef struct AOCSDeleteDescData *AOCSDeleteDesc; +typedef struct AOCSUniqueCheckDescData +{ + AppendOnlyBlockDirectory *blockDirectory; +} AOCSUniqueCheckDescData; + +typedef struct AOCSUniqueCheckDescData *AOCSUniqueCheckDesc; + /* * Descriptor for fetches from table via an index. */ diff --git a/src/include/cdb/cdbappendonlyam.h b/src/include/cdb/cdbappendonlyam.h index 8a9bc451bdb..42a680b24e7 100644 --- a/src/include/cdb/cdbappendonlyam.h +++ b/src/include/cdb/cdbappendonlyam.h @@ -55,6 +55,8 @@ #define DEFAULT_VARBLOCK_TEMPSPACE_LEN (4 * 1024) #define DEFAULT_FS_SAFE_WRITE_SIZE (0) +extern AppendOnlyBlockDirectory *GetAOBlockDirectory(Relation relation); + /* * AppendOnlyInsertDescData is used for inserting data into append-only * relations. It serves an equivalent purpose as AppendOnlyScanDescData @@ -368,6 +370,12 @@ typedef AppendOnlyFetchDescData *AppendOnlyFetchDesc; typedef struct AppendOnlyDeleteDescData *AppendOnlyDeleteDesc; +typedef struct AppendOnlyUniqueCheckDescData +{ + AppendOnlyBlockDirectory *blockDirectory; +} AppendOnlyUniqueCheckDescData; + +typedef struct AppendOnlyUniqueCheckDescData *AppendOnlyUniqueCheckDesc; /* * Descriptor for fetches from table via an index. */ diff --git a/src/include/cdb/cdbappendonlyblockdirectory.h b/src/include/cdb/cdbappendonlyblockdirectory.h index 0cb8c18dea7..d3d97b50065 100644 --- a/src/include/cdb/cdbappendonlyblockdirectory.h +++ b/src/include/cdb/cdbappendonlyblockdirectory.h @@ -92,6 +92,9 @@ typedef struct MinipagePerColumnGroup #define NUM_MINIPAGE_ENTRIES (((MaxHeapTupleSize)/8 - sizeof(HeapTupleHeaderData) - 64 * 3)\ / sizeof(MinipageEntry)) +#define IsMinipageFull(minipagePerColumnGroup) \ + ((minipagePerColumnGroup)->numMinipageEntries == (uint32) gp_blockdirectory_minipage_size) + /* * Define a structure for the append-only relation block directory. 
*/ @@ -190,6 +193,9 @@ extern bool AppendOnlyBlockDirectory_GetEntry( AOTupleId *aoTupleId, int columnGroupNo, AppendOnlyBlockDirectoryEntry *directoryEntry); +extern bool AppendOnlyBlockDirectory_CoversTuple( + AppendOnlyBlockDirectory *blockDirectory, + AOTupleId *aoTupleId); extern void AppendOnlyBlockDirectory_Init_forInsert( AppendOnlyBlockDirectory *blockDirectory, Snapshot appendOnlyMetaDataSnapshot, @@ -248,6 +254,11 @@ extern void AppendOnlyBlockDirectory_DeleteSegmentFile( int segno, int columnGroupNo); +extern void AppendOnlyBlockDirectory_InsertPlaceholder(AppendOnlyBlockDirectory *blockDirectory, + int64 firstRowNum, + int64 fileOffset, + int columnGroupNo); + static inline uint32 minipage_size(uint32 nEntry) { diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index 0e1c15bcf64..c168ab31938 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -293,6 +293,7 @@ extern bool Debug_bitmap_print_insert; extern bool enable_checksum_on_tables; extern int gp_max_local_distributed_cache; extern bool gp_local_distributed_cache_stats; +extern bool gp_appendonly_enable_unique_index; extern bool gp_appendonly_verify_block_checksums; extern bool gp_appendonly_verify_write_block; extern bool gp_appendonly_compaction; diff --git a/src/test/isolation2/expected/ao_blkdir.out b/src/test/isolation2/expected/ao_blkdir.out index 988ea9d3725..f44e4632343 100644 --- a/src/test/isolation2/expected/ao_blkdir.out +++ b/src/test/isolation2/expected/ao_blkdir.out @@ -230,6 +230,95 @@ SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id (0,2) | 1 | 0 | 0 | 292699 | 5274360 | 2 (162 rows) +-- Unique index white box tests +DROP TABLE ao_blkdir_test; +DROP +SET gp_appendonly_enable_unique_index TO ON; +SET +CREATE TABLE ao_blkdir_test(i int UNIQUE, j int) USING ao_row DISTRIBUTED BY (i); +CREATE + +SELECT gp_inject_fault('appendonly_insert', 'suspend', '', '', 'ao_blkdir_test', 1, 1, 0, dbid) FROM gp_segment_configuration WHERE 
role = 'p' AND content = 0; + gp_inject_fault +----------------- + Success: +(1 row) +1: BEGIN; +BEGIN +1&: INSERT INTO ao_blkdir_test VALUES (2, 2); + +-- There should be a placeholder row inserted to cover the rows for each INSERT +-- session, before we insert the 1st row in that session, that is only visible +-- to SNAPSHOT_DIRTY. +SELECT gp_wait_until_triggered_fault('appendonly_insert', 1, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = 0; + gp_wait_until_triggered_fault +------------------------------- + Success: +(1 row) +SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- +(0 rows) +SET gp_select_invisible TO ON; +SET +SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+--------------- + (0,1) | 1 | 0 | 0 | 1 | 0 | 1099511627775 +(1 row) +RESET gp_select_invisible; +RESET + +-- The placeholder row is invisible to other transactions (that don't perform a +-- uniqueness check) while the INSERT is in progress. +2: SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- +(0 rows) + +SELECT gp_inject_fault('appendonly_insert', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = 0; + gp_inject_fault +----------------- + Success: +(1 row) +1<: <... 
completed> +INSERT 1 + +-- The placeholder row is invisible to the INSERTing transaction. Since the +-- INSERT finished, there should be 1 visible blkdir row representing the INSERT. +1: SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- + (0,2) | 1 | 0 | 0 | 1 | 0 | 1 +(1 row) + +-- The placeholder row is invisible to other transactions (that don't perform a +-- uniqueness check) even after the INSERT finishes. The blkdir row representing +-- the INSERT should not be visible as the INSERTing transaction hasn't +-- committed yet. +2: SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- +(0 rows) + +1: COMMIT; +COMMIT + +-- The placeholder row is invisible to other transactions (that don't perform a +-- uniqueness check) even after the INSERTing transaction commits. Since the +-- INSERTing transaction has committed, the blkdir row representing the INSERT +-- should be visible now. 
+2: SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- + (0,2) | 1 | 0 | 0 | 1 | 0 | 1 +(1 row) + +DROP TABLE ao_blkdir_test; +DROP +RESET gp_appendonly_enable_unique_index; +RESET + -------------------------------------------------------------------------------- -- AOCO tables -------------------------------------------------------------------------------- @@ -628,3 +717,111 @@ SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_ (0,3) | 1 | 0 | 0 | 1317142 | 5275648 | 2 (0,4) | 1 | 1 | 0 | 1317142 | 5275648 | 2 (324 rows) + +-- Unique index white box tests +DROP TABLE aoco_blkdir_test; +DROP +SET gp_appendonly_enable_unique_index TO ON; +SET +CREATE TABLE aoco_blkdir_test(h int, i int UNIQUE, j int) USING ao_column DISTRIBUTED BY (i); +CREATE + +SELECT gp_inject_fault('appendonly_insert', 'suspend', '', '', 'aoco_blkdir_test', 1, 1, 0, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = 0; + gp_inject_fault +----------------- + Success: +(1 row) +1: BEGIN; +BEGIN +1&: INSERT INTO aoco_blkdir_test VALUES (2, 2, 2); + +-- There should be a placeholder row inserted to cover the rows for each INSERT +-- session (for the first non-dropped column), before we insert the 1st row in +-- that session, that is only visible to SNAPSHOT_DIRTY. 
+SELECT gp_wait_until_triggered_fault('appendonly_insert', 1, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = 0; + gp_wait_until_triggered_fault +------------------------------- + Success: +(1 row) +SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- +(0 rows) +SET gp_select_invisible TO ON; +SET +SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+--------------- + (0,1) | 1 | 0 | 0 | 1 | 0 | 1099511627775 +(1 row) +RESET gp_select_invisible; +RESET + +-- The placeholder row is invisible to other transactions (that don't perform a +-- uniqueness check) while the INSERT is in progress. +2: SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- +(0 rows) + +-- Before the INSERT commits, if we try to drop column 'h', for which the +-- placeholder row was created, the session will block (locking). So it is +-- perfectly safe to use 1 placeholder row (and not have 1 placeholder/column) +3&: ALTER TABLE aoco_blkdir_test DROP COLUMN h; + +SELECT gp_inject_fault('appendonly_insert', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = 0; + gp_inject_fault +----------------- + Success: +(1 row) +1<: <... completed> +INSERT 1 + +-- The placeholder row is invisible to the INSERTing transaction. 
Since the +-- INSERT finished, there should be 3 visible blkdir rows representing the +-- INSERT, 1 for each column. +1: SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- + (0,2) | 1 | 0 | 0 | 1 | 0 | 1 + (0,3) | 1 | 1 | 0 | 1 | 0 | 1 + (0,4) | 1 | 2 | 0 | 1 | 0 | 1 +(3 rows) + +-- The placeholder row is invisible to other transactions (that don't perform a +-- uniqueness check) even after the INSERT finishes. The blkdir rows representing +-- the INSERT should not be visible as the INSERTing transaction hasn't +-- committed yet. +2: SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- +(0 rows) + +1: COMMIT; +COMMIT + +-- The placeholder row is invisible to other transactions (that don't perform a +-- uniqueness check) even after the INSERTing transaction commits. Since the +-- INSERTing transaction has committed, the blkdir rows representing the INSERT +-- should be visible now. 
+2: SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- + (0,2) | 1 | 0 | 0 | 1 | 0 | 1 + (0,3) | 1 | 1 | 0 | 1 | 0 | 1 + (0,4) | 1 | 2 | 0 | 1 | 0 | 1 +(3 rows) + +-- Now even though the DROP COLUMN has finished, we would still be able to +-- properly resolve uniqueness checks (by consulting the first non-dropped +-- column's block directory row). +3<: <... completed> +ALTER +4: INSERT INTO aoco_blkdir_test VALUES (2, 2); +ERROR: duplicate key value violates unique constraint "aoco_blkdir_test_i_key" (seg0 192.168.0.148:7002 pid=176693) +DETAIL: Key (i)=(2) already exists. + +DROP TABLE aoco_blkdir_test; +DROP +RESET gp_appendonly_enable_unique_index; +RESET diff --git a/src/test/isolation2/expected/ao_unique_index.out b/src/test/isolation2/expected/ao_unique_index.out new file mode 100644 index 00000000000..090e34c8575 --- /dev/null +++ b/src/test/isolation2/expected/ao_unique_index.out @@ -0,0 +1,412 @@ +-- Tests to ensure that unique indexes work as expected w/ ao_row tables. + +-- We use a replicated table to test each table for ease in testing edge cases +-- where conflicts arise at block directory boundaries. We can treat the table +-- as if it were being populated in utility mode on a single segment, allowing +-- us to predict block directory entries without having to worry about the +-- table's distribution. 
+ +SET gp_appendonly_enable_unique_index TO ON; +SET + +-- Case 1: Conflict with committed transaction---------------------------------- +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row DISTRIBUTED REPLICATED; +CREATE +INSERT INTO unique_index_ao_row SELECT * FROM generate_series(1, 329729); +INSERT 329729 +-- should conflict +INSERT INTO unique_index_ao_row VALUES (1); +ERROR: duplicate key value violates unique constraint "unique_index_ao_row_a_key" (seg1 192.168.0.148:7003 pid=205740) +DETAIL: Key (a)=(1) already exists. +INSERT INTO unique_index_ao_row VALUES (329729); +ERROR: duplicate key value violates unique constraint "unique_index_ao_row_a_key" (seg2 192.168.0.148:7004 pid=205741) +DETAIL: Key (a)=(329729) already exists. +-- should not conflict +INSERT INTO unique_index_ao_row VALUES (329730); +INSERT 1 +DROP TABLE unique_index_ao_row; +DROP + +-- Case 2: Conflict within the same transaction--------------------------------- +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row DISTRIBUTED REPLICATED; +CREATE +BEGIN; +BEGIN +INSERT INTO unique_index_ao_row SELECT * FROM generate_series(1, 329729); +INSERT 329729 +-- should conflict +INSERT INTO unique_index_ao_row VALUES (1); +ERROR: duplicate key value violates unique constraint "unique_index_ao_row_a_key" (seg0 192.168.0.148:7002 pid=205739) +DETAIL: Key (a)=(1) already exists. +END; +END +DROP TABLE unique_index_ao_row; +DROP + +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row DISTRIBUTED REPLICATED; +CREATE +BEGIN; +BEGIN +INSERT INTO unique_index_ao_row SELECT * FROM generate_series(1, 329729); +INSERT 329729 +-- should conflict +INSERT INTO unique_index_ao_row VALUES (329729); +ERROR: duplicate key value violates unique constraint "unique_index_ao_row_a_key" (seg0 192.168.0.148:7002 pid=205739) +DETAIL: Key (a)=(329729) already exists. 
+END; +END +DROP TABLE unique_index_ao_row; +DROP + +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row DISTRIBUTED REPLICATED; +CREATE +BEGIN; +BEGIN +INSERT INTO unique_index_ao_row SELECT * FROM generate_series(1, 329729); +INSERT 329729 +-- should not conflict +INSERT INTO unique_index_ao_row VALUES (329730); +INSERT 1 +END; +END +DROP TABLE unique_index_ao_row; +DROP + +-- Case 3: Conflict with aborted transaction is not a conflict------------------ +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row DISTRIBUTED REPLICATED; +CREATE +BEGIN; +BEGIN +INSERT INTO unique_index_ao_row SELECT * FROM generate_series(1, 329729); +INSERT 329729 +ABORT; +ABORT +-- should not conflict +INSERT INTO unique_index_ao_row VALUES (1); +INSERT 1 +INSERT INTO unique_index_ao_row VALUES (329729); +INSERT 1 +INSERT INTO unique_index_ao_row VALUES (329730); +INSERT 1 +DROP TABLE unique_index_ao_row; +DROP + +-- Case 4: Conflict with to-be-committed transaction---------------------------- +-- +-- 1. Uncommitted tx 1 has inserted non-conflicting key = 0. +-- 2. Uncommitted tx 2 has inserted (161 * 2048 + 1 = 329729 rows), which spans +-- 2 block directory rows (1st row: [1,329728] ; 2nd row: [329729,329729]) +-- 3. Tx 3 tries to insert conflicting key = 2, which maps to the second rownum +-- covered by the 1st block directory row of seg 1, and blocks on tx 2. +-- 4. Tx 4 tries to insert conflicting key = 329728, which maps to the last +-- rownum covered by the 1st block directory row of seg 1, and blocks on tx 2. +-- 5. Tx 5 tries to insert conflicting key = 329729, which maps to the first +-- rownum covered by the 2nd block directory row of seg 1, and blocks on tx 2. +-- 6. Tx 6 tries to insert non-conflicting key = 329730 and is immediately +-- successful. +-- 7. Tx 2 commits +-- 8. Txs 3,4,5 report unique constraint violation +-- 9. 
Tx 1 commits +-- +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row DISTRIBUTED REPLICATED; +CREATE +1: BEGIN; +BEGIN +1: INSERT INTO unique_index_ao_row VALUES (0); +INSERT 1 +2: BEGIN; +BEGIN +2: INSERT INTO unique_index_ao_row SELECT * FROM generate_series(1, 329729); +INSERT 329729 +3&: INSERT INTO unique_index_ao_row VALUES (1); +4&: INSERT INTO unique_index_ao_row VALUES (329728); +5&: INSERT INTO unique_index_ao_row VALUES (329729); +-- should succeed immediately +6: INSERT INTO unique_index_ao_row VALUES (329730); +INSERT 1 +2: COMMIT; +COMMIT +3<: <... completed> +ERROR: duplicate key value violates unique constraint "unique_index_ao_row_a_key" (seg1 192.168.0.148:7003 pid=205769) +DETAIL: Key (a)=(1) already exists. +4<: <... completed> +ERROR: duplicate key value violates unique constraint "unique_index_ao_row_a_key" (seg0 192.168.0.148:7002 pid=205777) +DETAIL: Key (a)=(329728) already exists. +5<: <... completed> +ERROR: duplicate key value violates unique constraint "unique_index_ao_row_a_key" (seg1 192.168.0.148:7003 pid=205787) +DETAIL: Key (a)=(329729) already exists. +1: COMMIT; +COMMIT +DROP TABLE unique_index_ao_row; +DROP + +-- Case 5: Conflict with to-be-aborted transaction------------------------------ +-- +-- 1. Uncommitted tx 1 has inserted non-conflicting key = 0. +-- 2. Uncommitted tx 2 has inserted (161 * 2048 + 1 = 329729 rows), which spans +-- 2 block directory rows (1st row: [1,329728] ; 2nd row: [329729,329729]) +-- 3. Tx 3 tries to insert conflicting key = 2, which maps to the second rownum +-- covered by the 1st block directory row of seg 1, and blocks on tx 2. +-- 4. Tx 4 tries to insert conflicting key = 329728, which maps to the last +-- rownum covered by the 1st block directory row of seg 1, and blocks on tx 2. +-- 5. Tx 5 tries to insert conflicting key = 329729, which maps to the first +-- rownum covered by the 2nd block directory row of seg 1, and blocks on tx 2. +-- 6. 
Tx 6 tries to insert non-conflicting key = 329730 and is immediately
+-- successful.
+-- 8. Tx 2 aborts
+-- 9. Txs 3,4,5 succeed, since the conflicting rows from Tx 2 were aborted
+-- 10. Tx 1 commits
+--
+CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row DISTRIBUTED REPLICATED;
+CREATE
+1: BEGIN;
+BEGIN
+1: INSERT INTO unique_index_ao_row VALUES (0);
+INSERT 1
+2: BEGIN;
+BEGIN
+2: INSERT INTO unique_index_ao_row SELECT * FROM generate_series(1, 329729);
+INSERT 329729
+3&: INSERT INTO unique_index_ao_row VALUES (1);
+4&: INSERT INTO unique_index_ao_row VALUES (329728);
+5&: INSERT INTO unique_index_ao_row VALUES (329729);
+-- should succeed immediately
+6: INSERT INTO unique_index_ao_row VALUES (329730);
+INSERT 1
+2: ABORT;
+ABORT
+3<: <... completed>
+INSERT 1
+4<: <... completed>
+INSERT 1
+5<: <... completed>
+INSERT 1
+1: COMMIT;
+COMMIT
+DROP TABLE unique_index_ao_row;
+DROP
+
+-- Case 6: Conflict with aborted rows following some committed rows ------------
+CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row DISTRIBUTED REPLICATED;
+CREATE
+-- 1. Tx 1 commits rows 1-100.
+-- 2. Tx 2 inserts rows 101-200 and then aborts.
+-- 3. Tx 3 tries to insert row in range [101,200] and is immediately successful.
+-- 4. Tx 4 tries to insert conflicting row in range [1,100] and raises unique
+-- constraint violation.
+-- 5. Tx 5 tries to insert row in range [201, ) and is immediately successful.
+1: INSERT INTO unique_index_ao_row SELECT generate_series(1, 100);
+INSERT 100
+2: BEGIN;
+BEGIN
+2: INSERT INTO unique_index_ao_row SELECT generate_series(101, 200);
+INSERT 100
+2: ABORT;
+ABORT
+3: INSERT INTO unique_index_ao_row VALUES(102);
+INSERT 1
+4: INSERT INTO unique_index_ao_row VALUES(2);
+ERROR: duplicate key value violates unique constraint "unique_index_ao_row_a_key" (seg0 192.168.0.148:7002 pid=659656)
+DETAIL: Key (a)=(2) already exists. 
+5: INSERT INTO unique_index_ao_row VALUES(202); +INSERT 1 +DROP TABLE unique_index_ao_row; +DROP + +-------------------------------------------------------------------------------- +----------------- More concurrent tests with fault injection ------------------ +-------------------------------------------------------------------------------- + +-- Case 7: Conflict with to-be-committed transaction while only 1 placeholder +-- row exists in the block directory-------------------------------------------- +-- +-- This case highlights the importance of the placeholder row, inserted at the +-- beginning of an INSERT command. +-- +-- 1. Uncommitted Tx 1 has inserted 3 out of its 10 rows and is suspended. +-- 2. Tx 2 inserts a conflicting row and blocks on Tx 1. +-- 3. Tx 3 inserts a non-conflicting row within the range [4,10] and is +-- immediately successful. (Index entries have been written only for [1,3] so +-- far, so conflicts shouldn't arise) +-- 4. Tx 4 inserts a non-conflicting row in range [11, ..) and should be +-- immediately successful. +-- 5. Now Tx 1 resumes and tries to insert a row in range [4,10] and reports a +-- unique constraint violation with Tx 3. +-- 6. Tx 2 succeeds as Tx 1 aborted. + +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row DISTRIBUTED REPLICATED; +CREATE +SELECT gp_inject_fault('appendonly_insert', 'suspend', '', '', 'unique_index_ao_row', 4, 4, 0, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) +1&: INSERT INTO unique_index_ao_row SELECT * FROM generate_series(1, 10); +-- Wait until 3 rows have been successfully inserted into the index and Tx 1 +-- is just beginning to insert the 4th row. 
+SELECT gp_wait_until_triggered_fault('appendonly_insert', 4, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1;
+ gp_wait_until_triggered_fault
+-------------------------------
+ Success:
+ Success:
+ Success:
+(3 rows)
+2&: INSERT INTO unique_index_ao_row VALUES(2);
+4: INSERT INTO unique_index_ao_row VALUES(11);
+INSERT 1
+3: INSERT INTO unique_index_ao_row VALUES(4);
+INSERT 1
+SELECT gp_inject_fault('appendonly_insert', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1;
+ gp_inject_fault
+-----------------
+ Success:
+ Success:
+ Success:
+(3 rows)
+1<: <... completed>
+ERROR: duplicate key value violates unique constraint "unique_index_ao_row_a_key" (seg1 192.168.0.148:7003 pid=828519)
+DETAIL: Key (a)=(4) already exists.
+2<: <... completed>
+INSERT 1
+DROP TABLE unique_index_ao_row;
+DROP
+
+-- Case 8: Conflict with to-be-committed transaction - generalization of case 7
+-- where there are multiple minipages (and block directory rows) in play from
+-- the same insert.
+--
+-- This justifies why 1 placeholder row is enough and we don't need to flush a
+-- placeholder row every time we insert a block directory row (i.e. start a new
+-- in-memory minipage) throughout the course of a single insert.
+--
+-- 1. Uncommitted Tx 1 has inserted (2048 * (161 * 2 + 1) + 3) = 661507 rows
+-- and is suspended, enough rows to fill 2 entire minipages (covers
+-- range [1,329728] and [329729,659456]) before suspension.
+-- 2. Txs 2,3,4 insert conflicting rows that map to the 1st minipage and block.
+-- 3. Txs 5,6,7 insert conflicting rows that map to the 2nd minipage and block.
+-- 4. Tx 8 inserts a conflicting row that maps to the 3rd minipage, which is
+-- currently only in-memory and it conflicts on the placeholder row and
+-- blocks (showcases why 1 placeholder row is enough)
+-- 5. Tx 9 inserts a non-conflicting row for which there is no index entry
+-- and is immediately successful (661510).
+-- 6. 
Now Tx 1 resumes and tries to insert 661510 and reports a unique +-- constraint violation with Tx 9. +-- 7. All blocked Txs succeed. + +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row DISTRIBUTED REPLICATED; +CREATE + +SELECT gp_inject_fault('insert_new_entry_curr_minipage_full', 'suspend', '', '', '', 2, 2, 0, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) +1&: INSERT INTO unique_index_ao_row SELECT generate_series(1, 661510); + +-- Wait until we have inserted (2048 * (161 * 2 + 1) + 3) = 661507 rows and we +-- are about to insert the 661508th row. +SELECT gp_wait_until_triggered_fault('insert_new_entry_curr_minipage_full', 2, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + gp_wait_until_triggered_fault +------------------------------- + Success: + Success: + Success: +(3 rows) +SELECT gp_inject_fault('appendonly_insert', 'suspend', '', '', 'unique_index_ao_row', 4, 4, 0, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) +SELECT gp_inject_fault('insert_new_entry_curr_minipage_full', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) +SELECT gp_wait_until_triggered_fault('appendonly_insert', 4, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + gp_wait_until_triggered_fault +------------------------------- + Success: + Success: + Success: +(3 rows) + +-- maps to 1st minipage +2&: INSERT INTO unique_index_ao_row VALUES(1); +3&: INSERT INTO unique_index_ao_row VALUES(300000); +4&: INSERT INTO unique_index_ao_row VALUES(329728); +-- maps to 2nd minipage +5&: INSERT INTO unique_index_ao_row VALUES(329729); +6&: INSERT INTO unique_index_ao_row VALUES(598000); +7&: INSERT INTO unique_index_ao_row 
VALUES(659456); +-- maps to 3rd minipage +8&: INSERT INTO unique_index_ao_row VALUES(661507); +-- no index entry exists for it, so should not conflict. +9: INSERT INTO unique_index_ao_row VALUES(661510); +INSERT 1 + +SELECT gp_inject_fault('appendonly_insert', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) + +1<: <... completed> +ERROR: duplicate key value violates unique constraint "unique_index_ao_row_a_key" (seg1 192.168.0.148:7003 pid=630215) +DETAIL: Key (a)=(661510) already exists. +2<: <... completed> +INSERT 1 +3<: <... completed> +INSERT 1 +4<: <... completed> +INSERT 1 +5<: <... completed> +INSERT 1 +6<: <... completed> +INSERT 1 +7<: <... completed> +INSERT 1 +8<: <... completed> +INSERT 1 + +DROP TABLE unique_index_ao_row; +DROP + +-------------------------------------------------------------------------------- +--------------------------- Smoke tests for COPY ------------------------------- +-------------------------------------------------------------------------------- + +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row DISTRIBUTED REPLICATED; +CREATE + +1: BEGIN; +BEGIN +1: COPY unique_index_ao_row FROM PROGRAM 'seq 1 10'; +COPY 30 +-- concurrent tx inserting conflicting row should block. +2&: COPY unique_index_ao_row FROM PROGRAM 'seq 1 1'; +-- concurrent tx inserting non-conflicting rows should be successful. +3: COPY unique_index_ao_row FROM PROGRAM 'seq 11 20'; +COPY 30 +-- inserting a conflicting row in the same transaction should ERROR out. +1: COPY unique_index_ao_row FROM PROGRAM 'seq 1 1'; +ERROR: duplicate key value violates unique constraint "unique_index_ao_row_a_key" +DETAIL: Key (a)=(1) already exists. +CONTEXT: COPY unique_index_ao_row, line 1 +-- now that tx 1 was aborted, tx 2 is successful. +2<: <... 
completed> +COPY 3 + +DROP TABLE unique_index_ao_row; +DROP +RESET gp_appendonly_enable_unique_index; +RESET diff --git a/src/test/isolation2/expected/aocs_unique_index.out b/src/test/isolation2/expected/aocs_unique_index.out new file mode 100644 index 00000000000..dfb72c87966 --- /dev/null +++ b/src/test/isolation2/expected/aocs_unique_index.out @@ -0,0 +1,412 @@ +-- Tests to ensure that unique indexes work as expected w/ ao_column tables. + +-- We use a replicated table to test each table for ease in testing edge cases +-- where conflicts arise at block directory boundaries. We can treat the table +-- as if it were being populated in utility mode on a single segment, allowing +-- us to predict block directory entries without having to worry about the +-- table's distribution. + +SET gp_appendonly_enable_unique_index TO ON; +SET + +-- Case 1: Conflict with committed transaction---------------------------------- +CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column DISTRIBUTED REPLICATED; +CREATE +INSERT INTO unique_index_ao_column SELECT * FROM generate_series(1, 658491); +INSERT 658491 +-- should conflict +INSERT INTO unique_index_ao_column VALUES (1); +ERROR: duplicate key value violates unique constraint "unique_index_ao_column_a_key" (seg0 192.168.0.148:7002 pid=721860) +DETAIL: Key (a)=(1) already exists. +INSERT INTO unique_index_ao_column VALUES (658491); +ERROR: duplicate key value violates unique constraint "unique_index_ao_column_a_key" (seg0 192.168.0.148:7002 pid=721860) +DETAIL: Key (a)=(658491) already exists. 
+-- should not conflict +INSERT INTO unique_index_ao_column VALUES (658492); +INSERT 1 +DROP TABLE unique_index_ao_column; +DROP + +-- Case 2: Conflict within the same transaction--------------------------------- +CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column DISTRIBUTED REPLICATED; +CREATE +BEGIN; +BEGIN +INSERT INTO unique_index_ao_column SELECT * FROM generate_series(1, 658491); +INSERT 658491 +-- should conflict +INSERT INTO unique_index_ao_column VALUES (1); +ERROR: duplicate key value violates unique constraint "unique_index_ao_column_a_key" (seg1 192.168.0.148:7003 pid=721861) +DETAIL: Key (a)=(1) already exists. +END; +END +DROP TABLE unique_index_ao_column; +DROP + +CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column DISTRIBUTED REPLICATED; +CREATE +BEGIN; +BEGIN +INSERT INTO unique_index_ao_column SELECT * FROM generate_series(1, 658491); +INSERT 658491 +-- should conflict +INSERT INTO unique_index_ao_column VALUES (658491); +ERROR: duplicate key value violates unique constraint "unique_index_ao_column_a_key" (seg0 192.168.0.148:7002 pid=721860) +DETAIL: Key (a)=(658491) already exists. 
+END; +END +DROP TABLE unique_index_ao_column; +DROP + +CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column DISTRIBUTED REPLICATED; +CREATE +BEGIN; +BEGIN +INSERT INTO unique_index_ao_column SELECT * FROM generate_series(1, 658491); +INSERT 658491 +-- should not conflict +INSERT INTO unique_index_ao_column VALUES (658492); +INSERT 1 +END; +END +DROP TABLE unique_index_ao_column; +DROP + +-- Case 3: Conflict with aborted transaction is not a conflict------------------ +CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column DISTRIBUTED REPLICATED; +CREATE +BEGIN; +BEGIN +INSERT INTO unique_index_ao_column SELECT * FROM generate_series(1, 658491); +INSERT 658491 +ABORT; +ABORT +-- should not conflict +INSERT INTO unique_index_ao_column VALUES (1); +INSERT 1 +INSERT INTO unique_index_ao_column VALUES (658491); +INSERT 1 +INSERT INTO unique_index_ao_column VALUES (658492); +INSERT 1 +DROP TABLE unique_index_ao_column; +DROP + +-- Case 4: Conflict with to-be-committed transaction---------------------------- +-- +-- 1. Uncommitted tx 1 has inserted non-conflicting key = 0. +-- 2. Uncommitted tx 2 has inserted (161 * 4090 + 1 = 658491 rows), which spans +-- 2 block directory rows (1st row: [1,658490] ; 2nd row: [658491,658491]) +-- 3. Tx 3 tries to insert conflicting key = 2, which maps to the second rownum +-- covered by the 1st block directory row of seg 1, and blocks on tx 2. +-- 4. Tx 4 tries to insert conflicting key = 658490, which maps to the last +-- rownum covered by the 1st block directory row of seg 1, and blocks on tx 2. +-- 5. Tx 5 tries to insert conflicting key = 658491, which maps to the first +-- rownum covered by the 2nd block directory row of seg 1, and blocks on tx 2. +-- 6. Tx 6 tries to insert non-conflicting key = 658492 and is immediately +-- successful. +-- 8. Tx 2 commits +-- 9. Txs 3,4,5 report unique constraint violation +-- 10. 
Tx 1 commits +-- +CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column DISTRIBUTED REPLICATED; +CREATE +1: BEGIN; +BEGIN +1: INSERT INTO unique_index_ao_column VALUES (0); +INSERT 1 +2: BEGIN; +BEGIN +2: INSERT INTO unique_index_ao_column SELECT * FROM generate_series(1, 658491); +INSERT 658491 +3&: INSERT INTO unique_index_ao_column VALUES (1); +4&: INSERT INTO unique_index_ao_column VALUES (658490); +5&: INSERT INTO unique_index_ao_column VALUES (658491); +-- should succeed immediately +6: INSERT INTO unique_index_ao_column VALUES (658492); +INSERT 1 +2: COMMIT; +COMMIT +3<: <... completed> +ERROR: duplicate key value violates unique constraint "unique_index_ao_column_a_key" (seg0 192.168.0.148:7002 pid=722493) +DETAIL: Key (a)=(1) already exists. +4<: <... completed> +ERROR: duplicate key value violates unique constraint "unique_index_ao_column_a_key" (seg0 192.168.0.148:7002 pid=722502) +DETAIL: Key (a)=(658490) already exists. +5<: <... completed> +ERROR: duplicate key value violates unique constraint "unique_index_ao_column_a_key" (seg1 192.168.0.148:7003 pid=722513) +DETAIL: Key (a)=(658491) already exists. +1: COMMIT; +COMMIT +DROP TABLE unique_index_ao_column; +DROP + +-- Case 5: Conflict with to-be-aborted transaction------------------------------ +-- +-- 1. Uncommitted tx 1 has inserted non-conflicting key = 0. +-- 2. Uncommitted tx 2 has inserted (161 * 4090 + 1 = 658491 rows), which spans +-- 2 block directory rows (1st row: [1,658490] ; 2nd row: [658491,658491]) +-- 3. Tx 3 tries to insert conflicting key = 2, which maps to the second rownum +-- covered by the 1st block directory row of seg 1, and blocks on tx 2. +-- 4. Tx 4 tries to insert conflicting key = 658490, which maps to the last +-- rownum covered by the 1st block directory row of seg 1, and blocks on tx 2. +-- 5. Tx 5 tries to insert conflicting key = 658491, which maps to the first +-- rownum covered by the 2nd block directory row of seg 1, and blocks on tx 2. +-- 6. 
Tx 6 tries to insert non-conflicting key = 658492 and is immediately
+-- successful.
+-- 8. Tx 2 aborts
+-- 9. Txs 3,4,5 succeed, since the conflicting rows from Tx 2 were aborted
+-- 10. Tx 1 commits
+--
+CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column DISTRIBUTED REPLICATED;
+CREATE
+1: BEGIN;
+BEGIN
+1: INSERT INTO unique_index_ao_column VALUES (0);
+INSERT 1
+2: BEGIN;
+BEGIN
+2: INSERT INTO unique_index_ao_column SELECT * FROM generate_series(1, 658491);
+INSERT 658491
+3&: INSERT INTO unique_index_ao_column VALUES (1);
+4&: INSERT INTO unique_index_ao_column VALUES (658490);
+5&: INSERT INTO unique_index_ao_column VALUES (658491);
+-- should succeed immediately
+6: INSERT INTO unique_index_ao_column VALUES (658492);
+INSERT 1
+2: ABORT;
+ABORT
+3<: <... completed>
+INSERT 1
+4<: <... completed>
+INSERT 1
+5<: <... completed>
+INSERT 1
+1: COMMIT;
+COMMIT
+DROP TABLE unique_index_ao_column;
+DROP
+
+-- Case 6: Conflict with aborted rows following some committed rows ------------
+CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column DISTRIBUTED REPLICATED;
+CREATE
+-- 1. Tx 1 commits rows 1-100.
+-- 2. Tx 2 inserts rows 101-200 and then aborts.
+-- 3. Tx 3 tries to insert row in range [101,200] and is immediately successful.
+-- 4. Tx 4 tries to insert conflicting row in range [1,100] and raises unique
+-- constraint violation.
+-- 5. Tx 5 tries to insert row in range [201, ) and is immediately successful.
+1: INSERT INTO unique_index_ao_column SELECT generate_series(1, 100);
+INSERT 100
+2: BEGIN;
+BEGIN
+2: INSERT INTO unique_index_ao_column SELECT generate_series(101, 200);
+INSERT 100
+2: ABORT;
+ABORT
+3: INSERT INTO unique_index_ao_column VALUES(102);
+INSERT 1
+4: INSERT INTO unique_index_ao_column VALUES(2);
+ERROR: duplicate key value violates unique constraint "unique_index_ao_column_a_key" (seg0 192.168.0.148:7002 pid=659656)
+DETAIL: Key (a)=(2) already exists. 
+5: INSERT INTO unique_index_ao_column VALUES(202); +INSERT 1 +DROP TABLE unique_index_ao_column; +DROP + +-------------------------------------------------------------------------------- +----------------- More concurrent tests with fault injection ------------------ +-------------------------------------------------------------------------------- + +-- Case 7: Conflict with to-be-committed transaction while only a placeholder +-- row exists in the block directory-------------------------------------------- +-- +-- This case highlights the importance of the placeholder row, inserted at the +-- beginning of an INSERT command. +-- +-- 1. Uncommitted Tx 1 has inserted 3 out of its 10 rows and is suspended. +-- 2. Tx 2 inserts a conflicting row and blocks on Tx 1. +-- 3. Tx 3 inserts a non-conflicting row within the range [4,10] and is +-- immediately successful. (Index entries have been written only for [1,3] so +-- far, so conflicts shouldn't arise) +-- 4. Tx 4 inserts a non-conflicting row in range [11, ..) and should be +-- immediately successful. +-- 5. Now Tx 1 resumes and tries to insert a row in range [4,10] and reports a +-- unique constraint violation with Tx 3. +-- 6. Tx 2 succeeds as Tx 1 aborted. + +CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column DISTRIBUTED REPLICATED; +CREATE +SELECT gp_inject_fault('appendonly_insert', 'suspend', '', '', 'unique_index_ao_column', 4, 4, 0, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) +1&: INSERT INTO unique_index_ao_column SELECT * FROM generate_series(1, 10); +-- Wait until 3 rows have been successfully inserted into the index and Tx 1 +-- is just beginning to insert the 4th row. 
+SELECT gp_wait_until_triggered_fault('appendonly_insert', 4, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1;
+ gp_wait_until_triggered_fault
+-------------------------------
+ Success:
+ Success:
+ Success:
+(3 rows)
+2&: INSERT INTO unique_index_ao_column VALUES(2);
+4: INSERT INTO unique_index_ao_column VALUES(11);
+INSERT 1
+3: INSERT INTO unique_index_ao_column VALUES(4);
+INSERT 1
+SELECT gp_inject_fault('appendonly_insert', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1;
+ gp_inject_fault
+-----------------
+ Success:
+ Success:
+ Success:
+(3 rows)
+1<: <... completed>
+ERROR: duplicate key value violates unique constraint "unique_index_ao_column_a_key" (seg1 192.168.0.148:7003 pid=828519)
+DETAIL: Key (a)=(4) already exists.
+2<: <... completed>
+INSERT 1
+DROP TABLE unique_index_ao_column;
+DROP
+
+-- Case 8: Conflict with to-be-committed transaction - generalization of case 7
+-- where there are multiple minipages (and block directory rows) in play from
+-- the same insert.
+--
+-- This justifies why 1 placeholder row is enough and we don't need to flush a
+-- placeholder row every time we insert a block directory row (i.e. start a new
+-- in-memory minipage) throughout the course of a single insert.
+--
+-- 1. Uncommitted Tx 1 has inserted (4090 * (161 * 2 + 1) + 4) = 1321074 rows
+-- and is suspended, enough rows to fill 2 entire minipages (covers
+-- range [1,658490] and [658491,1321070]) before suspension.
+-- 2. Txs 2,3,4 insert conflicting rows that map to the 1st minipage and block.
+-- 3. Txs 5,6,7 insert conflicting rows that map to the 2nd minipage and block.
+-- 4. Tx 8 inserts a conflicting row that maps to the 3rd minipage, which is
+-- currently only in-memory and it conflicts on the placeholder row and
+-- blocks (showcases why 1 placeholder row is enough)
+-- 5. Tx 9 inserts a non-conflicting row for which there is no index entry
+-- and is immediately successful (1321075). 
+-- 6. Now Tx 1 resumes and tries to insert 1321075 and reports a unique +-- constraint violation with Tx 9. +-- 7. All blocked Txs succeed. + +CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column DISTRIBUTED REPLICATED; +CREATE + +SELECT gp_inject_fault('insert_new_entry_curr_minipage_full', 'suspend', '', '', '', 2, 2, 0, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) +1&: INSERT INTO unique_index_ao_column SELECT generate_series(1, 1321075); + +-- Wait until we have inserted (4090 * (161 * 2 + 1) + 3) = 1321073 rows and we +-- are about to insert the 1321074th row. +SELECT gp_wait_until_triggered_fault('insert_new_entry_curr_minipage_full', 2, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + gp_wait_until_triggered_fault +------------------------------- + Success: + Success: + Success: +(3 rows) +SELECT gp_inject_fault('appendonly_insert', 'suspend', '', '', 'unique_index_ao_column', 4, 4, 0, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) +SELECT gp_inject_fault('insert_new_entry_curr_minipage_full', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) +SELECT gp_wait_until_triggered_fault('appendonly_insert', 4, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + gp_wait_until_triggered_fault +------------------------------- + Success: + Success: + Success: +(3 rows) + +-- maps to 1st minipage +2&: INSERT INTO unique_index_ao_column VALUES(1); +3&: INSERT INTO unique_index_ao_column VALUES(300000); +4&: INSERT INTO unique_index_ao_column VALUES(658490); +-- maps to 2nd minipage +5&: INSERT INTO unique_index_ao_column VALUES(658491); +6&: INSERT INTO unique_index_ao_column 
VALUES(700000); +7&: INSERT INTO unique_index_ao_column VALUES(1321070); +-- maps to 3rd minipage +8&: INSERT INTO unique_index_ao_column VALUES(1321071); +-- no index entry exists for it, so should not conflict. +9: INSERT INTO unique_index_ao_column VALUES(1321075); +INSERT 1 + +SELECT gp_inject_fault('appendonly_insert', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) + +1<: <... completed> +ERROR: duplicate key value violates unique constraint "unique_index_ao_column_a_key" (seg2 192.168.0.148:7004 pid=735802) +DETAIL: Key (a)=(1321075) already exists. +2<: <... completed> +INSERT 1 +3<: <... completed> +INSERT 1 +4<: <... completed> +INSERT 1 +5<: <... completed> +INSERT 1 +6<: <... completed> +INSERT 1 +7<: <... completed> +INSERT 1 +8<: <... completed> +INSERT 1 + +DROP TABLE unique_index_ao_column; +DROP + +-------------------------------------------------------------------------------- +--------------------------- Smoke tests for COPY ------------------------------- +-------------------------------------------------------------------------------- + +CREATE TABLE unique_index_ao_column (a INT unique) USING ao_column DISTRIBUTED REPLICATED; +CREATE + +1: BEGIN; +BEGIN +1: COPY unique_index_ao_column FROM PROGRAM 'seq 1 10'; +COPY 30 +-- concurrent tx inserting conflicting row should block. +2&: COPY unique_index_ao_column FROM PROGRAM 'seq 1 1'; +-- concurrent tx inserting non-conflicting rows should be successful. +3: COPY unique_index_ao_column FROM PROGRAM 'seq 11 20'; +COPY 30 +-- inserting a conflicting row in the same transaction should ERROR out. +1: COPY unique_index_ao_column FROM PROGRAM 'seq 1 1'; +ERROR: duplicate key value violates unique constraint "unique_index_ao_column_a_key" +DETAIL: Key (a)=(1) already exists. +CONTEXT: COPY unique_index_ao_column, line 1 +-- now that tx 1 was aborted, tx 2 is successful. +2<: <... 
completed> +COPY 3 + +DROP TABLE unique_index_ao_column; +DROP +RESET gp_appendonly_enable_unique_index; +RESET diff --git a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule index 3437378f9a0..2b8a3e12946 100644 --- a/src/test/isolation2/isolation2_schedule +++ b/src/test/isolation2/isolation2_schedule @@ -89,7 +89,8 @@ test: distributedlog-bug test: invalidated_toast_index test: distributed_snapshot test: gp_collation -test: ao_upgrade ao_blkdir +test: ao_upgrade +test: ao_blkdir test: bitmap_index_concurrent test: bitmap_index_crash test: bitmap_update_words_backup_block @@ -291,3 +292,7 @@ test: check_gxid # test if GUC is synchronized from the QD to QEs. test: sync_guc + +# Tests for unique indexes on AO/CO tables (uses fault injector) +test: ao_unique_index +test: aocs_unique_index diff --git a/src/test/isolation2/sql/ao_blkdir.sql b/src/test/isolation2/sql/ao_blkdir.sql index 73935d06800..217244d6466 100644 --- a/src/test/isolation2/sql/ao_blkdir.sql +++ b/src/test/isolation2/sql/ao_blkdir.sql @@ -41,6 +41,61 @@ INSERT INTO ao_blkdir_test SELECT i, 2 FROM generate_series(1, 292700) i; SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; +-- Unique index white box tests +DROP TABLE ao_blkdir_test; +SET gp_appendonly_enable_unique_index TO ON; +CREATE TABLE ao_blkdir_test(i int UNIQUE, j int) USING ao_row DISTRIBUTED BY (i); + +SELECT gp_inject_fault('appendonly_insert', 'suspend', '', '', 'ao_blkdir_test', 1, 1, 0, dbid) +FROM gp_segment_configuration WHERE role = 'p' AND content = 0; +1: BEGIN; +1&: INSERT INTO ao_blkdir_test VALUES (2, 2); + +-- There should be a placeholder row inserted to cover the rows for each INSERT +-- session, before we insert the 1st row in that session, that is only visible +-- to SNAPSHOT_DIRTY. 
+SELECT gp_wait_until_triggered_fault('appendonly_insert', 1, dbid) +FROM gp_segment_configuration WHERE role = 'p' AND content = 0; +SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') +WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; +SET gp_select_invisible TO ON; +SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') +WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; +RESET gp_select_invisible; + +-- The placeholder row is invisible to other transactions (that don't perform a +-- uniqueness check) while the INSERT is in progress. +2: SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') + WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + +SELECT gp_inject_fault('appendonly_insert', 'reset', dbid) +FROM gp_segment_configuration WHERE role = 'p' AND content = 0; +1<: + +-- The placeholder row is invisible to the INSERTing transaction. Since the +-- INSERT finished, there should be 1 visible blkdir row representing the INSERT. +1: SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') +WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + +-- The placeholder row is invisible to other transactions (that don't perform a +-- uniqueness check) even after the INSERT finishes. The blkdir row representing +-- the INSERT should not be visible as the INSERTing transaction hasn't +-- committed yet. +2: SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') +WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + +1: COMMIT; + +-- The placeholder row is invisible to other transactions (that don't perform a +-- uniqueness check) even after the INSERTing transaction commits. Since the +-- INSERTing transaction has committed, the blkdir row representing the INSERT +-- should be visible now. 
+2: SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') +WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + +DROP TABLE ao_blkdir_test; +RESET gp_appendonly_enable_unique_index; + -------------------------------------------------------------------------------- -- AOCO tables -------------------------------------------------------------------------------- @@ -81,3 +136,70 @@ INSERT INTO aoco_blkdir_test SELECT i, 2 FROM generate_series(1, 1317143) i; -- overflow rows. SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + +-- Unique index white box tests +DROP TABLE aoco_blkdir_test; +SET gp_appendonly_enable_unique_index TO ON; +CREATE TABLE aoco_blkdir_test(h int, i int UNIQUE, j int) USING ao_column DISTRIBUTED BY (i); + +SELECT gp_inject_fault('appendonly_insert', 'suspend', '', '', 'aoco_blkdir_test', 1, 1, 0, dbid) +FROM gp_segment_configuration WHERE role = 'p' AND content = 0; +1: BEGIN; +1&: INSERT INTO aoco_blkdir_test VALUES (2, 2, 2); + +-- There should be a placeholder row inserted to cover the rows for each INSERT +-- session (for the first non-dropped column), before we insert the 1st row in +-- that session, that is only visible to SNAPSHOT_DIRTY. +SELECT gp_wait_until_triggered_fault('appendonly_insert', 1, dbid) +FROM gp_segment_configuration WHERE role = 'p' AND content = 0; +SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_id') +WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; +SET gp_select_invisible TO ON; +SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_id') +WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; +RESET gp_select_invisible; + +-- The placeholder row is invisible to other transactions (that don't perform a +-- uniqueness check) while the INSERT is in progress. 
+2: SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_id') + WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + +-- Before the INSERT commits, if we try to drop column 'h', for which the +-- placeholder row was created, the session will block (locking). So it is +-- perfectly safe to use 1 placeholder row (and not have 1 placeholder/column) +3&: ALTER TABLE aoco_blkdir_test DROP COLUMN h; + +SELECT gp_inject_fault('appendonly_insert', 'reset', dbid) +FROM gp_segment_configuration WHERE role = 'p' AND content = 0; +1<: + +-- The placeholder row is invisible to the INSERTing transaction. Since the +-- INSERT finished, there should be 3 visible blkdir rows representing the +-- INSERT, 1 for each column. +1: SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_id') + WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + +-- The placeholder row is invisible to other transactions (that don't perform a +-- uniqueness check) even after the INSERT finishes. The blkdir rows representing +-- the INSERT should not be visible as the INSERTing transaction hasn't +-- committed yet. +2: SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_id') + WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + +1: COMMIT; + +-- The placeholder row is invisible to other transactions (that don't perform a +-- uniqueness check) even after the INSERTing transaction commits. Since the +-- INSERTing transaction has committed, the blkdir rows representing the INSERT +-- should be visible now. +2: SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_id') + WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + +-- Now even though the DROP COLUMN has finished, we would still be able to +-- properly resolve uniqueness checks (by consulting the first non-dropped +-- column's block directory row). 
+3<: +4: INSERT INTO aoco_blkdir_test VALUES (2, 2); + +DROP TABLE aoco_blkdir_test; +RESET gp_appendonly_enable_unique_index; diff --git a/src/test/isolation2/sql/ao_unique_index.sql b/src/test/isolation2/sql/ao_unique_index.sql new file mode 100644 index 00000000000..a06f1bbf5c9 --- /dev/null +++ b/src/test/isolation2/sql/ao_unique_index.sql @@ -0,0 +1,275 @@ +-- Tests to ensure that unique indexes work as expected w/ ao_row tables. + +-- We use a replicated table to test each table for ease in testing edge cases +-- where conflicts arise at block directory boundaries. We can treat the table +-- as if it were being populated in utility mode on a single segment, allowing +-- us to predict block directory entries without having to worry about the +-- table's distribution. + +SET gp_appendonly_enable_unique_index TO ON; + +-- Case 1: Conflict with committed transaction---------------------------------- +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row + DISTRIBUTED REPLICATED; +INSERT INTO unique_index_ao_row SELECT * FROM generate_series(1, 329729); +-- should conflict +INSERT INTO unique_index_ao_row VALUES (1); +INSERT INTO unique_index_ao_row VALUES (329729); +-- should not conflict +INSERT INTO unique_index_ao_row VALUES (329730); +DROP TABLE unique_index_ao_row; + +-- Case 2: Conflict within the same transaction--------------------------------- +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row + DISTRIBUTED REPLICATED; +BEGIN; +INSERT INTO unique_index_ao_row SELECT * FROM generate_series(1, 329729); +-- should conflict +INSERT INTO unique_index_ao_row VALUES (1); +END; +DROP TABLE unique_index_ao_row; + +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row + DISTRIBUTED REPLICATED; +BEGIN; +INSERT INTO unique_index_ao_row SELECT * FROM generate_series(1, 329729); +-- should conflict +INSERT INTO unique_index_ao_row VALUES (329729); +END; +DROP TABLE unique_index_ao_row; + +CREATE TABLE unique_index_ao_row (a INT unique) USING 
ao_row + DISTRIBUTED REPLICATED; +BEGIN; +INSERT INTO unique_index_ao_row SELECT * FROM generate_series(1, 329729); +-- should not conflict +INSERT INTO unique_index_ao_row VALUES (329730); +END; +DROP TABLE unique_index_ao_row; + +-- Case 3: Conflict with aborted transaction is not a conflict------------------ +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row + DISTRIBUTED REPLICATED; +BEGIN; +INSERT INTO unique_index_ao_row SELECT * FROM generate_series(1, 329729); +ABORT; +-- should not conflict +INSERT INTO unique_index_ao_row VALUES (1); +INSERT INTO unique_index_ao_row VALUES (329729); +INSERT INTO unique_index_ao_row VALUES (329730); +DROP TABLE unique_index_ao_row; + +-- Case 4: Conflict with to-be-committed transaction---------------------------- +-- +-- 1. Uncommitted tx 1 has inserted non-conflicting key = 0. +-- 2. Uncommitted tx 2 has inserted (161 * 2048 + 1 = 329729 rows), which spans +-- 2 block directory rows (1st row: [1,329728] ; 2nd row: [329729,329729]) +-- 3. Tx 3 tries to insert conflicting key = 2, which maps to the second rownum +-- covered by the 1st block directory row of seg 1, and blocks on tx 2. +-- 4. Tx 4 tries to insert conflicting key = 329728, which maps to the last +-- rownum covered by the 1st block directory row of seg 1, and blocks on tx 2. +-- 5. Tx 5 tries to insert conflicting key = 329729, which maps to the first +-- rownum covered by the 2nd block directory row of seg 1, and blocks on tx 2. +-- 6. Tx 6 tries to insert non-conflicting key = 329730 and is immediately +-- successful. +-- 7. Tx 2 commits +-- 8. Txs 3,4,5 report unique constraint violation +-- 9. 
Tx 1 commits +-- +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row + DISTRIBUTED REPLICATED; +1: BEGIN; +1: INSERT INTO unique_index_ao_row VALUES (0); +2: BEGIN; +2: INSERT INTO unique_index_ao_row SELECT * FROM generate_series(1, 329729); +3&: INSERT INTO unique_index_ao_row VALUES (1); +4&: INSERT INTO unique_index_ao_row VALUES (329728); +5&: INSERT INTO unique_index_ao_row VALUES (329729); +-- should succeed immediately +6: INSERT INTO unique_index_ao_row VALUES (329730); +2: COMMIT; +3<: +4<: +5<: +1: COMMIT; +DROP TABLE unique_index_ao_row; + +-- Case 5: Conflict with to-be-aborted transaction------------------------------ +-- +-- 1. Uncommitted tx 1 has inserted non-conflicting key = 0. +-- 2. Uncommitted tx 2 has inserted (161 * 2048 + 1 = 329729 rows), which spans +-- 2 block directory rows (1st row: [1,329728] ; 2nd row: [329729,329729]) +-- 3. Tx 3 tries to insert conflicting key = 2, which maps to the second rownum +-- covered by the 1st block directory row of seg 1, and blocks on tx 2. +-- 4. Tx 4 tries to insert conflicting key = 329728, which maps to the last +-- rownum covered by the 1st block directory row of seg 1, and blocks on tx 2. +-- 5. Tx 5 tries to insert conflicting key = 329729, which maps to the first +-- rownum covered by the 2nd block directory row of seg 1, and blocks on tx 2. +-- 6. Tx 6 tries to insert non-conflicting key = 329730 and is immediately +-- successful. +-- 7. Tx 2 aborts +-- 8. Txs 3,4,5 succeed, as the conflicting rows belong to an aborted tx +-- 9.
Tx 1 commits +-- +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row + DISTRIBUTED REPLICATED; +1: BEGIN; +1: INSERT INTO unique_index_ao_row VALUES (0); +2: BEGIN; +2: INSERT INTO unique_index_ao_row SELECT * FROM generate_series(1, 329729); +3&: INSERT INTO unique_index_ao_row VALUES (1); +4&: INSERT INTO unique_index_ao_row VALUES (329728); +5&: INSERT INTO unique_index_ao_row VALUES (329729); +-- should succeed immediately +6: INSERT INTO unique_index_ao_row VALUES (329730); +2: ABORT; +3<: +4<: +5<: +1: COMMIT; +DROP TABLE unique_index_ao_row; + +-- Case 6: Conflict with aborted rows following some committed rows ------------ +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row + DISTRIBUTED REPLICATED; +-- 1. Tx 1 commits rows 1-100. +-- 2. Tx 2 inserts rows 101-200 and then aborts. +-- 3. Tx 3 tries to insert row in range [101,200] and is immediately successful. +-- 4. Tx 4 tries to insert conflicting row in range [1,100] and raises unique +-- constraint violation. +-- 5. Tx 5 tries to insert row in range [201, ) and is immediately successful. +1: INSERT INTO unique_index_ao_row SELECT generate_series(1, 100); +2: BEGIN; +2: INSERT INTO unique_index_ao_row SELECT generate_series(101, 200); +2: ABORT; +3: INSERT INTO unique_index_ao_row VALUES(102); +4: INSERT INTO unique_index_ao_row VALUES(2); +5: INSERT INTO unique_index_ao_row VALUES(202); +DROP TABLE unique_index_ao_row; + +-------------------------------------------------------------------------------- +----------------- More concurrent tests with fault injection ------------------ +-------------------------------------------------------------------------------- + +-- Case 7: Conflict with to-be-committed transaction while only 1 placeholder +-- row exists in the block directory-------------------------------------------- +-- +-- This case highlights the importance of the placeholder row, inserted at the +-- beginning of an INSERT command. +-- +-- 1. 
Uncommitted Tx 1 has inserted 3 out of its 10 rows and is suspended. +-- 2. Tx 2 inserts a conflicting row and blocks on Tx 1. +-- 3. Tx 3 inserts a non-conflicting row within the range [4,10] and is +-- immediately successful. (Index entries have been written only for [1,3] so +-- far, so conflicts shouldn't arise) +-- 4. Tx 4 inserts a non-conflicting row in range [11, ..) and should be +-- immediately successful. +-- 5. Now Tx 1 resumes and tries to insert a row in range [4,10] and reports a +-- unique constraint violation with Tx 3. +-- 6. Tx 2 succeeds as Tx 1 aborted. + +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row + DISTRIBUTED REPLICATED; +SELECT gp_inject_fault('appendonly_insert', 'suspend', '', '', 'unique_index_ao_row', 4, 4, 0, dbid) + FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; +1&: INSERT INTO unique_index_ao_row SELECT * FROM generate_series(1, 10); +-- Wait until 3 rows have been successfully inserted into the index and Tx 1 +-- is just beginning to insert the 4th row. +SELECT gp_wait_until_triggered_fault('appendonly_insert', 4, dbid) + FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; +2&: INSERT INTO unique_index_ao_row VALUES(2); +4: INSERT INTO unique_index_ao_row VALUES(11); +3: INSERT INTO unique_index_ao_row VALUES(4); +SELECT gp_inject_fault('appendonly_insert', 'reset', dbid) + FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; +1<: +2<: +DROP TABLE unique_index_ao_row; + +-- Case 8: Conflict with to-be-committed transaction - generalization of case 7 +-- where there are multiple minipages (and block directory rows) in play from +-- the same insert. +-- +-- This justifies why 1 placeholder row is enough and we don't need to flush a +-- placeholder row every time we insert a block directory row (i.e. start a new +-- in-memory minipage) throughout the course of a single insert. +-- +-- 1. 
Uncommitted Tx 1 has inserted (2048 * (161 * 2 + 1) + 3) = 661507 rows +-- and is suspended, enough rows to fill 2 entire minipages (covers +-- range [1,329728] and [329729,659456]) before suspension. +-- 2. Txs 2,3,4 insert conflicting rows that map to the 1st minipage and block. +-- 3. Txs 5,6,7 insert conflicting rows that map to the 2nd minipage and block. +-- 4. Tx 8 inserts a conflicting row that maps to the 3rd minipage, which is +-- currently only in-memory and it conflicts on the placeholder row and +-- blocks (showcases why 1 placeholder row is enough) +-- 5. Tx 9 inserts a non-conflicting row for which there is no index entry +-- and is immediately successful (661510). +-- 6. Now Tx 1 resumes and tries to insert 661510 and reports a unique +-- constraint violation with Tx 9. +-- 7. All blocked Txs succeed. + +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row + DISTRIBUTED REPLICATED; + +SELECT gp_inject_fault('insert_new_entry_curr_minipage_full', 'suspend', '', '', '', 2, 2, 0, dbid) +FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; +1&: INSERT INTO unique_index_ao_row SELECT generate_series(1, 661510); + +-- Wait until we have inserted (2048 * (161 * 2 + 1) + 3) = 661507 rows and we +-- are about to insert the 661508th row.
+SELECT gp_wait_until_triggered_fault('insert_new_entry_curr_minipage_full', 2, dbid) +FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; +SELECT gp_inject_fault('appendonly_insert', 'suspend', '', '', 'unique_index_ao_row', 4, 4, 0, dbid) +FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; +SELECT gp_inject_fault('insert_new_entry_curr_minipage_full', 'reset', dbid) +FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; +SELECT gp_wait_until_triggered_fault('appendonly_insert', 4, dbid) +FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + +-- maps to 1st minipage +2&: INSERT INTO unique_index_ao_row VALUES(1); +3&: INSERT INTO unique_index_ao_row VALUES(300000); +4&: INSERT INTO unique_index_ao_row VALUES(329728); +-- maps to 2nd minipage +5&: INSERT INTO unique_index_ao_row VALUES(329729); +6&: INSERT INTO unique_index_ao_row VALUES(598000); +7&: INSERT INTO unique_index_ao_row VALUES(659456); +-- maps to 3rd minipage +8&: INSERT INTO unique_index_ao_row VALUES(661507); +-- no index entry exists for it, so should not conflict. +9: INSERT INTO unique_index_ao_row VALUES(661510); + +SELECT gp_inject_fault('appendonly_insert', 'reset', dbid) +FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + +1<: +2<: +3<: +4<: +5<: +6<: +7<: +8<: + +DROP TABLE unique_index_ao_row; + +-------------------------------------------------------------------------------- +--------------------------- Smoke tests for COPY ------------------------------- +-------------------------------------------------------------------------------- + +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row + DISTRIBUTED REPLICATED; + +1: BEGIN; +1: COPY unique_index_ao_row FROM PROGRAM 'seq 1 10'; +-- concurrent tx inserting conflicting row should block. +2&: COPY unique_index_ao_row FROM PROGRAM 'seq 1 1'; +-- concurrent tx inserting non-conflicting rows should be successful. 
+3: COPY unique_index_ao_row FROM PROGRAM 'seq 11 20'; +-- inserting a conflicting row in the same transaction should ERROR out. +1: COPY unique_index_ao_row FROM PROGRAM 'seq 1 1'; +-- now that tx 1 was aborted, tx 2 is successful. +2<: + +DROP TABLE unique_index_ao_row; +RESET gp_appendonly_enable_unique_index; diff --git a/src/test/isolation2/sql/aocs_unique_index.sql b/src/test/isolation2/sql/aocs_unique_index.sql new file mode 100644 index 00000000000..80e9d9389c1 --- /dev/null +++ b/src/test/isolation2/sql/aocs_unique_index.sql @@ -0,0 +1,275 @@ +-- Tests to ensure that unique indexes work as expected w/ ao_column tables. + +-- We use a replicated table to test each table for ease in testing edge cases +-- where conflicts arise at block directory boundaries. We can treat the table +-- as if it were being populated in utility mode on a single segment, allowing +-- us to predict block directory entries without having to worry about the +-- table's distribution. + +SET gp_appendonly_enable_unique_index TO ON; + +-- Case 1: Conflict with committed transaction---------------------------------- +CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column + DISTRIBUTED REPLICATED; +INSERT INTO unique_index_ao_column SELECT * FROM generate_series(1, 658491); +-- should conflict +INSERT INTO unique_index_ao_column VALUES (1); +INSERT INTO unique_index_ao_column VALUES (658491); +-- should not conflict +INSERT INTO unique_index_ao_column VALUES (658492); +DROP TABLE unique_index_ao_column; + +-- Case 2: Conflict within the same transaction--------------------------------- +CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column + DISTRIBUTED REPLICATED; +BEGIN; +INSERT INTO unique_index_ao_column SELECT * FROM generate_series(1, 658491); +-- should conflict +INSERT INTO unique_index_ao_column VALUES (1); +END; +DROP TABLE unique_index_ao_column; + +CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column + DISTRIBUTED REPLICATED; 
+BEGIN; +INSERT INTO unique_index_ao_column SELECT * FROM generate_series(1, 658491); +-- should conflict +INSERT INTO unique_index_ao_column VALUES (658491); +END; +DROP TABLE unique_index_ao_column; + +CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column + DISTRIBUTED REPLICATED; +BEGIN; +INSERT INTO unique_index_ao_column SELECT * FROM generate_series(1, 658491); +-- should not conflict +INSERT INTO unique_index_ao_column VALUES (658492); +END; +DROP TABLE unique_index_ao_column; + +-- Case 3: Conflict with aborted transaction is not a conflict------------------ +CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column + DISTRIBUTED REPLICATED; +BEGIN; +INSERT INTO unique_index_ao_column SELECT * FROM generate_series(1, 658491); +ABORT; +-- should not conflict +INSERT INTO unique_index_ao_column VALUES (1); +INSERT INTO unique_index_ao_column VALUES (658491); +INSERT INTO unique_index_ao_column VALUES (658492); +DROP TABLE unique_index_ao_column; + +-- Case 4: Conflict with to-be-committed transaction---------------------------- +-- +-- 1. Uncommitted tx 1 has inserted non-conflicting key = 0. +-- 2. Uncommitted tx 2 has inserted (161 * 4090 + 1 = 658491 rows), which spans +-- 2 block directory rows (1st row: [1,658490] ; 2nd row: [658491,658491]) +-- 3. Tx 3 tries to insert conflicting key = 2, which maps to the second rownum +-- covered by the 1st block directory row of seg 1, and blocks on tx 2. +-- 4. Tx 4 tries to insert conflicting key = 658490, which maps to the last +-- rownum covered by the 1st block directory row of seg 1, and blocks on tx 2. +-- 5. Tx 5 tries to insert conflicting key = 658491, which maps to the first +-- rownum covered by the 2nd block directory row of seg 1, and blocks on tx 2. +-- 6. Tx 6 tries to insert non-conflicting key = 658492 and is immediately +-- successful. +-- 7. Tx 2 commits +-- 8. Txs 3,4,5 report unique constraint violation +-- 9.
Tx 1 commits +-- +CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column + DISTRIBUTED REPLICATED; +1: BEGIN; +1: INSERT INTO unique_index_ao_column VALUES (0); +2: BEGIN; +2: INSERT INTO unique_index_ao_column SELECT * FROM generate_series(1, 658491); +3&: INSERT INTO unique_index_ao_column VALUES (1); +4&: INSERT INTO unique_index_ao_column VALUES (658490); +5&: INSERT INTO unique_index_ao_column VALUES (658491); +-- should succeed immediately +6: INSERT INTO unique_index_ao_column VALUES (658492); +2: COMMIT; +3<: +4<: +5<: +1: COMMIT; +DROP TABLE unique_index_ao_column; + +-- Case 5: Conflict with to-be-aborted transaction------------------------------ +-- +-- 1. Uncommitted tx 1 has inserted non-conflicting key = 0. +-- 2. Uncommitted tx 2 has inserted (161 * 4090 + 1 = 658491 rows), which spans +-- 2 block directory rows (1st row: [1,658490] ; 2nd row: [658491,658491]) +-- 3. Tx 3 tries to insert conflicting key = 2, which maps to the second rownum +-- covered by the 1st block directory row of seg 1, and blocks on tx 2. +-- 4. Tx 4 tries to insert conflicting key = 658490, which maps to the last +-- rownum covered by the 1st block directory row of seg 1, and blocks on tx 2. +-- 5. Tx 5 tries to insert conflicting key = 658491, which maps to the first +-- rownum covered by the 2nd block directory row of seg 1, and blocks on tx 2. +-- 6. Tx 6 tries to insert non-conflicting key = 658492 and is immediately +-- successful. +-- 7. Tx 2 aborts +-- 8. Txs 3,4,5 succeed, as the conflicting rows belong to an aborted tx +-- 9.
Tx 1 commits +-- +CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column + DISTRIBUTED REPLICATED; +1: BEGIN; +1: INSERT INTO unique_index_ao_column VALUES (0); +2: BEGIN; +2: INSERT INTO unique_index_ao_column SELECT * FROM generate_series(1, 658491); +3&: INSERT INTO unique_index_ao_column VALUES (1); +4&: INSERT INTO unique_index_ao_column VALUES (658490); +5&: INSERT INTO unique_index_ao_column VALUES (658491); +-- should succeed immediately +6: INSERT INTO unique_index_ao_column VALUES (658492); +2: ABORT; +3<: +4<: +5<: +1: COMMIT; +DROP TABLE unique_index_ao_column; + +-- Case 6: Conflict with aborted rows following some committed rows ------------ +CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column + DISTRIBUTED REPLICATED; +-- 1. Tx 1 commits rows 1-100. +-- 2. Tx 2 inserts rows 101-200 and then aborts. +-- 3. Tx 3 tries to insert row in range [101,200] and is immediately successful. +-- 4. Tx 4 tries to insert conflicting row in range [1,100] and raises unique +-- constraint violation. +-- 5. Tx 5 tries to insert row in range [201, ) and is immediately successful. 
+1: INSERT INTO unique_index_ao_column SELECT generate_series(1, 100); +2: BEGIN; +2: INSERT INTO unique_index_ao_column SELECT generate_series(101, 200); +2: ABORT; +3: INSERT INTO unique_index_ao_column VALUES(102); +4: INSERT INTO unique_index_ao_column VALUES(2); +5: INSERT INTO unique_index_ao_column VALUES(202); +DROP TABLE unique_index_ao_column; + +-------------------------------------------------------------------------------- +----------------- More concurrent tests with fault injection ------------------ +-------------------------------------------------------------------------------- + +-- Case 7: Conflict with to-be-committed transaction while only a placeholder +-- row exists in the block directory-------------------------------------------- +-- +-- This case highlights the importance of the placeholder row, inserted at the +-- beginning of an INSERT command. +-- +-- 1. Uncommitted Tx 1 has inserted 3 out of its 10 rows and is suspended. +-- 2. Tx 2 inserts a conflicting row and blocks on Tx 1. +-- 3. Tx 3 inserts a non-conflicting row within the range [4,10] and is +-- immediately successful. (Index entries have been written only for [1,3] so +-- far, so conflicts shouldn't arise) +-- 4. Tx 4 inserts a non-conflicting row in range [11, ..) and should be +-- immediately successful. +-- 5. Now Tx 1 resumes and tries to insert a row in range [4,10] and reports a +-- unique constraint violation with Tx 3. +-- 6. Tx 2 succeeds as Tx 1 aborted. + +CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column + DISTRIBUTED REPLICATED; +SELECT gp_inject_fault('appendonly_insert', 'suspend', '', '', 'unique_index_ao_column', 4, 4, 0, dbid) + FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; +1&: INSERT INTO unique_index_ao_column SELECT * FROM generate_series(1, 10); +-- Wait until 3 rows have been successfully inserted into the index and Tx 1 +-- is just beginning to insert the 4th row. 
+SELECT gp_wait_until_triggered_fault('appendonly_insert', 4, dbid) + FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; +2&: INSERT INTO unique_index_ao_column VALUES(2); +4: INSERT INTO unique_index_ao_column VALUES(11); +3: INSERT INTO unique_index_ao_column VALUES(4); +SELECT gp_inject_fault('appendonly_insert', 'reset', dbid) + FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; +1<: +2<: +DROP TABLE unique_index_ao_column; + +-- Case 8: Conflict with to-be-committed transaction - generalization of case 7 +-- where there are multiple minipages (and block directory rows) in play from +-- the same insert. +-- +-- This justifies why 1 placeholder row is enough and we don't need to flush a +-- placeholder row every time we insert a block directory row (i.e. start a new +-- in-memory minipage) throughout the course of a single insert. +-- +-- 1. Uncommitted Tx 1 has inserted (4090 * (161 * 2 + 1) + 4) = 1321074 rows +-- and is suspended, enough rows to fill 2 entire minipages (covers +-- range [1,658490] and [658491,1321070]) before suspension. +-- 2. Txs 2,3,4 insert conflicting rows that map to the 1st minipage and block. +-- 3. Txs 5,6,7 insert conflicting rows that map to the 2nd minipage and block. +-- 4. Tx 8 inserts a conflicting row that maps to the 3rd minipage, which is +-- currently only in-memory and it conflicts on the placeholder row and +-- blocks (showcases why 1 placeholder row is enough) +-- 5. Tx 9 inserts a non-conflicting row for which there is no index entry +-- and is immediately successful (1321075). +-- 6. Now Tx 1 resumes and tries to insert 1321075 and reports a unique +-- constraint violation with Tx 9. +-- 7. All blocked Txs succeed.
+ +CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column + DISTRIBUTED REPLICATED; + +SELECT gp_inject_fault('insert_new_entry_curr_minipage_full', 'suspend', '', '', '', 2, 2, 0, dbid) + FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; +1&: INSERT INTO unique_index_ao_column SELECT generate_series(1, 1321075); + +-- Wait until we have inserted (4090 * (161 * 2 + 1) + 3) = 1321073 rows and we +-- are about to insert the 1321074th row. +SELECT gp_wait_until_triggered_fault('insert_new_entry_curr_minipage_full', 2, dbid) + FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; +SELECT gp_inject_fault('appendonly_insert', 'suspend', '', '', 'unique_index_ao_column', 4, 4, 0, dbid) + FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; +SELECT gp_inject_fault('insert_new_entry_curr_minipage_full', 'reset', dbid) + FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; +SELECT gp_wait_until_triggered_fault('appendonly_insert', 4, dbid) + FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + +-- maps to 1st minipage +2&: INSERT INTO unique_index_ao_column VALUES(1); +3&: INSERT INTO unique_index_ao_column VALUES(300000); +4&: INSERT INTO unique_index_ao_column VALUES(658490); +-- maps to 2nd minipage +5&: INSERT INTO unique_index_ao_column VALUES(658491); +6&: INSERT INTO unique_index_ao_column VALUES(700000); +7&: INSERT INTO unique_index_ao_column VALUES(1321070); +-- maps to 3rd minipage +8&: INSERT INTO unique_index_ao_column VALUES(1321071); +-- no index entry exists for it, so should not conflict. 
+9: INSERT INTO unique_index_ao_column VALUES(1321075);
+
+SELECT gp_inject_fault('appendonly_insert', 'reset', dbid)
+ FROM gp_segment_configuration WHERE role = 'p' AND content <> -1;
+
+1<:
+2<:
+3<:
+4<:
+5<:
+6<:
+7<:
+8<:
+
+DROP TABLE unique_index_ao_column;
+
+--------------------------------------------------------------------------------
+--------------------------- Smoke tests for COPY -------------------------------
+--------------------------------------------------------------------------------
+
+CREATE TABLE unique_index_ao_column (a INT unique) USING ao_column
+ DISTRIBUTED REPLICATED;
+
+1: BEGIN;
+1: COPY unique_index_ao_column FROM PROGRAM 'seq 1 10';
+-- concurrent tx inserting conflicting row should block.
+2&: COPY unique_index_ao_column FROM PROGRAM 'seq 1 1';
+-- concurrent tx inserting non-conflicting rows should be successful.
+3: COPY unique_index_ao_column FROM PROGRAM 'seq 11 20';
+-- inserting a conflicting row in the same transaction should ERROR out.
+1: COPY unique_index_ao_column FROM PROGRAM 'seq 1 1';
+-- now that tx 1 was aborted, tx 2 is successful.
+2<:
+
+DROP TABLE unique_index_ao_column;
+RESET gp_appendonly_enable_unique_index;

From c4c9c5f7b4e83b06f28f84dd7981804005aa2293 Mon Sep 17 00:00:00 2001
From: Soumyadeep Chakraborty
Date: Fri, 4 Nov 2022 15:19:08 -0700
Subject: [PATCH 05/19] DELETE on AO/CO tables with unique indexes

This commit brings DELETE functionality to AO/CO tables with unique
indexes.

To support DELETE, we simply have to check the visibility map in case a
conflict should arise while performing uniqueness checks, to see if the
tuple has been deleted. Further, we should only perform such a check if
the conflicting tuple was already committed (and not inserted by a
concurrent transaction).

Since, by design, AO/CO tables don't support concurrent DELETEs and
UPDATEs, we don't have to worry about those during insert existence
checks or in DELETE code paths.
We simply add to the UniqueCheckDescs a visimap struct, which we initialize, as we would do for a regular index fetch operation. We use this struct to perform the aforementioned lookups. --- src/backend/access/aocs/aocsam_handler.c | 67 +++++++++++++- src/backend/access/appendonly/README.md | 27 +++++- .../access/appendonly/appendonlyam_handler.c | 66 +++++++++++++- src/include/cdb/cdbaocsam.h | 1 + src/include/cdb/cdbappendonlyam.h | 1 + src/test/regress/greenplum_schedule | 2 +- .../uao_dml_unique_index_delete.source | 73 +++++++++++++++ .../uao_dml_unique_index_delete.source | 90 +++++++++++++++++++ 8 files changed, 319 insertions(+), 8 deletions(-) create mode 100644 src/test/regress/input/uao_dml/uao_dml_unique_index_delete.source create mode 100644 src/test/regress/output/uao_dml/uao_dml_unique_index_delete.source diff --git a/src/backend/access/aocs/aocsam_handler.c b/src/backend/access/aocs/aocsam_handler.c index 10a1e50d207..4a27fc4fb53 100644 --- a/src/backend/access/aocs/aocsam_handler.c +++ b/src/backend/access/aocs/aocsam_handler.c @@ -302,9 +302,16 @@ aoco_dml_finish(Relation relation, CmdType operation) if (state->uniqueCheckDesc) { + /* clean up the block directory */ AppendOnlyBlockDirectory_End_forSearch(state->uniqueCheckDesc->blockDirectory); pfree(state->uniqueCheckDesc->blockDirectory); state->uniqueCheckDesc->blockDirectory = NULL; + + /* clean up the visimap */ + AppendOnlyVisimap_Finish(state->uniqueCheckDesc->visimap, AccessShareLock); + pfree(state->uniqueCheckDesc->visimap); + state->uniqueCheckDesc->visimap = NULL; + pfree(state->uniqueCheckDesc); state->uniqueCheckDesc = NULL; } @@ -454,13 +461,28 @@ get_or_create_unique_check_desc(Relation relation, Snapshot snapshot) { MemoryContext oldcxt; AOCSUniqueCheckDesc uniqueCheckDesc; + Oid visimaprelid; + Oid visimapidxid; oldcxt = MemoryContextSwitchTo(aocoLocal.stateCxt); uniqueCheckDesc = palloc0(sizeof(AOCSUniqueCheckDescData)); + + /* Initialize the block directory */ 
uniqueCheckDesc->blockDirectory = palloc0(sizeof(AppendOnlyBlockDirectory)); AppendOnlyBlockDirectory_Init_forSearch(uniqueCheckDesc->blockDirectory, snapshot, NULL, -1, relation, relation->rd_att->natts, false, NULL); + /* Initialize the visimap */ + uniqueCheckDesc->visimap = palloc0(sizeof(AppendOnlyVisimap)); + GetAppendOnlyEntryAuxOids(relation->rd_id, + snapshot, + NULL, NULL, NULL, + &visimaprelid, &visimapidxid); + AppendOnlyVisimap_Init(uniqueCheckDesc->visimap, + visimaprelid, + visimapidxid, + AccessShareLock, + snapshot); state->uniqueCheckDesc = uniqueCheckDesc; MemoryContextSwitchTo(oldcxt); } @@ -854,6 +876,12 @@ aoco_index_fetch_tuple(struct IndexFetchTableData *scan, * * There is no need to fetch the tuple (we actually can't reliably do so as * we might encounter a placeholder row in the block directory) + * + * If no visible block directory entry exists, we are done. If it does, we need + * to further check the visibility of the tuple itself by consulting the visimap. + * Now, the visimap check can be skipped if the tuple was found to have been + * inserted by a concurrent in-progress transaction, in which case we return + * true and have the xwait machinery kick in. */ static bool aoco_index_fetch_tuple_exists(Relation rel, @@ -862,8 +890,8 @@ aoco_index_fetch_tuple_exists(Relation rel, bool *all_dead) { AOCSUniqueCheckDesc uniqueCheckDesc; - AppendOnlyBlockDirectory *blockDirectory; AOTupleId *aoTupleId = (AOTupleId *) tid; + bool visible; #ifdef USE_ASSERT_CHECKING int segmentFileNum = AOTupleIdGet_segmentFileNum(aoTupleId); @@ -903,8 +931,41 @@ aoco_index_fetch_tuple_exists(Relation rel, return true; uniqueCheckDesc = get_or_create_unique_check_desc(rel, snapshot); - blockDirectory = uniqueCheckDesc->blockDirectory; - return AppendOnlyBlockDirectory_CoversTuple(blockDirectory, aoTupleId); + + /* + * Check to see if there is a block directory entry for the tuple. If no + * such entry exists, the tuple doesn't exist physically in the segfile. 
+	 */
+	if (!AppendOnlyBlockDirectory_CoversTuple(uniqueCheckDesc->blockDirectory,
+											  aoTupleId))
+		return false;
+
+	/*
+	 * If the xmin or xmax are set for the dirty snapshot, after the block
+	 * directory is scanned with the snapshot, it means that there is a
+	 * concurrent in-progress transaction inserting the tuple. So, return true
+	 * and have the xwait machinery kick in.
+	 */
+	if (TransactionIdIsValid(snapshot->xmin) || TransactionIdIsValid(snapshot->xmax))
+		return true;
+
+	/*
+	 * Consult the visimap to check if the tuple was deleted by a *committed*
+	 * transaction.
+	 */
+	visible = AppendOnlyVisimap_IsVisible(uniqueCheckDesc->visimap, aoTupleId);
+	/*
+	 * Since we disallow deletes and updates running in parallel with inserts,
+	 * there is no way that the dirty snapshot has its xmin and xmax populated
+	 * after the visimap has been scanned with it.
+	 *
+	 * Note: we disallow it by grabbing an ExclusiveLock on the QD (See
+	 * CdbTryOpenTable()). So if we are running in utility mode, there is no
+	 * such restriction.
+	 */
+	AssertImply(Gp_role != GP_ROLE_UTILITY,
+				(!TransactionIdIsValid(snapshot->xmin) && !TransactionIdIsValid(snapshot->xmax)));
+	return visible;
 }

 static void
diff --git a/src/backend/access/appendonly/README.md b/src/backend/access/appendonly/README.md
index 798b7b0b23e..056b4c0e163 100644
--- a/src/backend/access/appendonly/README.md
+++ b/src/backend/access/appendonly/README.md
@@ -199,7 +199,20 @@ To answer unique index lookups, we don't have to physically fetch the tuple
 from the table. This is key to answering unique index lookups against placeholder
 rows which predate their corresponding data rows. We simply perform a sysscan of
 the block directory, and if we have a visible entry that encompasses the rowNum
-being looked up, we report success.
+being looked up, we go on to the next check. Otherwise, we have no conflict and
+return. The next check that we need to perform is against the visimap, to see if
+the tuple is visible.
If yes, then we have a conflict. Since the snapshot used
+to perform uniqueness checks for AO/CO is SNAPSHOT_DIRTY (we currently don't
+support SNAPSHOT_SELF used for CREATE UNIQUE INDEX CONCURRENTLY), it is possible
+to detect if the block directory tuple (and by extension the data tuple) was
+inserted by a concurrent in-progress transaction. In this case, we simply avoid
+the visimap check and return true. The benefit of performing the sysscan on the
+block directory is that HeapTupleSatisfiesDirty() is called, and in the process,
+the snapshot's xmin and/or xmax fields are updated (see SNAPSHOT_DIRTY for
+details on its special contract). Returning true in this situation will cause
+the unique index code's xwait mechanism to kick in (see _bt_check_unique()) and
+the current transaction will wait for the one that inserted the tuple to commit
+or abort.

 Tableam changes: Since there is a lot of overhead (leads to ~20x performance
 degradation in the worst case) in setting up and tearing down scan descriptors
@@ -214,3 +227,15 @@ directory struct. It will be modified later on to hold a visimap reference to
 help implement DELETEs/UPDATEs. Furthermore, we initialize this struct on the
 first unique index check performed, akin to how we initialize descriptors for
 insert and delete.
+
+AO lazy VACUUM is different from heap vacuum in the sense that ctids of data
+tuples change (and the index tuples need to be updated as a consequence). It
+leverages the scan and insert code to scan live tuples from each segfile and to
+move (insert) them in a target segfile. While moving tuples, we need to avoid
+performing uniqueness checks from the insert machinery. This is to ensure that
+we avoid spurious conflicts between the moved tuple and the original tuple. We
+don't need to insert a placeholder row for the backend running vacuum as the old
+index entries will still point to the segment being compacted.
This will be the +case up until the index entries are bulk deleted, but by then the new index +entries along with new block directory rows would already have been written and +would be able to answer uniqueness checks. diff --git a/src/backend/access/appendonly/appendonlyam_handler.c b/src/backend/access/appendonly/appendonlyam_handler.c index c11ac90a300..62b075e9dba 100644 --- a/src/backend/access/appendonly/appendonlyam_handler.c +++ b/src/backend/access/appendonly/appendonlyam_handler.c @@ -273,9 +273,16 @@ appendonly_dml_finish(Relation relation, CmdType operation) if (state->uniqueCheckDesc) { + /* clean up the block directory */ AppendOnlyBlockDirectory_End_forSearch(state->uniqueCheckDesc->blockDirectory); pfree(state->uniqueCheckDesc->blockDirectory); state->uniqueCheckDesc->blockDirectory = NULL; + + /* clean up the visimap */ + AppendOnlyVisimap_Finish(state->uniqueCheckDesc->visimap, AccessShareLock); + pfree(state->uniqueCheckDesc->visimap); + state->uniqueCheckDesc->visimap = NULL; + pfree(state->uniqueCheckDesc); state->uniqueCheckDesc = NULL; } @@ -417,13 +424,28 @@ get_or_create_unique_check_desc(Relation relation, Snapshot snapshot) { MemoryContext oldcxt; AppendOnlyUniqueCheckDesc uniqueCheckDesc; + Oid visimaprelid; + Oid visimapidxid; oldcxt = MemoryContextSwitchTo(appendOnlyLocal.stateCxt); uniqueCheckDesc = palloc0(sizeof(AppendOnlyUniqueCheckDescData)); + + /* Initialize the block directory */ uniqueCheckDesc->blockDirectory = palloc0(sizeof(AppendOnlyBlockDirectory)); AppendOnlyBlockDirectory_Init_forSearch(uniqueCheckDesc->blockDirectory, snapshot, NULL, -1, relation, 1, false, NULL); + /* Initialize the visimap */ + uniqueCheckDesc->visimap = palloc0(sizeof(AppendOnlyVisimap)); + GetAppendOnlyEntryAuxOids(relation->rd_id, + snapshot, + NULL, NULL, NULL, + &visimaprelid, &visimapidxid); + AppendOnlyVisimap_Init(uniqueCheckDesc->visimap, + visimaprelid, + visimapidxid, + AccessShareLock, + snapshot); state->uniqueCheckDesc = 
uniqueCheckDesc; MemoryContextSwitchTo(oldcxt); } @@ -631,6 +653,12 @@ appendonly_index_fetch_tuple(struct IndexFetchTableData *scan, * * There is no need to fetch the tuple (we actually can't reliably do so as * we might encounter a placeholder row in the block directory) + * + * If no visible block directory entry exists, we are done. If it does, we need + * to further check the visibility of the tuple itself by consulting the visimap. + * Now, the visimap check can be skipped if the tuple was found to have been + * inserted by a concurrent in-progress transaction, in which case we return + * true and have the xwait machinery kick in. */ static bool appendonly_index_fetch_tuple_exists(Relation rel, @@ -639,8 +667,8 @@ appendonly_index_fetch_tuple_exists(Relation rel, bool *all_dead) { AppendOnlyUniqueCheckDesc uniqueCheckDesc; - AppendOnlyBlockDirectory *blockDirectory; AOTupleId *aoTupleId = (AOTupleId *) tid; + bool visible; #ifdef USE_ASSERT_CHECKING int segmentFileNum = AOTupleIdGet_segmentFileNum(aoTupleId); @@ -680,8 +708,40 @@ appendonly_index_fetch_tuple_exists(Relation rel, return true; uniqueCheckDesc = get_or_create_unique_check_desc(rel, snapshot); - blockDirectory = uniqueCheckDesc->blockDirectory; - return AppendOnlyBlockDirectory_CoversTuple(blockDirectory, aoTupleId); + /* + * Check to see if there is a block directory entry for the tuple. If no + * such entry exists, the tuple doesn't exist physically in the segfile. + */ + if (!AppendOnlyBlockDirectory_CoversTuple(uniqueCheckDesc->blockDirectory, + aoTupleId)) + return false; + + /* + * If the xmin or xmax are set for the dirty snapshot, after the block + * directory is scanned with the snapshot, it means that there is a + * concurrent in-progress transaction inserting the tuple. So, return true + * and have the xwait machinery kick in. 
+	 */
+	if (TransactionIdIsValid(snapshot->xmin) || TransactionIdIsValid(snapshot->xmax))
+		return true;
+
+	/*
+	 * Consult the visimap to check if the tuple was deleted by a *committed*
+	 * transaction.
+	 */
+	visible = AppendOnlyVisimap_IsVisible(uniqueCheckDesc->visimap, aoTupleId);
+	/*
+	 * Since we disallow deletes and updates running in parallel with inserts,
+	 * there is no way that the dirty snapshot has its xmin and xmax populated
+	 * after the visimap has been scanned with it.
+	 *
+	 * Note: we disallow it by grabbing an ExclusiveLock on the QD (See
+	 * CdbTryOpenTable()). So if we are running in utility mode, there is no
+	 * such restriction.
+	 */
+	AssertImply(Gp_role != GP_ROLE_UTILITY,
+				(!TransactionIdIsValid(snapshot->xmin) && !TransactionIdIsValid(snapshot->xmax)));
+	return visible;
 }
diff --git a/src/include/cdb/cdbaocsam.h b/src/include/cdb/cdbaocsam.h
index a1c55bcba00..cf3bf0d8c65 100644
--- a/src/include/cdb/cdbaocsam.h
+++ b/src/include/cdb/cdbaocsam.h
@@ -264,6 +264,7 @@ typedef struct AOCSDeleteDescData *AOCSDeleteDesc;
 typedef struct AOCSUniqueCheckDescData
 {
 	AppendOnlyBlockDirectory *blockDirectory;
+	AppendOnlyVisimap *visimap;
 } AOCSUniqueCheckDescData;

 typedef struct AOCSUniqueCheckDescData *AOCSUniqueCheckDesc;
diff --git a/src/include/cdb/cdbappendonlyam.h b/src/include/cdb/cdbappendonlyam.h
index 42a680b24e7..3ae23cd1688 100644
--- a/src/include/cdb/cdbappendonlyam.h
+++ b/src/include/cdb/cdbappendonlyam.h
@@ -373,6 +373,7 @@ typedef struct AppendOnlyDeleteDescData *AppendOnlyDeleteDesc;
 typedef struct AppendOnlyUniqueCheckDescData
 {
 	AppendOnlyBlockDirectory *blockDirectory;
+	AppendOnlyVisimap *visimap;
 } AppendOnlyUniqueCheckDescData;

 typedef struct AppendOnlyUniqueCheckDescData *AppendOnlyUniqueCheckDesc;
diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule
index e66d9266f17..3f747fccd60 100755
--- a/src/test/regress/greenplum_schedule
+++ b/src/test/regress/greenplum_schedule
@@ -232,7 +232,7 @@
test: uao_ddl/alter_drop_allcol_row uao_ddl/alter_drop_allcol_column uao_ddl/alt # These tests use gp_select_invisible and VACUUM, and will get confused if there are # concurrent transactions holding back the global xmin. -test: uao_dml/uao_dml_cursor_row uao_dml/uao_dml_select_row uao_dml/uao_dml_cursor_column uao_dml/uao_dml_select_column +test: uao_dml/uao_dml_cursor_row uao_dml/uao_dml_select_row uao_dml/uao_dml_cursor_column uao_dml/uao_dml_select_column uao_dml/uao_dml_unique_index_delete_row uao_dml/uao_dml_unique_index_delete_column # disable autovacuum for the test diff --git a/src/test/regress/input/uao_dml/uao_dml_unique_index_delete.source b/src/test/regress/input/uao_dml/uao_dml_unique_index_delete.source new file mode 100644 index 00000000000..ab4c2c9501e --- /dev/null +++ b/src/test/regress/input/uao_dml/uao_dml_unique_index_delete.source @@ -0,0 +1,73 @@ +create schema uao_dml_unique_index_@amname@; +set search_path=uao_dml_unique_index_@amname@; +set default_table_access_method=@amname@; + +SET gp_appendonly_enable_unique_index TO ON; + +-- Case 1: Inserting tx inserting a deleted key--------------------------------- +CREATE TABLE uao_unique_index_delete (a INT unique); +INSERT INTO uao_unique_index_delete VALUES (1); +DELETE FROM uao_unique_index_delete WHERE a = 1; +-- Should not raise a conflict as the key has been deleted. +INSERT INTO uao_unique_index_delete VALUES (1); +SELECT * FROM uao_unique_index_delete; + +DROP TABLE uao_unique_index_delete; + +-- Case 2: Inserting tx inserting a key whose delete was aborted---------------- +CREATE TABLE uao_unique_index_delete (a INT unique); +INSERT INTO uao_unique_index_delete VALUES (1); +BEGIN; +DELETE FROM uao_unique_index_delete WHERE a = 1; +ABORT; +-- Should raise a conflict as the delete of the key was aborted. 
+INSERT INTO uao_unique_index_delete VALUES (1); +SELECT * FROM uao_unique_index_delete; + +DROP TABLE uao_unique_index_delete; + +-- Case 3: Inserting tx inserting a key deleted in the same tx------------------ +CREATE TABLE uao_unique_index_delete (a INT unique); +INSERT INTO uao_unique_index_delete VALUES (1); +BEGIN; +DELETE FROM uao_unique_index_delete WHERE a = 1; +-- should succeed as the INSERT should see that the conflicting key was deleted. +INSERT INTO uao_unique_index_delete VALUES (1); +COMMIT; +SELECT * FROM uao_unique_index_delete; + +DROP TABLE uao_unique_index_delete; + +-- Case 4: Deleting tx deletes a key already deleted in the same tx------------- +CREATE TABLE uao_unique_index_delete (a INT unique); +INSERT INTO uao_unique_index_delete VALUES (1); +BEGIN; +DELETE FROM uao_unique_index_delete WHERE a = 1; +-- should be a no-op as it should see the prior DELETE. +DELETE FROM uao_unique_index_delete WHERE a = 1; +COMMIT; +SELECT * FROM uao_unique_index_delete; + +DROP TABLE uao_unique_index_delete; + +-- Case 5: Deleting tx deletes a key inserted in the same tx-------------------- +CREATE TABLE uao_unique_index_delete (a INT unique); +BEGIN; +INSERT INTO uao_unique_index_delete VALUES (1); +-- should be able to see and delete the inserted key. +DELETE FROM uao_unique_index_delete WHERE a = 1; +COMMIT; +SELECT * FROM uao_unique_index_delete; + +DROP TABLE uao_unique_index_delete; + +-- Case 6: Deleting tx deletes a key absent from the table---------------------- +CREATE TABLE uao_unique_index_delete (a INT unique); +-- should be a no-op. +DELETE FROM uao_unique_index_delete WHERE a = 1; +INSERT INTO uao_unique_index_delete VALUES (1); +-- should be a no-op. 
+DELETE FROM uao_unique_index_delete WHERE a = 2; +SELECT * FROM uao_unique_index_delete; + +DROP TABLE uao_unique_index_delete; diff --git a/src/test/regress/output/uao_dml/uao_dml_unique_index_delete.source b/src/test/regress/output/uao_dml/uao_dml_unique_index_delete.source new file mode 100644 index 00000000000..2e8cc8af1ec --- /dev/null +++ b/src/test/regress/output/uao_dml/uao_dml_unique_index_delete.source @@ -0,0 +1,90 @@ +create schema uao_dml_unique_index_@amname@; +set search_path=uao_dml_unique_index_@amname@; +set default_table_access_method=@amname@; +SET gp_appendonly_enable_unique_index TO ON; +-- Case 1: Inserting tx inserting a deleted key--------------------------------- +CREATE TABLE uao_unique_index_delete (a INT unique); +INSERT INTO uao_unique_index_delete VALUES (1); +DELETE FROM uao_unique_index_delete WHERE a = 1; +-- Should not raise a conflict as the key has been deleted. +INSERT INTO uao_unique_index_delete VALUES (1); +SELECT * FROM uao_unique_index_delete; + a +--- + 1 +(1 row) + +DROP TABLE uao_unique_index_delete; +-- Case 2: Inserting tx inserting a key whose delete was aborted---------------- +CREATE TABLE uao_unique_index_delete (a INT unique); +INSERT INTO uao_unique_index_delete VALUES (1); +BEGIN; +DELETE FROM uao_unique_index_delete WHERE a = 1; +ABORT; +-- Should raise a conflict as the delete of the key was aborted. +INSERT INTO uao_unique_index_delete VALUES (1); +ERROR: duplicate key value violates unique constraint "uao_unique_index_delete_a_key" (seg1 192.168.0.148:7003 pid=1637278) +DETAIL: Key (a)=(1) already exists. 
+SELECT * FROM uao_unique_index_delete; + a +--- + 1 +(1 row) + +DROP TABLE uao_unique_index_delete; +-- Case 3: Inserting tx inserting a key deleted in the same tx------------------ +CREATE TABLE uao_unique_index_delete (a INT unique); +INSERT INTO uao_unique_index_delete VALUES (1); +BEGIN; +DELETE FROM uao_unique_index_delete WHERE a = 1; +-- should succeed as the INSERT should see that the conflicting key was deleted. +INSERT INTO uao_unique_index_delete VALUES (1); +COMMIT; +SELECT * FROM uao_unique_index_delete; + a +--- + 1 +(1 row) + +DROP TABLE uao_unique_index_delete; +-- Case 4: Deleting tx deletes a key already deleted in the same tx------------- +CREATE TABLE uao_unique_index_delete (a INT unique); +INSERT INTO uao_unique_index_delete VALUES (1); +BEGIN; +DELETE FROM uao_unique_index_delete WHERE a = 1; +-- should be a no-op as it should see the prior DELETE. +DELETE FROM uao_unique_index_delete WHERE a = 1; +COMMIT; +SELECT * FROM uao_unique_index_delete; + a +--- +(0 rows) + +DROP TABLE uao_unique_index_delete; +-- Case 5: Deleting tx deletes a key inserted in the same tx-------------------- +CREATE TABLE uao_unique_index_delete (a INT unique); +BEGIN; +INSERT INTO uao_unique_index_delete VALUES (1); +-- should be able to see and delete the inserted key. +DELETE FROM uao_unique_index_delete WHERE a = 1; +COMMIT; +SELECT * FROM uao_unique_index_delete; + a +--- +(0 rows) + +DROP TABLE uao_unique_index_delete; +-- Case 6: Deleting tx deletes a key absent from the table---------------------- +CREATE TABLE uao_unique_index_delete (a INT unique); +-- should be a no-op. +DELETE FROM uao_unique_index_delete WHERE a = 1; +INSERT INTO uao_unique_index_delete VALUES (1); +-- should be a no-op. 
+DELETE FROM uao_unique_index_delete WHERE a = 2; +SELECT * FROM uao_unique_index_delete; + a +--- + 1 +(1 row) + +DROP TABLE uao_unique_index_delete; From d6ac0edb0ab3aea2935ad8286cf1a25b8af91488 Mon Sep 17 00:00:00 2001 From: Soumyadeep Chakraborty Date: Fri, 4 Nov 2022 15:19:24 -0700 Subject: [PATCH 06/19] UPDATE on AO/CO tables with unique indexes This commit formally adds support for UPDATEs on AO/CO tables with unique indexes. Technically, UPDATEs were supported by the prior commits for DELETE and INSERT. This commit includes tests to formally support UPDATE and addresses the unsupported edge case: UPDATE foo SET i = 1 WHERE i = 1; Here, within the UPDATE command itself, there is a need to see what tuples are deleted in the delete half of the command, in uniqueness checks done by the insert half of the command. This is crucial to avoid spurious conflicts in similar situations as above, where the source and target keys are equal in the UPDATE invocation. To that end, we reuse the visimap used in the delete half of the command in the uniqueness check performed during the insert half. 
--- src/backend/access/aocs/aocsam.c | 113 ------------ src/backend/access/aocs/aocsam_handler.c | 51 ++++-- src/backend/access/appendonly/appendonlyam.c | 48 ------ .../access/appendonly/appendonlyam_handler.c | 52 ++++-- src/include/cdb/cdbaocsam.h | 25 ++- src/include/cdb/cdbappendonlyam.h | 30 ++++ src/test/regress/greenplum_schedule | 2 +- .../uao_dml_unique_index_update.source | 118 +++++++++++++ .../uao_dml_unique_index_update.source | 161 ++++++++++++++++++ 9 files changed, 409 insertions(+), 191 deletions(-) create mode 100644 src/test/regress/input/uao_dml/uao_dml_unique_index_update.source create mode 100644 src/test/regress/output/uao_dml/uao_dml_unique_index_update.source diff --git a/src/backend/access/aocs/aocsam.c b/src/backend/access/aocs/aocsam.c index 72838a1df1c..d6cd970abff 100644 --- a/src/backend/access/aocs/aocsam.c +++ b/src/backend/access/aocs/aocsam.c @@ -1859,119 +1859,6 @@ aocs_fetch_finish(AOCSFetchDesc aocsFetchDesc) AppendOnlyVisimap_Finish(&aocsFetchDesc->visibilityMap, AccessShareLock); } -typedef struct AOCSUpdateDescData -{ - AOCSInsertDesc insertDesc; - - /* - * visibility map - */ - AppendOnlyVisimap visibilityMap; - - /* - * Visimap delete support structure. 
Used to handle out-of-order deletes - */ - AppendOnlyVisimapDelete visiMapDelete; - -} AOCSUpdateDescData; - -AOCSUpdateDesc -aocs_update_init(Relation rel, int segno) -{ - Oid visimaprelid; - Oid visimapidxid; - AOCSUpdateDesc desc = (AOCSUpdateDesc) palloc0(sizeof(AOCSUpdateDescData)); - - desc->insertDesc = aocs_insert_init(rel, segno); - - GetAppendOnlyEntryAuxOids(rel->rd_id, - desc->insertDesc->appendOnlyMetaDataSnapshot, - NULL, NULL, NULL, - &visimaprelid, &visimapidxid); - AppendOnlyVisimap_Init(&desc->visibilityMap, - visimaprelid, - visimapidxid, - RowExclusiveLock, - desc->insertDesc->appendOnlyMetaDataSnapshot); - - AppendOnlyVisimapDelete_Init(&desc->visiMapDelete, - &desc->visibilityMap); - - return desc; -} - -void -aocs_update_finish(AOCSUpdateDesc desc) -{ - Assert(desc); - - AppendOnlyVisimapDelete_Finish(&desc->visiMapDelete); - - aocs_insert_finish(desc->insertDesc, NULL); - desc->insertDesc = NULL; - - /* Keep lock until the end of transaction */ - AppendOnlyVisimap_Finish(&desc->visibilityMap, NoLock); - - pfree(desc); -} - -TM_Result -aocs_update(AOCSUpdateDesc desc, TupleTableSlot *slot, - AOTupleId *oldTupleId, AOTupleId *newTupleId) -{ - TM_Result result; - - Assert(desc); - Assert(oldTupleId); - Assert(newTupleId); - -#ifdef FAULT_INJECTOR - FaultInjector_InjectFaultIfSet( - "appendonly_update", - DDLNotSpecified, - "", //databaseName - RelationGetRelationName(desc->insertDesc->aoi_rel)); - /* tableName */ -#endif - - result = AppendOnlyVisimapDelete_Hide(&desc->visiMapDelete, oldTupleId); - if (result != TM_Ok) - return result; - - slot_getallattrs(slot); - aocs_insert_values(desc->insertDesc, - slot->tts_values, slot->tts_isnull, - newTupleId); - - return result; -} - - -/* - * AOCSDeleteDescData is used for delete data from AOCS relations. - * It serves an equivalent purpose as AppendOnlyScanDescData - * (relscan.h) only that the later is used for scanning append-only - * relations. 
- */ -typedef struct AOCSDeleteDescData -{ - /* - * Relation to delete from - */ - Relation aod_rel; - - /* - * visibility map - */ - AppendOnlyVisimap visibilityMap; - - /* - * Visimap delete support structure. Used to handle out-of-order deletes - */ - AppendOnlyVisimapDelete visiMapDelete; - -} AOCSDeleteDescData; /* diff --git a/src/backend/access/aocs/aocsam_handler.c b/src/backend/access/aocs/aocsam_handler.c index 4a27fc4fb53..0022ac98fcd 100644 --- a/src/backend/access/aocs/aocsam_handler.c +++ b/src/backend/access/aocs/aocsam_handler.c @@ -273,6 +273,7 @@ void aoco_dml_finish(Relation relation, CmdType operation) { AOCODMLState *state; + bool had_delete_desc = false; state = remove_dml_state(RelationGetRelid(relation)); @@ -291,6 +292,8 @@ aoco_dml_finish(Relation relation, CmdType operation) */ if (!state->insertDesc) AORelIncrementModCount(relation); + + had_delete_desc = true; } if (state->insertDesc) @@ -307,9 +310,16 @@ aoco_dml_finish(Relation relation, CmdType operation) pfree(state->uniqueCheckDesc->blockDirectory); state->uniqueCheckDesc->blockDirectory = NULL; - /* clean up the visimap */ - AppendOnlyVisimap_Finish(state->uniqueCheckDesc->visimap, AccessShareLock); - pfree(state->uniqueCheckDesc->visimap); + /* + * If this fetch is a part of an update, then we have been reusing the + * visimap used by the delete half of the update, which would have + * already been cleaned up above. Clean up otherwise. 
+ */ + if (!had_delete_desc) + { + AppendOnlyVisimap_Finish(state->uniqueCheckDesc->visimap, AccessShareLock); + pfree(state->uniqueCheckDesc->visimap); + } state->uniqueCheckDesc->visimap = NULL; pfree(state->uniqueCheckDesc); @@ -472,17 +482,30 @@ get_or_create_unique_check_desc(Relation relation, Snapshot snapshot) AppendOnlyBlockDirectory_Init_forSearch(uniqueCheckDesc->blockDirectory, snapshot, NULL, -1, relation, relation->rd_att->natts, false, NULL); - /* Initialize the visimap */ - uniqueCheckDesc->visimap = palloc0(sizeof(AppendOnlyVisimap)); - GetAppendOnlyEntryAuxOids(relation->rd_id, - snapshot, - NULL, NULL, NULL, - &visimaprelid, &visimapidxid); - AppendOnlyVisimap_Init(uniqueCheckDesc->visimap, - visimaprelid, - visimapidxid, - AccessShareLock, - snapshot); + /* + * If this is part of an update, we need to reuse the visimap used by + * the delete half of the update. This is to avoid spurious conflicts + * when the key's previous and new value are identical. Using the + * visimap from the delete half ensures that the visimap can recognize + * any tuples deleted by us prior to this insert, within this command. 
+ */ + if (state->deleteDesc) + uniqueCheckDesc->visimap = &state->deleteDesc->visibilityMap; + else + { + /* Initialize the visimap */ + uniqueCheckDesc->visimap = palloc0(sizeof(AppendOnlyVisimap)); + GetAppendOnlyEntryAuxOids(relation->rd_id, + snapshot, + NULL, NULL, NULL, + &visimaprelid, &visimapidxid); + AppendOnlyVisimap_Init(uniqueCheckDesc->visimap, + visimaprelid, + visimapidxid, + AccessShareLock, + snapshot); + } + state->uniqueCheckDesc = uniqueCheckDesc; MemoryContextSwitchTo(oldcxt); } diff --git a/src/backend/access/appendonly/appendonlyam.c b/src/backend/access/appendonly/appendonlyam.c index 8775cf639c9..d92affc9226 100755 --- a/src/backend/access/appendonly/appendonlyam.c +++ b/src/backend/access/appendonly/appendonlyam.c @@ -68,54 +68,6 @@ #include "utils/memutils.h" #include "utils/snapmgr.h" -/* - * AppendOnlyDeleteDescData is used for delete data from append-only - * relations. It serves an equivalent purpose as AppendOnlyScanDescData - * (cdbappendonlyam.h) only that the later is used for scanning append-only - * relations. - */ -typedef struct AppendOnlyDeleteDescData -{ - /* - * Relation to delete from - */ - Relation aod_rel; - - /* - * Snapshot to use for meta data operations - */ - Snapshot appendOnlyMetaDataSnapshot; - - /* - * visibility map - */ - AppendOnlyVisimap visibilityMap; - - /* - * Visimap delete support structure. Used to handle out-of-order deletes - */ - AppendOnlyVisimapDelete visiMapDelete; - -} AppendOnlyDeleteDescData; - -/* - * AppendOnlyUpdateDescData is used to update data from append-only - * relations. It serves an equivalent purpose as AppendOnlyScanDescData - * (cdbappendonlyam.h) only that the later is used for scanning append-only - * relations. - */ -typedef struct AppendOnlyUpdateDescData -{ - AppendOnlyInsertDesc aoInsertDesc; - - AppendOnlyVisimap visibilityMap; - - /* - * Visimap delete support structure. 
Used to handle out-of-order deletes - */ - AppendOnlyVisimapDelete visiMapDelete; - -} AppendOnlyUpdateDescData; typedef enum AoExecutorBlockKind { diff --git a/src/backend/access/appendonly/appendonlyam_handler.c b/src/backend/access/appendonly/appendonlyam_handler.c index 62b075e9dba..b7fb9af4408 100644 --- a/src/backend/access/appendonly/appendonlyam_handler.c +++ b/src/backend/access/appendonly/appendonlyam_handler.c @@ -244,6 +244,7 @@ void appendonly_dml_finish(Relation relation, CmdType operation) { AppendOnlyDMLState *state; + bool had_delete_desc = false; state = remove_dml_state(RelationGetRelid(relation)); @@ -262,6 +263,8 @@ appendonly_dml_finish(Relation relation, CmdType operation) */ if (!state->insertDesc) AORelIncrementModCount(relation); + + had_delete_desc = true; } if (state->insertDesc) @@ -278,9 +281,16 @@ appendonly_dml_finish(Relation relation, CmdType operation) pfree(state->uniqueCheckDesc->blockDirectory); state->uniqueCheckDesc->blockDirectory = NULL; - /* clean up the visimap */ - AppendOnlyVisimap_Finish(state->uniqueCheckDesc->visimap, AccessShareLock); - pfree(state->uniqueCheckDesc->visimap); + /* + * If this fetch is a part of an update, then we have been reusing the + * visimap used by the delete half of the update, which would have + * already been cleaned up above. Clean up otherwise. 
+ */ + if (!had_delete_desc) + { + AppendOnlyVisimap_Finish(state->uniqueCheckDesc->visimap, AccessShareLock); + pfree(state->uniqueCheckDesc->visimap); + } state->uniqueCheckDesc->visimap = NULL; pfree(state->uniqueCheckDesc); @@ -435,17 +445,31 @@ get_or_create_unique_check_desc(Relation relation, Snapshot snapshot) AppendOnlyBlockDirectory_Init_forSearch(uniqueCheckDesc->blockDirectory, snapshot, NULL, -1, relation, 1, false, NULL); - /* Initialize the visimap */ - uniqueCheckDesc->visimap = palloc0(sizeof(AppendOnlyVisimap)); - GetAppendOnlyEntryAuxOids(relation->rd_id, - snapshot, - NULL, NULL, NULL, - &visimaprelid, &visimapidxid); - AppendOnlyVisimap_Init(uniqueCheckDesc->visimap, - visimaprelid, - visimapidxid, - AccessShareLock, - snapshot); + + /* + * If this is part of an update, we need to reuse the visimap used by + * the delete half of the update. This is to avoid spurious conflicts + * when the key's previous and new value are identical. Using the + * visimap from the delete half ensures that the visimap can recognize + * any tuples deleted by us prior to this insert, within this command. 
+ */ + if (state->deleteDesc) + uniqueCheckDesc->visimap = &state->deleteDesc->visibilityMap; + else + { + /* Initialize the visimap */ + uniqueCheckDesc->visimap = palloc0(sizeof(AppendOnlyVisimap)); + GetAppendOnlyEntryAuxOids(relation->rd_id, + snapshot, + NULL, NULL, NULL, + &visimaprelid, &visimapidxid); + AppendOnlyVisimap_Init(uniqueCheckDesc->visimap, + visimaprelid, + visimapidxid, + AccessShareLock, + snapshot); + } + state->uniqueCheckDesc = uniqueCheckDesc; MemoryContextSwitchTo(oldcxt); } diff --git a/src/include/cdb/cdbaocsam.h b/src/include/cdb/cdbaocsam.h index cf3bf0d8c65..48c5ae2cbf5 100644 --- a/src/include/cdb/cdbaocsam.h +++ b/src/include/cdb/cdbaocsam.h @@ -258,7 +258,30 @@ typedef struct AOCSFetchDescData typedef AOCSFetchDescData *AOCSFetchDesc; -typedef struct AOCSUpdateDescData *AOCSUpdateDesc; +/* + * AOCSDeleteDescData is used for delete data from AOCS relations. + * It serves an equivalent purpose as AppendOnlyScanDescData + * (relscan.h) only that the later is used for scanning append-only + * relations. + */ +typedef struct AOCSDeleteDescData +{ + /* + * Relation to delete from + */ + Relation aod_rel; + + /* + * visibility map + */ + AppendOnlyVisimap visibilityMap; + + /* + * Visimap delete support structure. Used to handle out-of-order deletes + */ + AppendOnlyVisimapDelete visiMapDelete; + +} AOCSDeleteDescData; typedef struct AOCSDeleteDescData *AOCSDeleteDesc; typedef struct AOCSUniqueCheckDescData diff --git a/src/include/cdb/cdbappendonlyam.h b/src/include/cdb/cdbappendonlyam.h index 3ae23cd1688..83939c21481 100644 --- a/src/include/cdb/cdbappendonlyam.h +++ b/src/include/cdb/cdbappendonlyam.h @@ -368,6 +368,36 @@ typedef struct AppendOnlyFetchDescData typedef AppendOnlyFetchDescData *AppendOnlyFetchDesc; +/* + * AppendOnlyDeleteDescData is used for delete data from append-only + * relations. 
It serves an equivalent purpose as AppendOnlyScanDescData
+ * (relscan.h) only that the latter is used for scanning append-only
+ * relations.
+ */
+typedef struct AppendOnlyDeleteDescData
+{
+	/*
+	 * Relation to delete from
+	 */
+	Relation	aod_rel;
+
+	/*
+	 * Snapshot to use for meta data operations
+	 */
+	Snapshot	appendOnlyMetaDataSnapshot;
+
+	/*
+	 * visibility map
+	 */
+	AppendOnlyVisimap visibilityMap;
+
+	/*
+	 * Visimap delete support structure. Used to handle out-of-order deletes
+	 */
+	AppendOnlyVisimapDelete visiMapDelete;
+
+} AppendOnlyDeleteDescData;
+
 typedef struct AppendOnlyDeleteDescData *AppendOnlyDeleteDesc;
 
 typedef struct AppendOnlyUniqueCheckDescData
diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule
index 3f747fccd60..c5bd9fd5b54 100755
--- a/src/test/regress/greenplum_schedule
+++ b/src/test/regress/greenplum_schedule
@@ -232,7 +232,7 @@ test: uao_ddl/alter_drop_allcol_row uao_ddl/alter_drop_allcol_column uao_ddl/alt
 
 # These tests use gp_select_invisible and VACUUM, and will get confused if there are
 # concurrent transactions holding back the global xmin. 
-test: uao_dml/uao_dml_cursor_row uao_dml/uao_dml_select_row uao_dml/uao_dml_cursor_column uao_dml/uao_dml_select_column uao_dml/uao_dml_unique_index_delete_row uao_dml/uao_dml_unique_index_delete_column +test: uao_dml/uao_dml_cursor_row uao_dml/uao_dml_select_row uao_dml/uao_dml_cursor_column uao_dml/uao_dml_select_column uao_dml/uao_dml_unique_index_delete_row uao_dml/uao_dml_unique_index_delete_column uao_dml/uao_dml_unique_index_update_row uao_dml/uao_dml_unique_index_update_column # disable autovacuum for the test diff --git a/src/test/regress/input/uao_dml/uao_dml_unique_index_update.source b/src/test/regress/input/uao_dml/uao_dml_unique_index_update.source new file mode 100644 index 00000000000..c1e4eb90cc9 --- /dev/null +++ b/src/test/regress/input/uao_dml/uao_dml_unique_index_update.source @@ -0,0 +1,118 @@ +create schema uao_dml_unique_index_update_@amname@; +set search_path=uao_dml_unique_index_update_@amname@; +set default_table_access_method=@amname@; + +SET gp_appendonly_enable_unique_index TO ON; + +-- Case 1: Inserting tx inserting a key affected by an update-------------------- +CREATE TABLE uao_unique_index_update (a INT unique); +INSERT INTO uao_unique_index_update VALUES (1); +UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +-- Should not raise a conflict as the key has been deleted the update. +INSERT INTO uao_unique_index_update VALUES (1); +-- Should raise a conflict as the key has been inserted by the update +INSERT INTO uao_unique_index_update VALUES (2); +SELECT * FROM uao_unique_index_update; + +DROP TABLE uao_unique_index_update; + +-- Case 2: Inserting tx inserting a key whose update was aborted---------------- +CREATE TABLE uao_unique_index_update (a INT unique); +INSERT INTO uao_unique_index_update VALUES (1); +BEGIN; +UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +ABORT; +-- Should raise a conflict as the update of the key was aborted. 
+INSERT INTO uao_unique_index_update VALUES (1); +-- Should not raise a conflict as the update of the key was aborted. +INSERT INTO uao_unique_index_update VALUES (2); +SELECT * FROM uao_unique_index_update; + +DROP TABLE uao_unique_index_update; + +-- Case 3: Inserting tx inserting a key updated in the same tx------------------ +CREATE TABLE uao_unique_index_update (a INT unique); +INSERT INTO uao_unique_index_update VALUES (1); +BEGIN; +UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +-- should succeed as the conflicting key was deleted by the update. +INSERT INTO uao_unique_index_update VALUES (1); +-- should raise a conflict as the key was inserted by the update. +INSERT INTO uao_unique_index_update VALUES (2); +COMMIT; +SELECT * FROM uao_unique_index_update; + +DROP TABLE uao_unique_index_update; + +-- Case 4: Updating tx deletes a key already updated in the same tx------------- +CREATE TABLE uao_unique_index_update (a INT unique); +INSERT INTO uao_unique_index_update VALUES (1); +BEGIN; +UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +-- should be a no-op as the key has already been deleted by the update +UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +-- should succeed as the key has been inserted by the 1st update +UPDATE uao_unique_index_update SET a = 3 WHERE a = 2; +COMMIT; +SELECT * FROM uao_unique_index_update; + +DROP TABLE uao_unique_index_update; + +-- Case 5: Updating tx updates a key inserted in the same tx-------------------- +CREATE TABLE uao_unique_index_update (a INT unique); +BEGIN; +INSERT INTO uao_unique_index_update VALUES (1); +-- should be able to see and update the inserted key. 
+UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +COMMIT; +SELECT * FROM uao_unique_index_update; + +-- Case 6: Updating tx updates a key to a key inserted in the same tx----------- +CREATE TABLE uao_unique_index_update (a INT unique); +INSERT INTO uao_unique_index_update VALUES (1); +BEGIN; +INSERT INTO uao_unique_index_update VALUES (2); +-- should raise a conflict with the key inserted inside the same tx. +UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +END; +SELECT * FROM uao_unique_index_update; + +DROP TABLE uao_unique_index_update; + +-- Case 7: Updating tx tries to update a key absent from the table-------------- +CREATE TABLE uao_unique_index_update (a INT unique); +-- should be a no-op. +UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +INSERT INTO uao_unique_index_update VALUES (1); +-- should be a no-op. +UPDATE uao_unique_index_update SET a = 3 WHERE a = 2; +SELECT * FROM uao_unique_index_update; + +DROP TABLE uao_unique_index_update; + +-- Case 8: Update where pre-update key = post-update key------------------------ +CREATE TABLE uao_unique_index_update (a INT unique); +INSERT INTO uao_unique_index_update VALUES (1); +-- should succeed even though the pre-update and post-update values are equal. +UPDATE uao_unique_index_update SET a = 1 WHERE a = 1; +SELECT * FROM uao_unique_index_update; + +DROP TABLE uao_unique_index_update; + +-- Case 9: Updating tx inserts a key that already exists------------------------ +CREATE TABLE uao_unique_index_update (a INT unique); +INSERT INTO uao_unique_index_update VALUES (1), (2); +-- should raise a conflict as the target value already exists. 
+UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +SELECT * FROM uao_unique_index_update; + +DROP TABLE uao_unique_index_update; + +-- Case 10: Updating command tries to insert the same key more than once-------- +CREATE TABLE uao_unique_index_update (a INT unique); +INSERT INTO uao_unique_index_update SELECT generate_series(1,5); +-- should raise a conflict as the second update will try to insert 6, which was +-- already inserted by the first update. +UPDATE uao_unique_index_update SET a=6 WHERE a>2; + +DROP TABLE uao_unique_index_update; diff --git a/src/test/regress/output/uao_dml/uao_dml_unique_index_update.source b/src/test/regress/output/uao_dml/uao_dml_unique_index_update.source new file mode 100644 index 00000000000..872e6b79d8f --- /dev/null +++ b/src/test/regress/output/uao_dml/uao_dml_unique_index_update.source @@ -0,0 +1,161 @@ +create schema uao_dml_unique_index_update_@amname@; +set search_path=uao_dml_unique_index_update_@amname@; +set default_table_access_method=@amname@; +SET gp_appendonly_enable_unique_index TO ON; +-- Case 1: Inserting tx inserting a key affected by an update-------------------- +CREATE TABLE uao_unique_index_update (a INT unique); +INSERT INTO uao_unique_index_update VALUES (1); +UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +-- Should not raise a conflict as the key has been deleted the update. +INSERT INTO uao_unique_index_update VALUES (1); +-- Should raise a conflict as the key has been inserted by the update +INSERT INTO uao_unique_index_update VALUES (2); +ERROR: duplicate key value violates unique constraint "uao_unique_index_update_a_key" (seg0 192.168.0.148:7002 pid=1927811) +DETAIL: Key (a)=(2) already exists. 
+SELECT * FROM uao_unique_index_update; + a +--- + 2 + 1 +(2 rows) + +DROP TABLE uao_unique_index_update; +-- Case 2: Inserting tx inserting a key whose update was aborted---------------- +CREATE TABLE uao_unique_index_update (a INT unique); +INSERT INTO uao_unique_index_update VALUES (1); +BEGIN; +UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +ABORT; +-- Should raise a conflict as the update of the key was aborted. +INSERT INTO uao_unique_index_update VALUES (1); +ERROR: duplicate key value violates unique constraint "uao_unique_index_update_a_key" (seg1 192.168.0.148:7003 pid=1927812) +DETAIL: Key (a)=(1) already exists. +-- Should not raise a conflict as the update of the key was aborted. +INSERT INTO uao_unique_index_update VALUES (2); +SELECT * FROM uao_unique_index_update; + a +--- + 2 + 1 +(2 rows) + +DROP TABLE uao_unique_index_update; +-- Case 3: Inserting tx inserting a key updated in the same tx------------------ +CREATE TABLE uao_unique_index_update (a INT unique); +INSERT INTO uao_unique_index_update VALUES (1); +BEGIN; +UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +-- should succeed as the conflicting key was deleted by the update. +INSERT INTO uao_unique_index_update VALUES (1); +-- should raise a conflict as the key was inserted by the update. +INSERT INTO uao_unique_index_update VALUES (2); +ERROR: duplicate key value violates unique constraint "uao_unique_index_update_a_key" (seg0 192.168.0.148:7002 pid=1927811) +DETAIL: Key (a)=(2) already exists. 
+COMMIT; +SELECT * FROM uao_unique_index_update; + a +--- + 1 +(1 row) + +DROP TABLE uao_unique_index_update; +-- Case 4: Updating tx deletes a key already updated in the same tx------------- +CREATE TABLE uao_unique_index_update (a INT unique); +INSERT INTO uao_unique_index_update VALUES (1); +BEGIN; +UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +-- should be a no-op as the key has already been deleted by the update +UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +-- should succeed as the key has been inserted by the 1st update +UPDATE uao_unique_index_update SET a = 3 WHERE a = 2; +COMMIT; +SELECT * FROM uao_unique_index_update; + a +--- + 3 +(1 row) + +DROP TABLE uao_unique_index_update; +-- Case 5: Updating tx updates a key inserted in the same tx-------------------- +CREATE TABLE uao_unique_index_update (a INT unique); +BEGIN; +INSERT INTO uao_unique_index_update VALUES (1); +-- should be able to see and update the inserted key. +UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +COMMIT; +SELECT * FROM uao_unique_index_update; + a +--- + 2 +(1 row) + +-- Case 6: Updating tx updates a key to a key inserted in the same tx----------- +CREATE TABLE uao_unique_index_update (a INT unique); +ERROR: relation "uao_unique_index_update" already exists +INSERT INTO uao_unique_index_update VALUES (1); +BEGIN; +INSERT INTO uao_unique_index_update VALUES (2); +ERROR: duplicate key value violates unique constraint "uao_unique_index_update_a_key" (seg0 192.168.0.148:7002 pid=1927811) +DETAIL: Key (a)=(2) already exists. +-- should raise a conflict with the key inserted inside the same tx. 
+UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +ERROR: current transaction is aborted, commands ignored until end of transaction block +END; +SELECT * FROM uao_unique_index_update; + a +--- + 2 + 1 +(2 rows) + +DROP TABLE uao_unique_index_update; +-- Case 7: Updating tx tries to update a key absent from the table-------------- +CREATE TABLE uao_unique_index_update (a INT unique); +-- should be a no-op. +UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +INSERT INTO uao_unique_index_update VALUES (1); +-- should be a no-op. +UPDATE uao_unique_index_update SET a = 3 WHERE a = 2; +SELECT * FROM uao_unique_index_update; + a +--- + 1 +(1 row) + +DROP TABLE uao_unique_index_update; +-- Case 8: Update where pre-update key = post-update key------------------------ +CREATE TABLE uao_unique_index_update (a INT unique); +INSERT INTO uao_unique_index_update VALUES (1); +-- should succeed even though the pre-update and post-update values are equal. +UPDATE uao_unique_index_update SET a = 1 WHERE a = 1; +SELECT * FROM uao_unique_index_update; + a +--- + 1 +(1 row) + +DROP TABLE uao_unique_index_update; +-- Case 9: Updating tx inserts a key that already exists------------------------ +CREATE TABLE uao_unique_index_update (a INT unique); +INSERT INTO uao_unique_index_update VALUES (1), (2); +-- should raise a conflict as the target value already exists. +UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +ERROR: duplicate key value violates unique constraint "uao_unique_index_update_a_key" (seg0 192.168.0.148:7002 pid=1929685) +DETAIL: Key (a)=(2) already exists. 
+SELECT * FROM uao_unique_index_update; + a +--- + 2 + 1 +(2 rows) + +DROP TABLE uao_unique_index_update; +-- Case 10: Updating command tries to insert the same key more than once-------- +CREATE TABLE uao_unique_index_update (a INT unique); +INSERT INTO uao_unique_index_update SELECT generate_series(1,5); +-- should raise a conflict as the second update will try to insert 6, which was +-- already inserted by the first update. +UPDATE uao_unique_index_update SET a=6 WHERE a>2; +ERROR: duplicate key value violates unique constraint "uao_unique_index_update_a_key" (seg2 192.168.0.148:7004 pid=1669359) +DETAIL: Key (a)=(6) already exists. +DROP TABLE uao_unique_index_update; From 381c1ab8140ea8f136ff84a54c848515ea6c1263 Mon Sep 17 00:00:00 2001 From: Soumyadeep Chakraborty Date: Sun, 6 Nov 2022 19:38:52 -0800 Subject: [PATCH 07/19] ao/co: Fix snapshot setup for unique index checks (#14414) Unique index checks use SNAPSHOT_DIRTY, which is stack allocated and a new snapshot object is passed always to table_index_fetch_tuple_check(). While this is fine for heap, for AO/CO tables, we persist this stack allocated object in the unique check descriptor's block directory and visimap data structures, which are palloced. This leads to nasty memory trespassing, resulting in segfaults. Remedy this by deferring the metadata snapshot assignment to index_fetch_tuple_exists(), just before the block directory and visimap are scanned. 
--- src/backend/access/aocs/aocsam_handler.c | 56 +++++++++++++++--- .../access/appendonly/appendonlyam_handler.c | 57 ++++++++++++++++--- 2 files changed, 97 insertions(+), 16 deletions(-) diff --git a/src/backend/access/aocs/aocsam_handler.c b/src/backend/access/aocs/aocsam_handler.c index 0022ac98fcd..5b4c8409014 100644 --- a/src/backend/access/aocs/aocsam_handler.c +++ b/src/backend/access/aocs/aocsam_handler.c @@ -463,7 +463,7 @@ get_delete_descriptor(const Relation relation, bool forUpdate) } static AOCSUniqueCheckDesc -get_or_create_unique_check_desc(Relation relation, Snapshot snapshot) +get_or_create_unique_check_desc(Relation relation) { AOCODMLState *state = find_dml_state(RelationGetRelid(relation)); @@ -479,8 +479,16 @@ get_or_create_unique_check_desc(Relation relation, Snapshot snapshot) /* Initialize the block directory */ uniqueCheckDesc->blockDirectory = palloc0(sizeof(AppendOnlyBlockDirectory)); + /* + * Note: we defer setting up the appendOnlyMetaDataSnapshot for the + * block directory to aoco_index_fetch_tuple_exists(). This is because + * snapshots used for unique index lookups may be stack-allocated and a + * new snapshot object may be passed to every unique index check. (for + * SNAPSHOT_DIRTY) + */ AppendOnlyBlockDirectory_Init_forSearch(uniqueCheckDesc->blockDirectory, - snapshot, NULL, -1, relation, + InvalidSnapshot, /* appendOnlyMetaDataSnapshot */ + NULL, -1, relation, relation->rd_att->natts, false, NULL); /* * If this is part of an update, we need to reuse the visimap used by @@ -496,14 +504,21 @@ get_or_create_unique_check_desc(Relation relation, Snapshot snapshot) /* Initialize the visimap */ uniqueCheckDesc->visimap = palloc0(sizeof(AppendOnlyVisimap)); GetAppendOnlyEntryAuxOids(relation->rd_id, - snapshot, + InvalidSnapshot, /* catalog snap is fine for this */ NULL, NULL, NULL, &visimaprelid, &visimapidxid); + /* + * Note: we don't set up the appendOnlyMetadataSnapshot for the + * visimap here. 
It is deferred to aoco_index_fetch_tuple_exists(). + * This is because snapshots used for unique index lookups may be + * stack-allocated and a new snapshot object may be used for every + * unique index check. (for SNAPSHOT_DIRTY) + */ AppendOnlyVisimap_Init(uniqueCheckDesc->visimap, visimaprelid, visimapidxid, AccessShareLock, - snapshot); + InvalidSnapshot /* appendOnlyMetaDataSnapshot */); } state->uniqueCheckDesc = uniqueCheckDesc; @@ -914,7 +929,11 @@ aoco_index_fetch_tuple_exists(Relation rel, { AOCSUniqueCheckDesc uniqueCheckDesc; AOTupleId *aoTupleId = (AOTupleId *) tid; + AppendOnlyBlockDirectory *blockDirectory; + AppendOnlyVisimap *visimap; + bool blkdir_covers; bool visible; + Snapshot save_snapshot; #ifdef USE_ASSERT_CHECKING int segmentFileNum = AOTupleIdGet_segmentFileNum(aoTupleId); @@ -953,14 +972,23 @@ aoco_index_fetch_tuple_exists(Relation rel, if (snapshot->snapshot_type == SNAPSHOT_SELF) return true; - uniqueCheckDesc = get_or_create_unique_check_desc(rel, snapshot); + uniqueCheckDesc = get_or_create_unique_check_desc(rel); /* * Check to see if there is a block directory entry for the tuple. If no * such entry exists, the tuple doesn't exist physically in the segfile. + * + * Note: We need to use the passed in snapshot to perform the block + * directory lookup. See get_or_create_unique_check_desc() for why we don't + * set the snapshot up prior. 
*/ - if (!AppendOnlyBlockDirectory_CoversTuple(uniqueCheckDesc->blockDirectory, - aoTupleId)) + blockDirectory = uniqueCheckDesc->blockDirectory; + Assert(blockDirectory->appendOnlyMetaDataSnapshot == InvalidSnapshot); + blockDirectory->appendOnlyMetaDataSnapshot = snapshot; + blkdir_covers = AppendOnlyBlockDirectory_CoversTuple(blockDirectory, + aoTupleId); + blockDirectory->appendOnlyMetaDataSnapshot = InvalidSnapshot; + if (!blkdir_covers) return false; /* @@ -975,8 +1003,19 @@ aoco_index_fetch_tuple_exists(Relation rel, /* * Consult the visimap to check if the tuple was deleted by a *committed* * transaction. + * + * Note: we need to use the passed in snapshot to perform the visimap lookup. + * See get_or_create_unique_check_desc() for why we don't set the snapshot + * up prior there. + * If this is part of an update, we are reusing the visimap from the delete + * half of the update, so better restore its snapshot once we are done. */ - visible = AppendOnlyVisimap_IsVisible(uniqueCheckDesc->visimap, aoTupleId); + visimap = uniqueCheckDesc->visimap; + save_snapshot = visimap->visimapStore.snapshot; + visimap->visimapStore.snapshot = snapshot; + visible = AppendOnlyVisimap_IsVisible(visimap, aoTupleId); + visimap->visimapStore.snapshot = save_snapshot; + /* * Since we disallow deletes and updates running in parallel with inserts, * there is no way that the dirty snapshot has it's xmin and xmax populated @@ -988,6 +1027,7 @@ aoco_index_fetch_tuple_exists(Relation rel, */ AssertImply(Gp_role != GP_ROLE_UTILITY, (!TransactionIdIsValid(snapshot->xmin) && !TransactionIdIsValid(snapshot->xmax))); + return visible; } diff --git a/src/backend/access/appendonly/appendonlyam_handler.c b/src/backend/access/appendonly/appendonlyam_handler.c index b7fb9af4408..9146511ac02 100644 --- a/src/backend/access/appendonly/appendonlyam_handler.c +++ b/src/backend/access/appendonly/appendonlyam_handler.c @@ -426,7 +426,7 @@ get_delete_descriptor(const Relation relation, bool forUpdate) 
} static AppendOnlyUniqueCheckDesc -get_or_create_unique_check_desc(Relation relation, Snapshot snapshot) +get_or_create_unique_check_desc(Relation relation) { AppendOnlyDMLState *state = find_dml_state(RelationGetRelid(relation)); @@ -442,8 +442,16 @@ get_or_create_unique_check_desc(Relation relation, Snapshot snapshot) /* Initialize the block directory */ uniqueCheckDesc->blockDirectory = palloc0(sizeof(AppendOnlyBlockDirectory)); + /* + * Note: we defer setting up the appendOnlyMetaDataSnapshot for the + * block directory to appendonly_index_fetch_tuple_exists(). This is + * because snapshots used for unique index lookups may be stack-allocated + * and a new snapshot object may be passed to every unique index check. + * (for SNAPSHOT_DIRTY) + */ AppendOnlyBlockDirectory_Init_forSearch(uniqueCheckDesc->blockDirectory, - snapshot, NULL, -1, relation, + InvalidSnapshot, /* appendOnlyMetaDataSnapshot */ + NULL, -1, relation, 1, false, NULL); /* @@ -460,14 +468,21 @@ get_or_create_unique_check_desc(Relation relation, Snapshot snapshot) /* Initialize the visimap */ uniqueCheckDesc->visimap = palloc0(sizeof(AppendOnlyVisimap)); GetAppendOnlyEntryAuxOids(relation->rd_id, - snapshot, + InvalidSnapshot, /* catalog snap is fine for this */ NULL, NULL, NULL, &visimaprelid, &visimapidxid); + /* + * Note: we don't set up the appendOnlyMetadataSnapshot for the + * visimap here. It is deferred to appendonly_index_fetch_tuple_exists(). + * This is because snapshots used for unique index lookups may be + * stack-allocated and a new snapshot object may be used for every + * unique index check. 
(for SNAPSHOT_DIRTY) + */ AppendOnlyVisimap_Init(uniqueCheckDesc->visimap, visimaprelid, visimapidxid, AccessShareLock, - snapshot); + InvalidSnapshot /* appendOnlyMetaDataSnapshot */); } state->uniqueCheckDesc = uniqueCheckDesc; @@ -692,7 +707,11 @@ appendonly_index_fetch_tuple_exists(Relation rel, { AppendOnlyUniqueCheckDesc uniqueCheckDesc; AOTupleId *aoTupleId = (AOTupleId *) tid; + AppendOnlyBlockDirectory *blockDirectory; + AppendOnlyVisimap *visimap; + bool blkdir_covers; bool visible; + Snapshot save_snapshot; #ifdef USE_ASSERT_CHECKING int segmentFileNum = AOTupleIdGet_segmentFileNum(aoTupleId); @@ -731,13 +750,23 @@ appendonly_index_fetch_tuple_exists(Relation rel, if (snapshot->snapshot_type == SNAPSHOT_SELF) return true; - uniqueCheckDesc = get_or_create_unique_check_desc(rel, snapshot); + uniqueCheckDesc = get_or_create_unique_check_desc(rel); + /* * Check to see if there is a block directory entry for the tuple. If no * such entry exists, the tuple doesn't exist physically in the segfile. + * + * Note: We need to use the passed in snapshot to perform the block + * directory lookup. See get_or_create_unique_check_desc() for why we don't + * set the snapshot up prior. */ - if (!AppendOnlyBlockDirectory_CoversTuple(uniqueCheckDesc->blockDirectory, - aoTupleId)) + blockDirectory = uniqueCheckDesc->blockDirectory; + Assert(blockDirectory->appendOnlyMetaDataSnapshot == InvalidSnapshot); + blockDirectory->appendOnlyMetaDataSnapshot = snapshot; + blkdir_covers = AppendOnlyBlockDirectory_CoversTuple(blockDirectory, + aoTupleId); + blockDirectory->appendOnlyMetaDataSnapshot = InvalidSnapshot; + if (!blkdir_covers) return false; /* @@ -752,8 +781,19 @@ appendonly_index_fetch_tuple_exists(Relation rel, /* * Consult the visimap to check if the tuple was deleted by a *committed* * transaction. + * + * Note: we need to use the passed in snapshot to perform the visimap lookup. 
+ * See get_or_create_unique_check_desc() for why we don't set the snapshot + * up prior there. + * If this is part of an update, we are reusing the visimap from the delete + * half of the update, so better restore its snapshot once we are done. */ - visible = AppendOnlyVisimap_IsVisible(uniqueCheckDesc->visimap, aoTupleId); + visimap = uniqueCheckDesc->visimap; + save_snapshot = visimap->visimapStore.snapshot; + visimap->visimapStore.snapshot = snapshot; + visible = AppendOnlyVisimap_IsVisible(visimap, aoTupleId); + visimap->visimapStore.snapshot = save_snapshot; + /* * Since we disallow deletes and updates running in parallel with inserts, * there is no way that the dirty snapshot has it's xmin and xmax populated @@ -765,6 +805,7 @@ appendonly_index_fetch_tuple_exists(Relation rel, */ AssertImply(Gp_role != GP_ROLE_UTILITY, (!TransactionIdIsValid(snapshot->xmin) && !TransactionIdIsValid(snapshot->xmax))); + return visible; } From f85e3e3a58c6a86d1a7a59556ae34318d9f7fd46 Mon Sep 17 00:00:00 2001 From: Soumyadeep Chakraborty Date: Mon, 14 Nov 2022 10:07:42 -0800 Subject: [PATCH 08/19] ao/co: Refactor blkdir/visimap access for unique checks This commit refactors block directory and visimap initialization, lookup and teardown code to make the special non-MVCC snapshot manipulation more apparent. Assumptions are also asserted copiously. 
--- src/backend/access/aocs/aocsam_handler.c | 87 +++++-------------- .../access/appendonly/appendonly_visimap.c | 55 ++++++++++++ .../access/appendonly/appendonlyam_handler.c | 87 +++++-------------- .../appendonly/appendonlyblockdirectory.c | 73 ++++++++++++++++ src/include/access/appendonly_visimap.h | 39 +++++++++ src/include/cdb/cdbappendonlyblockdirectory.h | 44 ++++++++++ 6 files changed, 251 insertions(+), 134 deletions(-) diff --git a/src/backend/access/aocs/aocsam_handler.c b/src/backend/access/aocs/aocsam_handler.c index 5b4c8409014..3f31542e257 100644 --- a/src/backend/access/aocs/aocsam_handler.c +++ b/src/backend/access/aocs/aocsam_handler.c @@ -306,7 +306,7 @@ aoco_dml_finish(Relation relation, CmdType operation) if (state->uniqueCheckDesc) { /* clean up the block directory */ - AppendOnlyBlockDirectory_End_forSearch(state->uniqueCheckDesc->blockDirectory); + AppendOnlyBlockDirectory_End_forUniqueChecks(state->uniqueCheckDesc->blockDirectory); pfree(state->uniqueCheckDesc->blockDirectory); state->uniqueCheckDesc->blockDirectory = NULL; @@ -317,7 +317,7 @@ aoco_dml_finish(Relation relation, CmdType operation) */ if (!had_delete_desc) { - AppendOnlyVisimap_Finish(state->uniqueCheckDesc->visimap, AccessShareLock); + AppendOnlyVisimap_Finish_forUniquenessChecks(state->uniqueCheckDesc->visimap); pfree(state->uniqueCheckDesc->visimap); } state->uniqueCheckDesc->visimap = NULL; @@ -463,7 +463,7 @@ get_delete_descriptor(const Relation relation, bool forUpdate) } static AOCSUniqueCheckDesc -get_or_create_unique_check_desc(Relation relation) +get_or_create_unique_check_desc(Relation relation, Snapshot snapshot) { AOCODMLState *state = find_dml_state(RelationGetRelid(relation)); @@ -471,25 +471,16 @@ get_or_create_unique_check_desc(Relation relation) { MemoryContext oldcxt; AOCSUniqueCheckDesc uniqueCheckDesc; - Oid visimaprelid; - Oid visimapidxid; oldcxt = MemoryContextSwitchTo(aocoLocal.stateCxt); uniqueCheckDesc = palloc0(sizeof(AOCSUniqueCheckDescData)); 
/* Initialize the block directory */ uniqueCheckDesc->blockDirectory = palloc0(sizeof(AppendOnlyBlockDirectory)); - /* - * Note: we defer setting up the appendOnlyMetaDataSnapshot for the - * block directory to aoco_index_fetch_tuple_exists(). This is because - * snapshots used for unique index lookups may be stack-allocated and a - * new snapshot object may be passed to every unique index check. (for - * SNAPSHOT_DIRTY) - */ - AppendOnlyBlockDirectory_Init_forSearch(uniqueCheckDesc->blockDirectory, - InvalidSnapshot, /* appendOnlyMetaDataSnapshot */ - NULL, -1, relation, - relation->rd_att->natts, false, NULL); + AppendOnlyBlockDirectory_Init_forUniqueChecks(uniqueCheckDesc->blockDirectory, + relation, + relation->rd_att->natts, /* numColGroups */ + snapshot); /* * If this is part of an update, we need to reuse the visimap used by * the delete half of the update. This is to avoid spurious conflicts @@ -503,22 +494,9 @@ get_or_create_unique_check_desc(Relation relation) { /* Initialize the visimap */ uniqueCheckDesc->visimap = palloc0(sizeof(AppendOnlyVisimap)); - GetAppendOnlyEntryAuxOids(relation->rd_id, - InvalidSnapshot, /* catalog snap is fine for this */ - NULL, NULL, NULL, - &visimaprelid, &visimapidxid); - /* - * Note: we don't set up the appendOnlyMetadataSnapshot for the - * visimap here. It is deferred to aoco_index_fetch_tuple_exists(). - * This is because snapshots used for unique index lookups may be - * stack-allocated and a new snapshot object may be used for every - * unique index check. 
(for SNAPSHOT_DIRTY) - */ - AppendOnlyVisimap_Init(uniqueCheckDesc->visimap, - visimaprelid, - visimapidxid, - AccessShareLock, - InvalidSnapshot /* appendOnlyMetaDataSnapshot */); + AppendOnlyVisimap_Init_forUniqueCheck(uniqueCheckDesc->visimap, + relation, + snapshot); } state->uniqueCheckDesc = uniqueCheckDesc; @@ -929,11 +907,7 @@ aoco_index_fetch_tuple_exists(Relation rel, { AOCSUniqueCheckDesc uniqueCheckDesc; AOTupleId *aoTupleId = (AOTupleId *) tid; - AppendOnlyBlockDirectory *blockDirectory; - AppendOnlyVisimap *visimap; - bool blkdir_covers; bool visible; - Snapshot save_snapshot; #ifdef USE_ASSERT_CHECKING int segmentFileNum = AOTupleIdGet_segmentFileNum(aoTupleId); @@ -972,23 +946,12 @@ aoco_index_fetch_tuple_exists(Relation rel, if (snapshot->snapshot_type == SNAPSHOT_SELF) return true; - uniqueCheckDesc = get_or_create_unique_check_desc(rel); + uniqueCheckDesc = get_or_create_unique_check_desc(rel, snapshot); - /* - * Check to see if there is a block directory entry for the tuple. If no - * such entry exists, the tuple doesn't exist physically in the segfile. - * - * Note: We need to use the passed in snapshot to perform the block - * directory lookup. See get_or_create_unique_check_desc() for why we don't - * set the snapshot up prior. - */ - blockDirectory = uniqueCheckDesc->blockDirectory; - Assert(blockDirectory->appendOnlyMetaDataSnapshot == InvalidSnapshot); - blockDirectory->appendOnlyMetaDataSnapshot = snapshot; - blkdir_covers = AppendOnlyBlockDirectory_CoversTuple(blockDirectory, - aoTupleId); - blockDirectory->appendOnlyMetaDataSnapshot = InvalidSnapshot; - if (!blkdir_covers) + /* First, scan the block directory */ + if (!AppendOnlyBlockDirectory_UniqueCheck(uniqueCheckDesc->blockDirectory, + aoTupleId, + snapshot)) return false; /* @@ -997,24 +960,14 @@ aoco_index_fetch_tuple_exists(Relation rel, * concurrent in-progress transaction inserting the tuple. So, return true * and have the xwait machinery kick in. 
*/ + Assert(snapshot->snapshot_type == SNAPSHOT_DIRTY); if (TransactionIdIsValid(snapshot->xmin) || TransactionIdIsValid(snapshot->xmax)) return true; - /* - * Consult the visimap to check if the tuple was deleted by a *committed* - * transaction. - * - * Note: we need to use the passed in snapshot to perform the visimap lookup. - * See get_or_create_unique_check_desc() for why we don't set the snapshot - * up prior there. - * If this is part of an update, we are reusing the visimap from the delete - * half of the update, so better restore its snapshot once we are done. - */ - visimap = uniqueCheckDesc->visimap; - save_snapshot = visimap->visimapStore.snapshot; - visimap->visimapStore.snapshot = snapshot; - visible = AppendOnlyVisimap_IsVisible(visimap, aoTupleId); - visimap->visimapStore.snapshot = save_snapshot; + /* Now, consult the visimap */ + visible = AppendOnlyVisimap_UniqueCheck(uniqueCheckDesc->visimap, + aoTupleId, + snapshot); /* * Since we disallow deletes and updates running in parallel with inserts, diff --git a/src/backend/access/appendonly/appendonly_visimap.c b/src/backend/access/appendonly/appendonly_visimap.c index ea9a587f2bd..8a52756512d 100644 --- a/src/backend/access/appendonly/appendonly_visimap.c +++ b/src/backend/access/appendonly/appendonly_visimap.c @@ -864,3 +864,58 @@ AppendOnlyVisimapDelete_Finish( hash_destroy(visiMapDelete->dirtyEntryCache); BufFileClose(visiMapDelete->workfile); } + +/* + * AppendOnlyVisimap_Init_forUniqueCheck + * + * Initializes the visimap to determine if tuples were deleted as a part of + * uniqueness checks. + * + * Note: we defer setting up the appendOnlyMetaDataSnapshot for the visibility + * map to the index_fetch_tuple_exists() table AM call. This is because + * snapshots used for unique index lookups are special and don't follow the + * usual allocation or registration mechanism. 
They may be stack-allocated and a + * new snapshot object may be passed to every unique index check (this happens + * when SNAPSHOT_DIRTY is passed). While technically, we could set up the + * metadata snapshot in advance for SNAPSHOT_SELF, the alternative is fine. + */ +void AppendOnlyVisimap_Init_forUniqueCheck( + AppendOnlyVisimap *visiMap, + Relation aoRel, + Snapshot snapshot) +{ + Oid visimaprelid; + Oid visimapidxid; + + Assert(snapshot->snapshot_type == SNAPSHOT_DIRTY || + snapshot->snapshot_type == SNAPSHOT_SELF); + + GetAppendOnlyEntryAuxOids(aoRel->rd_id, + InvalidSnapshot, /* catalog snapshot is enough */ + NULL, NULL, NULL, &visimaprelid, &visimapidxid); + if (!OidIsValid(visimaprelid) || !OidIsValid(visimapidxid)) + elog(ERROR, "Could not find visibility map for relation: %u", aoRel->rd_id); + + AppendOnlyVisimap_Init(visiMap, + visimaprelid, + visimapidxid, + AccessShareLock, + InvalidSnapshot /* appendOnlyMetaDataSnapshot */); +} + +void +AppendOnlyVisimap_Finish_forUniquenessChecks( + AppendOnlyVisimap *visiMap) +{ + /* + * The snapshot was either reset to NULL in between calls or already cleaned + * up (if this was part of an update command) + */ + Assert(visiMap->visimapStore.snapshot == InvalidSnapshot); + + AppendOnlyVisimapStore_Finish(&visiMap->visimapStore, AccessShareLock); + AppendOnlyVisimapEntry_Finish(&visiMap->visimapEntry); + + MemoryContextDelete(visiMap->memoryContext); + visiMap->memoryContext = NULL; +} diff --git a/src/backend/access/appendonly/appendonlyam_handler.c b/src/backend/access/appendonly/appendonlyam_handler.c index 9146511ac02..a46e716f477 100644 --- a/src/backend/access/appendonly/appendonlyam_handler.c +++ b/src/backend/access/appendonly/appendonlyam_handler.c @@ -277,7 +277,7 @@ appendonly_dml_finish(Relation relation, CmdType operation) if (state->uniqueCheckDesc) { /* clean up the block directory */ - AppendOnlyBlockDirectory_End_forSearch(state->uniqueCheckDesc->blockDirectory); + 
AppendOnlyBlockDirectory_End_forUniqueChecks(state->uniqueCheckDesc->blockDirectory); pfree(state->uniqueCheckDesc->blockDirectory); state->uniqueCheckDesc->blockDirectory = NULL; @@ -288,7 +288,7 @@ appendonly_dml_finish(Relation relation, CmdType operation) */ if (!had_delete_desc) { - AppendOnlyVisimap_Finish(state->uniqueCheckDesc->visimap, AccessShareLock); + AppendOnlyVisimap_Finish_forUniquenessChecks(state->uniqueCheckDesc->visimap); pfree(state->uniqueCheckDesc->visimap); } state->uniqueCheckDesc->visimap = NULL; @@ -426,7 +426,7 @@ get_delete_descriptor(const Relation relation, bool forUpdate) } static AppendOnlyUniqueCheckDesc -get_or_create_unique_check_desc(Relation relation) +get_or_create_unique_check_desc(Relation relation, Snapshot snapshot) { AppendOnlyDMLState *state = find_dml_state(RelationGetRelid(relation)); @@ -434,25 +434,16 @@ get_or_create_unique_check_desc(Relation relation) { MemoryContext oldcxt; AppendOnlyUniqueCheckDesc uniqueCheckDesc; - Oid visimaprelid; - Oid visimapidxid; oldcxt = MemoryContextSwitchTo(appendOnlyLocal.stateCxt); uniqueCheckDesc = palloc0(sizeof(AppendOnlyUniqueCheckDescData)); /* Initialize the block directory */ uniqueCheckDesc->blockDirectory = palloc0(sizeof(AppendOnlyBlockDirectory)); - /* - * Note: we defer setting up the appendOnlyMetaDataSnapshot for the - * block directory to appendonly_index_fetch_tuple_exists(). This is - * because snapshots used for unique index lookups may be stack-allocated - * and a new snapshot object may be passed to every unique index check. 
- * (for SNAPSHOT_DIRTY) - */ - AppendOnlyBlockDirectory_Init_forSearch(uniqueCheckDesc->blockDirectory, - InvalidSnapshot, /* appendOnlyMetaDataSnapshot */ - NULL, -1, relation, - 1, false, NULL); + AppendOnlyBlockDirectory_Init_forUniqueChecks(uniqueCheckDesc->blockDirectory, + relation, + 1, /* numColGroups */ + snapshot); /* * If this is part of an update, we need to reuse the visimap used by @@ -467,22 +458,9 @@ get_or_create_unique_check_desc(Relation relation) { /* Initialize the visimap */ uniqueCheckDesc->visimap = palloc0(sizeof(AppendOnlyVisimap)); - GetAppendOnlyEntryAuxOids(relation->rd_id, - InvalidSnapshot, /* catalog snap is fine for this */ - NULL, NULL, NULL, - &visimaprelid, &visimapidxid); - /* - * Note: we don't set up the appendOnlyMetadataSnapshot for the - * visimap here. It is deferred to appendonly_index_fetch_tuple_exists(). - * This is because snapshots used for unique index lookups may be - * stack-allocated and a new snapshot object may be used for every - * unique index check. 
(for SNAPSHOT_DIRTY) - */ - AppendOnlyVisimap_Init(uniqueCheckDesc->visimap, - visimaprelid, - visimapidxid, - AccessShareLock, - InvalidSnapshot /* appendOnlyMetaDataSnapshot */); + AppendOnlyVisimap_Init_forUniqueCheck(uniqueCheckDesc->visimap, + relation, + snapshot); } state->uniqueCheckDesc = uniqueCheckDesc; @@ -707,11 +685,7 @@ appendonly_index_fetch_tuple_exists(Relation rel, { AppendOnlyUniqueCheckDesc uniqueCheckDesc; AOTupleId *aoTupleId = (AOTupleId *) tid; - AppendOnlyBlockDirectory *blockDirectory; - AppendOnlyVisimap *visimap; - bool blkdir_covers; bool visible; - Snapshot save_snapshot; #ifdef USE_ASSERT_CHECKING int segmentFileNum = AOTupleIdGet_segmentFileNum(aoTupleId); @@ -750,23 +724,12 @@ appendonly_index_fetch_tuple_exists(Relation rel, if (snapshot->snapshot_type == SNAPSHOT_SELF) return true; - uniqueCheckDesc = get_or_create_unique_check_desc(rel); + uniqueCheckDesc = get_or_create_unique_check_desc(rel, snapshot); - /* - * Check to see if there is a block directory entry for the tuple. If no - * such entry exists, the tuple doesn't exist physically in the segfile. - * - * Note: We need to use the passed in snapshot to perform the block - * directory lookup. See get_or_create_unique_check_desc() for why we don't - * set the snapshot up prior. - */ - blockDirectory = uniqueCheckDesc->blockDirectory; - Assert(blockDirectory->appendOnlyMetaDataSnapshot == InvalidSnapshot); - blockDirectory->appendOnlyMetaDataSnapshot = snapshot; - blkdir_covers = AppendOnlyBlockDirectory_CoversTuple(blockDirectory, - aoTupleId); - blockDirectory->appendOnlyMetaDataSnapshot = InvalidSnapshot; - if (!blkdir_covers) + /* First, scan the block directory */ + if (!AppendOnlyBlockDirectory_UniqueCheck(uniqueCheckDesc->blockDirectory, + aoTupleId, + snapshot)) return false; /* @@ -775,24 +738,14 @@ appendonly_index_fetch_tuple_exists(Relation rel, * concurrent in-progress transaction inserting the tuple. So, return true * and have the xwait machinery kick in. 
*/ + Assert(snapshot->snapshot_type == SNAPSHOT_DIRTY); if (TransactionIdIsValid(snapshot->xmin) || TransactionIdIsValid(snapshot->xmax)) return true; - /* - * Consult the visimap to check if the tuple was deleted by a *committed* - * transaction. - * - * Note: we need to use the passed in snapshot to perform the visimap lookup. - * See get_or_create_unique_check_desc() for why we don't set the snapshot - * up prior there. - * If this is part of an update, we are reusing the visimap from the delete - * half of the update, so better restore its snapshot once we are done. - */ - visimap = uniqueCheckDesc->visimap; - save_snapshot = visimap->visimapStore.snapshot; - visimap->visimapStore.snapshot = snapshot; - visible = AppendOnlyVisimap_IsVisible(visimap, aoTupleId); - visimap->visimapStore.snapshot = save_snapshot; + /* Now, consult the visimap */ + visible = AppendOnlyVisimap_UniqueCheck(uniqueCheckDesc->visimap, + aoTupleId, + snapshot); /* * Since we disallow deletes and updates running in parallel with inserts, diff --git a/src/backend/access/appendonly/appendonlyblockdirectory.c b/src/backend/access/appendonly/appendonlyblockdirectory.c index b465a47a940..409d1d08fad 100644 --- a/src/backend/access/appendonly/appendonlyblockdirectory.c +++ b/src/backend/access/appendonly/appendonlyblockdirectory.c @@ -220,6 +220,64 @@ AppendOnlyBlockDirectory_Init_forSearch( init_internal(blockDirectory); } +/* + * AppendOnlyBlockDirectory_Init_forUniqueChecks + * + * Initializes the block directory to handle lookups for uniqueness checks. + * + * Note: These lookups will be purely restricted to the block directory relation + * itself and will not involve the physical AO relation. + * + * Note: we defer setting up the appendOnlyMetaDataSnapshot for the block + * directory to the index_fetch_tuple_exists() table AM call. This is because + * snapshots used for unique index lookups are special and don't follow the + * usual allocation or registration mechanism. 
They may be stack-allocated and a + * new snapshot object may be passed to every unique index check (this happens + * when SNAPSHOT_DIRTY is passed). While technically, we could set up the + * metadata snapshot in advance for SNAPSHOT_SELF, the alternative is fine. + */ +void +AppendOnlyBlockDirectory_Init_forUniqueChecks( + AppendOnlyBlockDirectory *blockDirectory, + Relation aoRel, + int numColumnGroups, + Snapshot snapshot) +{ + Oid blkdirrelid; + Oid blkdiridxid; + + Assert(RelationIsValid(aoRel)); + + Assert(snapshot->snapshot_type == SNAPSHOT_DIRTY || + snapshot->snapshot_type == SNAPSHOT_SELF); + + GetAppendOnlyEntryAuxOids(aoRel->rd_id, + InvalidSnapshot, /* catalog snapshot is enough */ + NULL, &blkdirrelid, &blkdiridxid, NULL, NULL); + + if (!OidIsValid(blkdirrelid) || !OidIsValid(blkdiridxid)) + elog(ERROR, "Could not find block directory for relation: %u", aoRel->rd_id); + + blockDirectory->aoRel = aoRel; + blockDirectory->isAOCol = RelationIsAoCols(aoRel); + + /* Segfile setup is not necessary as physical AO tuples will not be accessed */ + blockDirectory->segmentFileInfo = NULL; + blockDirectory->totalSegfiles = -1; + blockDirectory->currentSegmentFileNum = -1; + + /* Metadata snapshot assignment is deferred to lookup-time */ + blockDirectory->appendOnlyMetaDataSnapshot = InvalidSnapshot; + + blockDirectory->numColumnGroups = numColumnGroups; + blockDirectory->proj = NULL; + + blockDirectory->blkdirRel = heap_open(blkdirrelid, AccessShareLock); + blockDirectory->blkdirIdx = index_open(blkdiridxid, AccessShareLock); + + init_internal(blockDirectory); +} + /* * AppendOnlyBlockDirectory_Init_forInsert * @@ -1539,3 +1597,18 @@ AppendOnlyBlockDirectory_End_addCol( MemoryContextDelete(blockDirectory->memoryContext); } + +void +AppendOnlyBlockDirectory_End_forUniqueChecks(AppendOnlyBlockDirectory *blockDirectory) +{ + Assert(RelationIsValid(blockDirectory->blkdirRel)); + + /* This must have been reset after each uniqueness check */ + 
Assert(blockDirectory->appendOnlyMetaDataSnapshot == InvalidSnapshot); + + if (blockDirectory->blkdirIdx) + index_close(blockDirectory->blkdirIdx, AccessShareLock); + heap_close(blockDirectory->blkdirRel, AccessShareLock); + + MemoryContextDelete(blockDirectory->memoryContext); +} diff --git a/src/include/access/appendonly_visimap.h b/src/include/access/appendonly_visimap.h index ce7257cd467..ababd194296 100644 --- a/src/include/access/appendonly_visimap.h +++ b/src/include/access/appendonly_visimap.h @@ -132,6 +132,14 @@ void AppendOnlyVisimapScan_Init( LOCKMODE lockmode, Snapshot appendonlyMetadataSnapshot); +extern void AppendOnlyVisimap_Init_forUniqueCheck( + AppendOnlyVisimap *visiMap, + Relation aoRel, + Snapshot snapshot); + +extern void AppendOnlyVisimap_Finish_forUniquenessChecks( + AppendOnlyVisimap *visiMap); + bool AppendOnlyVisimapScan_GetNextInvisible( AppendOnlyVisimapScan *visiMapScan, AOTupleId *tupleId); @@ -149,4 +157,35 @@ TM_Result AppendOnlyVisimapDelete_Hide( void AppendOnlyVisimapDelete_Finish( AppendOnlyVisimapDelete *visiMapDelete); + +/* + * AppendOnlyVisimap_UniqueCheck + * + * During a uniqueness check, look up the visimap to see if a tuple was deleted + * by a *committed* transaction. + * + * Note: We need to use the passed in per-tuple snapshot to perform the block + * directory lookup. See AppendOnlyVisimap_Init_forUniqueCheck() for details on + * why we can't set up the metadata snapshot at init time. + * If this is part of an update, we are reusing the visimap from the delete half + * of the update, so better restore its snapshot once we are done. 
+ */ +static inline bool AppendOnlyVisimap_UniqueCheck( + AppendOnlyVisimap *visiMap, + AOTupleId *aoTupleId, + Snapshot appendOnlyMetaDataSnapshot) +{ + Snapshot save_snapshot; + bool visible; + + Assert(appendOnlyMetaDataSnapshot->snapshot_type == SNAPSHOT_DIRTY || + appendOnlyMetaDataSnapshot->snapshot_type == SNAPSHOT_SELF); + + save_snapshot = visiMap->visimapStore.snapshot; + visiMap->visimapStore.snapshot = appendOnlyMetaDataSnapshot; + visible = AppendOnlyVisimap_IsVisible(visiMap, aoTupleId); + visiMap->visimapStore.snapshot = save_snapshot; + return visible; +} + #endif diff --git a/src/include/cdb/cdbappendonlyblockdirectory.h b/src/include/cdb/cdbappendonlyblockdirectory.h index d3d97b50065..9e4f92a1453 100644 --- a/src/include/cdb/cdbappendonlyblockdirectory.h +++ b/src/include/cdb/cdbappendonlyblockdirectory.h @@ -214,6 +214,10 @@ extern void AppendOnlyBlockDirectory_Init_forSearch( int numColumnGroups, bool isAOCol, bool *proj); +extern void AppendOnlyBlockDirectory_Init_forUniqueChecks(AppendOnlyBlockDirectory *blockDirectory, + Relation aoRel, + int numColumnGroups, + Snapshot snapshot); extern void AppendOnlyBlockDirectory_Init_addCol( AppendOnlyBlockDirectory *blockDirectory, Snapshot appendOnlyMetaDataSnapshot, @@ -253,12 +257,52 @@ extern void AppendOnlyBlockDirectory_DeleteSegmentFile( Snapshot snapshot, int segno, int columnGroupNo); +extern void AppendOnlyBlockDirectory_End_forUniqueChecks( + AppendOnlyBlockDirectory *blockDirectory); extern void AppendOnlyBlockDirectory_InsertPlaceholder(AppendOnlyBlockDirectory *blockDirectory, int64 firstRowNum, int64 fileOffset, int columnGroupNo); +/* + * AppendOnlyBlockDirectory_UniqueCheck + * + * Check to see if there is a block directory entry for the tuple. If no such + * entry exists, the tuple doesn't exist physically in the segfile. + * + * Note: We need to use the passed in per-tuple snapshot to perform the block + * directory lookup. 
See AppendOnlyBlockDirectory_Init_forUniqueCheck() for + * details on why we can't set up the metadata snapshot at init time. + */ +static inline bool AppendOnlyBlockDirectory_UniqueCheck( + AppendOnlyBlockDirectory *blockDirectory, + AOTupleId *aoTupleId, + Snapshot appendOnlyMetaDataSnapshot +) +{ + bool covers; + + Assert(appendOnlyMetaDataSnapshot->snapshot_type == SNAPSHOT_DIRTY || + appendOnlyMetaDataSnapshot->snapshot_type == SNAPSHOT_SELF); + + Assert(blockDirectory->appendOnlyMetaDataSnapshot == InvalidSnapshot); + + /* Set up the snapshot to use for the block directory scan */ + blockDirectory->appendOnlyMetaDataSnapshot = appendOnlyMetaDataSnapshot; + + covers = AppendOnlyBlockDirectory_CoversTuple(blockDirectory, + aoTupleId); + + /* + * Reset the metadata snapshot to avoid leaking a stack reference. We have + * to do this since SNAPSHOT_DIRTY is stack-allocated. + */ + blockDirectory->appendOnlyMetaDataSnapshot = InvalidSnapshot; + + return covers; +} + static inline uint32 minipage_size(uint32 nEntry) { From 86420fd8691b3bba16a5d8548d2f2c3c7ab0cdf3 Mon Sep 17 00:00:00 2001 From: Soumyadeep Chakraborty Date: Mon, 21 Nov 2022 09:22:45 -0800 Subject: [PATCH 09/19] Unconditionally close aoblkdiridx during finish By the time we reach AppendOnlyBlockDirectory_End_forUniqueChecks(), there is a guarantee that AppendOnlyBlockDirectory->blkdirIdx is open. Thus there is no need to conditionally close the index. 
--- src/backend/access/appendonly/appendonlyblockdirectory.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/backend/access/appendonly/appendonlyblockdirectory.c b/src/backend/access/appendonly/appendonlyblockdirectory.c index 409d1d08fad..9bed6bbce0b 100644 --- a/src/backend/access/appendonly/appendonlyblockdirectory.c +++ b/src/backend/access/appendonly/appendonlyblockdirectory.c @@ -1606,8 +1606,10 @@ AppendOnlyBlockDirectory_End_forUniqueChecks(AppendOnlyBlockDirectory *blockDire /* This must have been reset after each uniqueness check */ Assert(blockDirectory->appendOnlyMetaDataSnapshot == InvalidSnapshot); - if (blockDirectory->blkdirIdx) - index_close(blockDirectory->blkdirIdx, AccessShareLock); + Assert(RelationIsValid(blockDirectory->blkdirIdx)); + Assert(RelationIsValid(blockDirectory->blkdirRel)); + + index_close(blockDirectory->blkdirIdx, AccessShareLock); heap_close(blockDirectory->blkdirRel, AccessShareLock); MemoryContextDelete(blockDirectory->memoryContext); From 181df1b5f527257ca142bf343bfc97754d978c58 Mon Sep 17 00:00:00 2001 From: Soumyadeep Chakraborty Date: Mon, 21 Nov 2022 09:24:00 -0800 Subject: [PATCH 10/19] ao/co: Add missing traces to unique check init/end --- src/backend/access/appendonly/appendonly_visimap.c | 14 +++++++++++++- .../access/appendonly/appendonlyblockdirectory.c | 12 ++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/backend/access/appendonly/appendonly_visimap.c b/src/backend/access/appendonly/appendonly_visimap.c index 8a52756512d..939c15fb825 100644 --- a/src/backend/access/appendonly/appendonly_visimap.c +++ b/src/backend/access/appendonly/appendonly_visimap.c @@ -896,6 +896,11 @@ void AppendOnlyVisimap_Init_forUniqueCheck( if (!OidIsValid(visimaprelid) || !OidIsValid(visimapidxid)) elog(ERROR, "Could not find block directory for relation: %u", aoRel->rd_id); + ereportif(Debug_appendonly_print_visimap, LOG, + (errmsg("Append-only visimap init for unique checks"), + 
errdetail("(aoRel = %u, visimaprel = %u, visimapidxrel = %u)", + aoRel->rd_id, visimaprelid, visimapidxid))); + AppendOnlyVisimap_Init(visiMap, visimaprelid, visimapidxid, @@ -907,11 +912,18 @@ void AppendOnlyVisimap_Finish_forUniquenessChecks( AppendOnlyVisimap *visiMap) { + AppendOnlyVisimapStore *visimapStore = &visiMap->visimapStore; /* * The snapshot was either reset to NULL in between calls or already cleaned * up (if this was part of an update command) */ - Assert(visiMap->visimapStore.snapshot == InvalidSnapshot); + Assert(visimapStore->snapshot == InvalidSnapshot); + + ereportif(Debug_appendonly_print_visimap, LOG, + (errmsg("Append-only visimap finish for unique checks"), + errdetail("(visimaprel = %u, visimapidxrel = %u)", + visimapStore->visimapRelation->rd_id, + visimapStore->visimapIndex->rd_id))); AppendOnlyVisimapStore_Finish(&visiMap->visimapStore, AccessShareLock); AppendOnlyVisimapEntry_Finish(&visiMap->visimapEntry); diff --git a/src/backend/access/appendonly/appendonlyblockdirectory.c b/src/backend/access/appendonly/appendonlyblockdirectory.c index 9bed6bbce0b..41fce17972f 100644 --- a/src/backend/access/appendonly/appendonlyblockdirectory.c +++ b/src/backend/access/appendonly/appendonlyblockdirectory.c @@ -258,6 +258,11 @@ AppendOnlyBlockDirectory_Init_forUniqueChecks( if (!OidIsValid(blkdirrelid) || !OidIsValid(blkdiridxid)) elog(ERROR, "Could not find block directory for relation: %u", aoRel->rd_id); + ereportif(Debug_appendonly_print_blockdirectory, LOG, + (errmsg("Append-only block directory init for unique checks"), + errdetail("(aoRel = %u, blkdirrel = %u, blkdiridxrel = %u, numColumnGroups = %d)", + aoRel->rd_id, blkdirrelid, blkdiridxid, numColumnGroups))); + blockDirectory->aoRel = aoRel; blockDirectory->isAOCol = RelationIsAoCols(aoRel); @@ -1609,6 +1614,13 @@ AppendOnlyBlockDirectory_End_forUniqueChecks(AppendOnlyBlockDirectory *blockDire Assert(RelationIsValid(blockDirectory->blkdirIdx)); 
Assert(RelationIsValid(blockDirectory->blkdirRel)); + ereportif(Debug_appendonly_print_blockdirectory, LOG, + (errmsg("Append-only block directory end for unique checks"), + errdetail("(aoRel = %u, blkdirrel = %u, blkdiridxrel = %u)", + blockDirectory->aoRel->rd_id, + blockDirectory->blkdirRel->rd_id, + blockDirectory->blkdirIdx->rd_id))); + index_close(blockDirectory->blkdirIdx, AccessShareLock); heap_close(blockDirectory->blkdirRel, AccessShareLock); From c90505c6c8e60ec18cd775e4f6735e7096768f90 Mon Sep 17 00:00:00 2001 From: Haolin Wang Date: Tue, 7 Mar 2023 19:53:25 +0800 Subject: [PATCH 11/19] Rename index_fetch_tuple_exists to index_unique_check --- src/backend/access/aocs/aocsam_handler.c | 4 ++-- src/backend/access/appendonly/README.md | 2 +- src/backend/access/appendonly/appendonly_visimap.c | 2 +- src/backend/access/appendonly/appendonlyam_handler.c | 4 ++-- src/backend/access/appendonly/appendonlyblockdirectory.c | 2 +- src/backend/access/table/tableam.c | 4 ++-- src/include/access/tableam.h | 8 ++++---- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/backend/access/aocs/aocsam_handler.c b/src/backend/access/aocs/aocsam_handler.c index 3f31542e257..b2325b378a6 100644 --- a/src/backend/access/aocs/aocsam_handler.c +++ b/src/backend/access/aocs/aocsam_handler.c @@ -900,7 +900,7 @@ aoco_index_fetch_tuple(struct IndexFetchTableData *scan, * true and have the xwait machinery kick in. 
*/ static bool -aoco_index_fetch_tuple_exists(Relation rel, +aoco_index_unique_check(Relation rel, ItemPointer tid, Snapshot snapshot, bool *all_dead) @@ -2197,7 +2197,7 @@ static TableAmRoutine ao_column_methods = { .index_fetch_reset = aoco_index_fetch_reset, .index_fetch_end = aoco_index_fetch_end, .index_fetch_tuple = aoco_index_fetch_tuple, - .index_fetch_tuple_exists = aoco_index_fetch_tuple_exists, + .index_unique_check = aoco_index_unique_check, .tuple_insert = aoco_tuple_insert, .tuple_insert_speculative = aoco_tuple_insert_speculative, diff --git a/src/backend/access/appendonly/README.md b/src/backend/access/appendonly/README.md index 056b4c0e163..70ac7d8b902 100644 --- a/src/backend/access/appendonly/README.md +++ b/src/backend/access/appendonly/README.md @@ -219,7 +219,7 @@ degradation in the worst case) in setting up and tearing down scan descriptors for AO/CO tables, we avoid the scanbegin..fetch..scanend construct in table_index_fetch_tuple_check(). -So, a new tableam API index_fetch_tuple_exists() is used, which is implemented +So, a new tableam API index_unique_check() is used, which is implemented only for AO/CO tables. Here, we fetch a UniqueCheckDesc, which stores all the in-memory state to help us perform a unique index check. This descriptor is attached to the DMLState structs. Currently, the descriptor holds only a block diff --git a/src/backend/access/appendonly/appendonly_visimap.c b/src/backend/access/appendonly/appendonly_visimap.c index 939c15fb825..d312c3a130d 100644 --- a/src/backend/access/appendonly/appendonly_visimap.c +++ b/src/backend/access/appendonly/appendonly_visimap.c @@ -872,7 +872,7 @@ AppendOnlyVisimapDelete_Finish( * uniqueness checks. * * Note: we defer setting up the appendOnlyMetaDataSnapshot for the visibility - * map to the index_fetch_tuple_exists() table AM call. This is because + * map to the index_unique_check() table AM call. 
This is because * snapshots used for unique index lookups are special and don't follow the * usual allocation or registration mechanism. They may be stack-allocated and a * new snapshot object may be passed to every unique index check (this happens diff --git a/src/backend/access/appendonly/appendonlyam_handler.c b/src/backend/access/appendonly/appendonlyam_handler.c index a46e716f477..4911a611ca1 100644 --- a/src/backend/access/appendonly/appendonlyam_handler.c +++ b/src/backend/access/appendonly/appendonlyam_handler.c @@ -678,7 +678,7 @@ appendonly_index_fetch_tuple(struct IndexFetchTableData *scan, * true and have the xwait machinery kick in. */ static bool -appendonly_index_fetch_tuple_exists(Relation rel, +appendonly_index_unique_check(Relation rel, ItemPointer tid, Snapshot snapshot, bool *all_dead) @@ -2322,7 +2322,7 @@ static const TableAmRoutine ao_row_methods = { .index_fetch_reset = appendonly_index_fetch_reset, .index_fetch_end = appendonly_index_fetch_end, .index_fetch_tuple = appendonly_index_fetch_tuple, - .index_fetch_tuple_exists = appendonly_index_fetch_tuple_exists, + .index_unique_check = appendonly_index_unique_check, .tuple_insert = appendonly_tuple_insert, .tuple_insert_speculative = appendonly_tuple_insert_speculative, diff --git a/src/backend/access/appendonly/appendonlyblockdirectory.c b/src/backend/access/appendonly/appendonlyblockdirectory.c index 41fce17972f..5b20a82e228 100644 --- a/src/backend/access/appendonly/appendonlyblockdirectory.c +++ b/src/backend/access/appendonly/appendonlyblockdirectory.c @@ -229,7 +229,7 @@ AppendOnlyBlockDirectory_Init_forSearch( * itself and will not involve the physical AO relation. * * Note: we defer setting up the appendOnlyMetaDataSnapshot for the block - * directory to the index_fetch_tuple_exists() table AM call. This is because + * directory to the index_unique_check() table AM call. 
This is because * snapshots used for unique index lookups are special and don't follow the * usual allocation or registration mechanism. They may be stack-allocated and a * new snapshot object may be passed to every unique index check (this happens diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index 3f5fa7439f0..1eee611e022 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -286,10 +286,10 @@ table_index_fetch_tuple_check(Relation rel, /* * Optimized path for AO/CO relations as the aforementioned per-tuple * overhead is significant for AO/CO relations. For details, please refer to - * table_index_fetch_tuple_exists(). + * table_index_unique_check(). */ if (RelationIsAppendOptimized(rel)) - return table_index_fetch_tuple_exists(rel, tid, snapshot, all_dead); + return table_index_unique_check(rel, tid, snapshot, all_dead); slot = table_slot_create(rel, NULL); scan = table_index_fetch_begin(rel); diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index f5433ddaa2b..d9a3b37e9d3 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -458,8 +458,8 @@ typedef struct TableAmRoutine TupleTableSlot *slot, bool *call_again, bool *all_dead); - /* See table_index_fetch_tuple_exists() for details */ - bool (*index_fetch_tuple_exists) (Relation rel, + /* See table_index_unique_check() for details */ + bool (*index_unique_check) (Relation rel, ItemPointer tid, Snapshot snapshot, bool *all_dead); @@ -1340,12 +1340,12 @@ extern bool table_index_fetch_tuple_check(Relation rel, * This has to have an identical signature to table_index_fetch_tuple_check(). 
*/ static inline bool -table_index_fetch_tuple_exists(Relation rel, +table_index_unique_check(Relation rel, ItemPointer tid, Snapshot snapshot, bool *all_dead) { - return rel->rd_tableam->index_fetch_tuple_exists(rel, tid, snapshot, + return rel->rd_tableam->index_unique_check(rel, tid, snapshot, all_dead); } From 725192de109f15d7a8cdd916d111849220d7a2bb Mon Sep 17 00:00:00 2001 From: Haolin Wang Date: Sat, 25 Feb 2023 13:49:15 +0800 Subject: [PATCH 12/19] Introduce "version" attribute to pg_appendonly. This attribute is used to align with "formatversion" of pg_aoseg catalog for identifying the append-optimized relation version during planning stage. This will simplify validation for version dependent features such like index only scan, unique index creation and so on. --- src/backend/access/aocs/aocsam.c | 6 +- src/backend/access/aocs/aocssegfiles.c | 4 +- src/backend/access/appendonly/aosegfiles.c | 8 +- src/backend/access/appendonly/appendonlyam.c | 7 +- .../access/appendonly/appendonlywriter.c | 4 +- src/backend/catalog/heap.c | 3 +- src/backend/catalog/pg_appendonly.c | 4 +- src/backend/cdb/cdbappendonlystorageread.c | 2 +- src/backend/cdb/cdbappendonlystoragewrite.c | 2 +- src/backend/commands/indexcmds.c | 20 +- src/include/catalog/aoblkdir.h | 1 - src/include/catalog/pg_appendonly.h | 64 +++-- src/include/cdb/cdbappendonlyam.h | 2 +- src/include/cdb/cdbappendonlystoragewrite.h | 2 +- .../expected/uao_crash_compaction_row.out | 86 +++--- .../uao/test_pg_appendonly_version.source | 29 ++ src/test/isolation2/isolation2_schedule | 9 + .../uao/compaction_utility_insert.source | 2 +- .../output/uao/max_concurrency.source | 254 +++++++++--------- .../output/uao/max_concurrency2.source | 254 +++++++++--------- .../output/uao/select_after_vacuum.source | 12 +- .../uao/test_pg_appendonly_version.source | 63 +++++ .../output/uao/vacuum_cleanup.source | 26 +- .../uao/vacuum_self_serializable.source | 12 +- src/test/regress/output/gp_tablespace.source | 18 +- 25 files 
changed, 520 insertions(+), 374 deletions(-) create mode 100644 src/test/isolation2/input/uao/test_pg_appendonly_version.source create mode 100644 src/test/isolation2/output/uao/test_pg_appendonly_version.source diff --git a/src/backend/access/aocs/aocsam.c b/src/backend/access/aocs/aocsam.c index d6cd970abff..717f4d848ea 100644 --- a/src/backend/access/aocs/aocsam.c +++ b/src/backend/access/aocs/aocsam.c @@ -1351,10 +1351,10 @@ scanToFetchValue(AOCSFetchDesc aocsFetchDesc, /* * We fell into a hole inside the resolved block directory entry * we obtained from AppendOnlyBlockDirectory_GetEntry(). - * This should not be happening for versions >= PG12. Scream + * This should not be happening for versions >= GP7. Scream * appropriately. See AppendOnlyBlockDirectoryEntry for details. */ - ereportif(datumStream->ao_read.formatVersion >= AORelationVersion_PG12, + ereportif(aocsFetchDesc->relation->rd_appendonly->version >= AORelationVersion_GP7, ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("datum with row number %ld and col no %d not found in block directory entry range", rowNum, colno), @@ -2117,7 +2117,7 @@ aocs_addcol_newsegfile(AOCSAddColumnDesc desc, int version; /* Always write in the latest format */ - version = AORelationVersion_GetLatest(); + version = AOSegfileFormatVersion_GetLatest(); FormatAOSegmentFileName(basepath, seginfo->segno, colno, &fileSegNo, fn); diff --git a/src/backend/access/aocs/aocssegfiles.c b/src/backend/access/aocs/aocssegfiles.c index 9f389f94cd6..be273dee718 100644 --- a/src/backend/access/aocs/aocssegfiles.c +++ b/src/backend/access/aocs/aocssegfiles.c @@ -84,7 +84,7 @@ InsertInitialAOCSFileSegInfo(Relation prel, int32 segno, int32 nvp, Oid segrelid ValidateAppendonlySegmentDataBeforeStorage(segno); /* New segments are always created in the latest format */ - formatVersion = AORelationVersion_GetLatest(); + formatVersion = AOSegfileFormatVersion_GetLatest(); segrel = heap_open(segrelid, RowExclusiveLock); @@ -665,7 +665,7 @@ 
ClearAOCSFileSegInfo(Relation prel, int segno) repl[Anum_pg_aocs_varblockcount - 1] = true; /* When the segment is later recreated, it will be in new format */ - d[Anum_pg_aocs_formatversion - 1] = Int16GetDatum(AORelationVersion_GetLatest()); + d[Anum_pg_aocs_formatversion - 1] = Int16GetDatum(AOSegfileFormatVersion_GetLatest()); repl[Anum_pg_aocs_formatversion - 1] = true; /* We do not reset the modcount here */ diff --git a/src/backend/access/appendonly/aosegfiles.c b/src/backend/access/appendonly/aosegfiles.c index 2e651099cb8..1d832d7e0be 100644 --- a/src/backend/access/appendonly/aosegfiles.c +++ b/src/backend/access/appendonly/aosegfiles.c @@ -106,7 +106,7 @@ InsertInitialSegnoEntry(Relation parentrel, int segno) ValidateAppendonlySegmentDataBeforeStorage(segno); /* New segments are always created in the latest format */ - formatVersion = AORelationVersion_GetLatest(); + formatVersion = AOSegfileFormatVersion_GetLatest(); GetAppendOnlyEntryAuxOids(parentrel->rd_id, NULL, &segrelid, NULL, NULL, NULL, NULL); @@ -302,7 +302,7 @@ GetFileSegInfo(Relation parentrel, Snapshot appendOnlyMetaDataSnapshot, int segn ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("got invalid formatversion value: NULL"))); - AORelationVersion_CheckValid(fsinfo->formatversion); + AOSegfileFormatVersion_CheckValid(fsinfo->formatversion); /* get the state */ fsinfo->state = DatumGetInt16( @@ -494,7 +494,7 @@ GetAllFileSegInfo_pg_aoseg_rel(char *relationName, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("got invalid formatversion value: NULL"))); - AORelationVersion_CheckValid(formatversion); + AOSegfileFormatVersion_CheckValid(formatversion); oneseginfo->formatversion = DatumGetInt16(formatversion); /* get the state */ @@ -665,7 +665,7 @@ ClearFileSegInfo(Relation parentrel, int segno) new_record_repl[Anum_pg_aoseg_eofuncompressed - 1] = true; /* When the segment is later recreated, it will be in new format */ - new_record[Anum_pg_aoseg_formatversion - 1] = 
Int16GetDatum(AORelationVersion_GetLatest()); + new_record[Anum_pg_aoseg_formatversion - 1] = Int16GetDatum(AOSegfileFormatVersion_GetLatest()); new_record_repl[Anum_pg_aoseg_formatversion - 1] = true; /* We do not reset the modcount here */ diff --git a/src/backend/access/appendonly/appendonlyam.c b/src/backend/access/appendonly/appendonlyam.c index d92affc9226..0b2536f6f25 100755 --- a/src/backend/access/appendonly/appendonlyam.c +++ b/src/backend/access/appendonly/appendonlyam.c @@ -973,7 +973,7 @@ AppendOnlyExecutorReadBlock_ProcessTuple(AppendOnlyExecutorReadBlock *executorRe AOTupleId *aoTupleId = (AOTupleId *) &fake_ctid; int formatVersion = executorReadBlock->storageRead->formatVersion; - AORelationVersion_CheckValid(formatVersion); + AOSegfileFormatVersion_CheckValid(formatVersion); AOTupleIdInit(aoTupleId, executorReadBlock->segmentFileNum, rowNum); @@ -1942,7 +1942,6 @@ fetchFromCurrentBlock(AppendOnlyFetchDesc aoFetchDesc, bool fetched; AOFetchBlockMetadata *currentBlock = &aoFetchDesc->currentBlock; AppendOnlyExecutorReadBlock *executorReadBlock = &aoFetchDesc->executorReadBlock; - AppendOnlyStorageRead *storageRead = &aoFetchDesc->storageRead; AppendOnlyBlockDirectoryEntry *entry = ¤tBlock->blockDirectoryEntry; if (!currentBlock->gotContents) @@ -1967,10 +1966,10 @@ fetchFromCurrentBlock(AppendOnlyFetchDesc aoFetchDesc, /* * We fell into a hole inside the resolved block directory entry * we obtained from AppendOnlyBlockDirectory_GetEntry(). - * This should not be happening for versions >= PG12. Scream + * This should not be happening for versions >= GP7. Scream * appropriately. See AppendOnlyBlockDirectoryEntry for details. 
*/ - ereportif(storageRead->formatVersion >= AORelationVersion_PG12, + ereportif(aoFetchDesc->relation->rd_appendonly->version >= AORelationVersion_GP7, ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("tuple with row number %ld not found in block directory entry range", rowNum), diff --git a/src/backend/access/appendonly/appendonlywriter.c b/src/backend/access/appendonly/appendonlywriter.c index 9405c3b1c6c..34eccbc828d 100644 --- a/src/backend/access/appendonly/appendonlywriter.c +++ b/src/backend/access/appendonly/appendonlywriter.c @@ -191,7 +191,7 @@ LockSegnoForWrite(Relation rel, int segno) elog(ERROR, "segfile %d is full", segno); /* Skip using the ao segment if not latest version (except as a compaction target) */ - if (formatversion != AORelationVersion_GetLatest()) + if (formatversion != AOSegfileFormatVersion_GetLatest()) elog(ERROR, "segfile %d is not of the latest version", segno); found = true; @@ -484,7 +484,7 @@ choose_segno_internal(Relation rel, List *avoid_segnos, choose_segno_mode mode) continue; /* Skip using the ao segment if not latest version (except as a compaction target) */ - if (formatversion != AORelationVersion_GetLatest()) + if (formatversion != AOSegfileFormatVersion_GetLatest()) continue; /* diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 9bd26ad8de5..f4d2f0612c2 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -1764,7 +1764,8 @@ heap_create_with_catalog(const char *relname, InvalidOid, InvalidOid, InvalidOid, - InvalidOid); + InvalidOid, + AORelationVersion_GetLatest()); } /* diff --git a/src/backend/catalog/pg_appendonly.c b/src/backend/catalog/pg_appendonly.c index d3b49753af5..511b089199d 100644 --- a/src/backend/catalog/pg_appendonly.c +++ b/src/backend/catalog/pg_appendonly.c @@ -55,7 +55,8 @@ InsertAppendOnlyEntry(Oid relid, Oid blkdirrelid, Oid blkdiridxid, Oid visimaprelid, - Oid visimapidxid) + Oid visimapidxid, + int16 version) { Relation pg_appendonly_rel; HeapTuple 
pg_appendonly_tuple = NULL; @@ -97,6 +98,7 @@ InsertAppendOnlyEntry(Oid relid, values[Anum_pg_appendonly_blkdiridxid - 1] = ObjectIdGetDatum(blkdiridxid); values[Anum_pg_appendonly_visimaprelid - 1] = ObjectIdGetDatum(visimaprelid); values[Anum_pg_appendonly_visimapidxid - 1] = ObjectIdGetDatum(visimapidxid); + values[Anum_pg_appendonly_version - 1] = Int16GetDatum(version); /* * form the tuple and insert it diff --git a/src/backend/cdb/cdbappendonlystorageread.c b/src/backend/cdb/cdbappendonlystorageread.c index 7588fe15df4..76f9b4c8f59 100755 --- a/src/backend/cdb/cdbappendonlystorageread.c +++ b/src/backend/cdb/cdbappendonlystorageread.c @@ -267,7 +267,7 @@ AppendOnlyStorageRead_FinishOpenFile(AppendOnlyStorageRead *storageRead, { MemoryContext oldMemoryContext; - AORelationVersion_CheckValid(version); + AOSegfileFormatVersion_CheckValid(version); storageRead->file = file; storageRead->formatVersion = version; diff --git a/src/backend/cdb/cdbappendonlystoragewrite.c b/src/backend/cdb/cdbappendonlystoragewrite.c index 0f3155d5b25..16e5569378d 100755 --- a/src/backend/cdb/cdbappendonlystoragewrite.c +++ b/src/backend/cdb/cdbappendonlystoragewrite.c @@ -306,7 +306,7 @@ AppendOnlyStorageWrite_OpenFile(AppendOnlyStorageWrite *storageWrite, * Assume that we only write in the current latest format. 
(it's redundant * to pass the version number as argument, currently) */ - if (version != AORelationVersion_GetLatest()) + if (version != AOSegfileFormatVersion_GetLatest()) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("cannot write append-only table version %d", version))); diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index c2b7ea583bc..ec47605ac43 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -1084,8 +1084,24 @@ DefineIndex(Oid relationId, errmsg("append-only tables do not support unique indexes built concurrently"))); /* Additional version checks needed if block directory already exists */ - if (OidIsValid(blkdirrelid)) - ValidateRelationVersionForUniqueIndex(rel); + if (OidIsValid(blkdirrelid) && !AORelationVersion_Validate(rel, AORelationVersion_GP7)) + { + /* + * We currently raise an error in this scenario. We could alternatively + * recreate the block directory (and perform a relfile swap of the block + * directory relation, similar to alter table rewrites). Such a solution is + * complex enough and can be explored with appropriate user need. Block + * directory creation during DefineIndex() has exposed complexities in the + * past too, especially around locking when multiple indexes are being + * created at a time. 
+ */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("append-only tables with older relation versions do not support unique indexes"), + errdetail("version found = %d, minimum version required = %d", rel->rd_appendonly->version, + AORelationVersion_GP7), + errhint("ALTER TABLE SET WITH (REORGANIZE = true) before creating the unique index"))); + } } /* diff --git a/src/include/catalog/aoblkdir.h b/src/include/catalog/aoblkdir.h index dc4648acf05..1b94ba90927 100644 --- a/src/include/catalog/aoblkdir.h +++ b/src/include/catalog/aoblkdir.h @@ -26,6 +26,5 @@ #define Anum_pg_aoblkdir_minipage 4 extern void AlterTableCreateAoBlkdirTable(Oid relOid); -extern void ValidateRelationVersionForUniqueIndex(Relation rel); #endif diff --git a/src/include/catalog/pg_appendonly.h b/src/include/catalog/pg_appendonly.h index 59f07ef0ca5..3ad63bbfaa2 100644 --- a/src/include/catalog/pg_appendonly.h +++ b/src/include/catalog/pg_appendonly.h @@ -38,6 +38,7 @@ CATALOG(pg_appendonly,6105,AppendOnlyRelationId) Oid blkdiridxid; /* if aoblkdir table, OID of aoblkdir index */ Oid visimaprelid; /* OID of the aovisimap table */ Oid visimapidxid; /* OID of aovisimap index */ + int16 version; /* AO relation version */ } FormData_pg_appendonly; /* GPDB added foreign key definitions for gpcheckcat. */ @@ -48,7 +49,7 @@ FOREIGN_KEY(relid REFERENCES pg_class(oid)); * (there are no var-length fields currentl.) */ #define APPENDONLY_TUPLE_SIZE \ - (offsetof(FormData_pg_appendonly,visimapidxid) + sizeof(Oid)) + (offsetof(FormData_pg_appendonly,version) + sizeof(Oid)) /* ---------------- * Form_pg_appendonly corresponds to a pointer to a tuple with @@ -59,30 +60,48 @@ typedef FormData_pg_appendonly *Form_pg_appendonly; /* * AORelationVersion defines valid values for the version of AppendOnlyEntry. - * NOTE: When this is updated, AoRelationVersion_GetLatest() must be updated accordingly. + * NOTE: When this is updated, AORelationVersion_GetLatest() must be updated accordingly. 
*/ typedef enum AORelationVersion { - AORelationVersion_None = 0, - AORelationVersion_Original = 1, /* first valid version */ - AORelationVersion_Aligned64bit = 2, /* version where the fixes for AOBlock and MemTuple - * were introduced, see MPP-7251 and MPP-7372. */ - AORelationVersion_PG83 = 3, /* Same as Aligned64bit, but numerics are stored - * in the PostgreSQL 8.3 format. */ - AORelationVersion_PG12 = 4, /* version that removed block directory hole filling. */ - MaxAORelationVersion /* must always be last */ + AORelationVersion_None = 0, + AORelationVersion_GP6 = 1, + AORelationVersion_GP7 = 2, + MaxAORelationVersion } AORelationVersion; -#define AORelationVersion_GetLatest() AORelationVersion_PG12 - +#define AORelationVersion_GetLatest() AORelationVersion_GP7 +#define AORelationVersion_Get(relation) (relation)->rd_appendonly->version +#define AORelationVersion_Validate(relation, version) \ + (AORelationVersion_Get((relation)) >= (version)) #define AORelationVersion_IsValid(version) \ - (version > AORelationVersion_None && version < MaxAORelationVersion) + ((version) > AORelationVersion_None && (version) < MaxAORelationVersion) + +/* + * AOSegfileFormatVersion defines valid values for the version of AppendOnlyEntry. + * NOTE: When this is updated, AOSegfileFormatVersion_GetLatest() must be updated accordingly. + */ +typedef enum AOSegfileFormatVersion +{ + AOSegfileFormatVersion_None = 0, + AOSegfileFormatVersion_Original = 1, /* first valid version */ + AOSegfileFormatVersion_Aligned64bit = 2, /* version where the fixes for AOBlock and MemTuple + * were introduced, see MPP-7251 and MPP-7372. */ + AOSegfileFormatVersion_GP5 = 3, /* Same as Aligned64bit, but numerics are stored + * in the PostgreSQL 8.3 format. 
*/ + MaxAOSegfileFormatVersion /* must always be last */ +} AOSegfileFormatVersion; + +#define AOSegfileFormatVersion_GetLatest() AOSegfileFormatVersion_GP5 + +#define AOSegfileFormatVersion_IsValid(version) \ + (version > AOSegfileFormatVersion_None && version < MaxAOSegfileFormatVersion) extern bool Debug_appendonly_print_verify_write_block; -static inline void AORelationVersion_CheckValid(int version) +static inline void AOSegfileFormatVersion_CheckValid(int version) { - if (!AORelationVersion_IsValid(version)) + if (!AOSegfileFormatVersion_IsValid(version)) { ereport(Debug_appendonly_print_verify_write_block?PANIC:ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), @@ -92,10 +111,19 @@ static inline void AORelationVersion_CheckValid(int version) } /* - * Versions higher than AORelationVersion_Original include the fixes for AOBlock and + * Versions higher than AOSegfileFormatVersion_Original include the fixes for AOBlock and * MemTuple alignment. */ #define IsAOBlockAndMemtupleAlignmentFixed(version) \ +( \ + AOSegfileFormatVersion_CheckValid(version), \ + (version > AOSegfileFormatVersion_Original) \ +) + +/* + * Are numerics stored in old, pre-PostgreSQL 8.3 format, and need converting? 
+ */ +#define PG82NumericConversionNeeded(version) \ ( \ AORelationVersion_CheckValid(version), \ (version > AORelationVersion_Original) \ @@ -122,7 +150,8 @@ InsertAppendOnlyEntry(Oid relid, Oid blkdirrelid, Oid blkdiridxid, Oid visimaprelid, - Oid visimapidxid); + Oid visimapidxid, + int16 version); void GetAppendOnlyEntryAttributes(Oid relid, @@ -148,7 +177,6 @@ GetAppendOnlyEntryAuxOids(Oid relid, Oid *visimaprelid, Oid *visimapidxid); - void GetAppendOnlyEntry(Oid relid, Form_pg_appendonly aoEntry); /* diff --git a/src/include/cdb/cdbappendonlyam.h b/src/include/cdb/cdbappendonlyam.h index 83939c21481..dfeeff76453 100644 --- a/src/include/cdb/cdbappendonlyam.h +++ b/src/include/cdb/cdbappendonlyam.h @@ -130,7 +130,7 @@ typedef struct AppendOnlyExecutorReadBlock MemTupleBinding *mt_bind; /* - * When reading a segfile that's using version < AORelationVersion_PG83, + * When reading a segfile that's using version < AOSegfileFormatVersion_GP5, * that is, was created before GPDB 5.0 and upgraded with pg_upgrade, we need * to convert numeric attributes on the fly to new format. numericAtts * is an array of attribute numbers (0-based), of all numeric columns (including diff --git a/src/include/cdb/cdbappendonlystoragewrite.h b/src/include/cdb/cdbappendonlystoragewrite.h index 80127661c36..acbdfe0211b 100755 --- a/src/include/cdb/cdbappendonlystoragewrite.h +++ b/src/include/cdb/cdbappendonlystoragewrite.h @@ -51,7 +51,7 @@ typedef struct AppendOnlyStorageWrite /* * Version number indicating the AO table format version to write in. */ - AORelationVersion formatVersion; + AOSegfileFormatVersion formatVersion; /* * Name of the relation to use in system logging and error messages. 
diff --git a/src/test/isolation2/expected/uao_crash_compaction_row.out b/src/test/isolation2/expected/uao_crash_compaction_row.out index 3d34cbeebbf..5dd3bc113f0 100644 --- a/src/test/isolation2/expected/uao_crash_compaction_row.out +++ b/src/test/isolation2/expected/uao_crash_compaction_row.out @@ -107,8 +107,8 @@ ERROR: Error on receive from seg0 127.0.0.1:7002 pid=15584: server closed the c 1:SELECT * FROM gp_toolkit.__gp_aoseg('crash_before_cleanup_phase') where segment_id = 0; segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state ------------+-------+-----+----------+---------------+------------------+----------+---------------+------- - 0 | 1 | 248 | 5 | 1 | 248 | 2 | 4 | 2 - 0 | 2 | 160 | 3 | 1 | 160 | 0 | 4 | 1 + 0 | 1 | 248 | 5 | 1 | 248 | 2 | 3 | 2 + 0 | 2 | 160 | 3 | 1 | 160 | 0 | 3 | 1 (2 rows) -- do vacuum again, there should be no await-dropping segment files, no concurrent -- transactions exist this time when the VACUUM is performed. 
@@ -117,11 +117,11 @@ VACUUM 1:SELECT * FROM gp_toolkit.__gp_aoseg('crash_before_cleanup_phase'); segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state ------------+-------+-----+----------+---------------+------------------+----------+---------------+------- - 0 | 1 | 0 | 0 | 0 | 0 | 2 | 4 | 1 - 0 | 2 | 160 | 3 | 1 | 160 | 0 | 4 | 1 - 1 | 1 | 0 | 0 | 0 | 0 | 2 | 4 | 1 - 1 | 2 | 0 | 0 | 0 | 0 | 0 | 4 | 1 - 2 | 1 | 200 | 4 | 1 | 200 | 1 | 4 | 1 + 0 | 1 | 0 | 0 | 0 | 0 | 2 | 3 | 1 + 0 | 2 | 160 | 3 | 1 | 160 | 0 | 3 | 1 + 1 | 1 | 0 | 0 | 0 | 0 | 2 | 3 | 1 + 1 | 2 | 0 | 0 | 0 | 0 | 0 | 3 | 1 + 2 | 1 | 200 | 4 | 1 | 200 | 1 | 3 | 1 (5 rows) 1:INSERT INTO crash_before_cleanup_phase VALUES(1, 1, 'c'), (25, 6, 'c'); INSERT 2 @@ -143,23 +143,23 @@ UPDATE 1 1:SELECT * FROM gp_toolkit.__gp_aoseg('crash_before_cleanup_phase'); segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state ------------+-------+-----+----------+---------------+------------------+----------+---------------+------- - 0 | 1 | 0 | 0 | 0 | 0 | 2 | 4 | 1 - 0 | 2 | 160 | 3 | 1 | 160 | 0 | 4 | 1 - 1 | 1 | 64 | 1 | 1 | 64 | 3 | 4 | 1 - 1 | 2 | 0 | 0 | 0 | 0 | 0 | 4 | 1 - 2 | 1 | 328 | 6 | 3 | 328 | 3 | 4 | 1 + 0 | 1 | 0 | 0 | 0 | 0 | 2 | 3 | 1 + 0 | 2 | 160 | 3 | 1 | 160 | 0 | 3 | 1 + 1 | 1 | 64 | 1 | 1 | 64 | 3 | 3 | 1 + 1 | 2 | 0 | 0 | 0 | 0 | 0 | 3 | 1 + 2 | 1 | 328 | 6 | 3 | 328 | 3 | 3 | 1 (5 rows) 1:VACUUM crash_before_cleanup_phase; VACUUM 1:SELECT * FROM gp_toolkit.__gp_aoseg('crash_before_cleanup_phase'); segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state ------------+-------+-----+----------+---------------+------------------+----------+---------------+------- - 0 | 1 | 0 | 0 | 0 | 0 | 2 | 4 | 1 - 0 | 2 | 160 | 3 | 1 | 160 | 0 | 4 | 1 - 1 | 1 | 64 | 1 | 1 | 64 | 3 | 4 | 1 - 1 | 2 | 0 | 0 | 0 | 0 | 0 | 4 | 1 - 2 | 1 | 0 | 0 | 0 | 0 | 3 | 4 | 1 - 2 
| 2 | 248 | 5 | 1 | 248 | 0 | 4 | 1 + 0 | 1 | 0 | 0 | 0 | 0 | 2 | 3 | 1 + 0 | 2 | 160 | 3 | 1 | 160 | 0 | 3 | 1 + 1 | 1 | 64 | 1 | 1 | 64 | 3 | 3 | 1 + 1 | 2 | 0 | 0 | 0 | 0 | 0 | 3 | 1 + 2 | 1 | 0 | 0 | 0 | 0 | 3 | 3 | 1 + 2 | 2 | 248 | 5 | 1 | 248 | 0 | 3 | 1 (6 rows) 1:INSERT INTO crash_before_cleanup_phase VALUES(21, 1, 'c'), (26, 1, 'c'); INSERT 2 @@ -185,9 +185,9 @@ UPDATE 1 1:SELECT * FROM gp_toolkit.__gp_aoseg('crash_vacuum_in_appendonly_insert') where segno = 1; segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state ------------+-------+-----+----------+---------------+------------------+----------+---------------+------- - 0 | 1 | 496 | 10 | 2 | 496 | 2 | 4 | 1 - 1 | 1 | 128 | 2 | 2 | 128 | 2 | 4 | 1 - 2 | 1 | 400 | 8 | 2 | 400 | 2 | 4 | 1 + 0 | 1 | 496 | 10 | 2 | 496 | 2 | 3 | 1 + 1 | 1 | 128 | 2 | 2 | 128 | 2 | 3 | 1 + 2 | 1 | 400 | 8 | 2 | 400 | 2 | 3 | 1 (3 rows) -- verify the new segment files contain no tuples. 1:SELECT sum(tupcount) FROM gp_toolkit.__gp_aoseg('crash_vacuum_in_appendonly_insert') where segno = 2; @@ -200,12 +200,12 @@ VACUUM 1:SELECT * FROM gp_toolkit.__gp_aoseg('crash_vacuum_in_appendonly_insert'); segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state ------------+-------+-----+----------+---------------+------------------+----------+---------------+------- - 0 | 1 | 0 | 0 | 0 | 0 | 2 | 4 | 1 - 0 | 2 | 248 | 5 | 1 | 248 | 0 | 4 | 1 - 1 | 1 | 0 | 0 | 0 | 0 | 2 | 4 | 1 - 1 | 2 | 64 | 1 | 1 | 64 | 0 | 4 | 1 - 2 | 1 | 0 | 0 | 0 | 0 | 2 | 4 | 1 - 2 | 2 | 200 | 4 | 1 | 200 | 0 | 4 | 1 + 0 | 1 | 0 | 0 | 0 | 0 | 2 | 3 | 1 + 0 | 2 | 248 | 5 | 1 | 248 | 0 | 3 | 1 + 1 | 1 | 0 | 0 | 0 | 0 | 2 | 3 | 1 + 1 | 2 | 64 | 1 | 1 | 64 | 0 | 3 | 1 + 2 | 1 | 0 | 0 | 0 | 0 | 2 | 3 | 1 + 2 | 2 | 200 | 4 | 1 | 200 | 0 | 3 | 1 (6 rows) 1:INSERT INTO crash_vacuum_in_appendonly_insert VALUES(21, 1, 'c'), (26, 1, 'c'); INSERT 2 @@ -274,11 +274,11 @@ server 
closed the connection unexpectedly 4:SELECT * FROM gp_toolkit.__gp_aoseg('crash_master_before_cleanup_phase'); segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state ------------+-------+-----+----------+---------------+------------------+----------+---------------+------- - 0 | 1 | 248 | 5 | 1 | 248 | 2 | 4 | 2 - 0 | 2 | 160 | 3 | 1 | 160 | 0 | 4 | 1 - 1 | 1 | 64 | 1 | 1 | 64 | 2 | 4 | 2 - 1 | 2 | 0 | 0 | 0 | 0 | 0 | 4 | 1 - 2 | 1 | 200 | 4 | 1 | 200 | 1 | 4 | 1 + 0 | 1 | 248 | 5 | 1 | 248 | 2 | 3 | 2 + 0 | 2 | 160 | 3 | 1 | 160 | 0 | 3 | 1 + 1 | 1 | 64 | 1 | 1 | 64 | 2 | 3 | 2 + 1 | 2 | 0 | 0 | 0 | 0 | 0 | 3 | 1 + 2 | 1 | 200 | 4 | 1 | 200 | 1 | 3 | 1 (5 rows) 4:INSERT INTO crash_master_before_cleanup_phase VALUES(1, 1, 'c'), (25, 6, 'c'); INSERT 2 @@ -300,23 +300,23 @@ UPDATE 1 4:SELECT * FROM gp_toolkit.__gp_aoseg('crash_master_before_cleanup_phase'); segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state ------------+-------+-----+----------+---------------+------------------+----------+---------------+------- - 0 | 1 | 248 | 5 | 1 | 248 | 2 | 4 | 2 - 0 | 2 | 160 | 3 | 1 | 160 | 0 | 4 | 1 - 1 | 1 | 64 | 1 | 1 | 64 | 2 | 4 | 2 - 1 | 2 | 64 | 1 | 1 | 64 | 1 | 4 | 1 - 2 | 1 | 328 | 6 | 3 | 328 | 3 | 4 | 1 + 0 | 1 | 248 | 5 | 1 | 248 | 2 | 3 | 2 + 0 | 2 | 160 | 3 | 1 | 160 | 0 | 3 | 1 + 1 | 1 | 64 | 1 | 1 | 64 | 2 | 3 | 2 + 1 | 2 | 64 | 1 | 1 | 64 | 1 | 3 | 1 + 2 | 1 | 328 | 6 | 3 | 328 | 3 | 3 | 1 (5 rows) 4:VACUUM crash_master_before_cleanup_phase; VACUUM 4:SELECT * FROM gp_toolkit.__gp_aoseg('crash_master_before_cleanup_phase'); segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state ------------+-------+-----+----------+---------------+------------------+----------+---------------+------- - 0 | 1 | 0 | 0 | 0 | 0 | 2 | 4 | 1 - 0 | 2 | 160 | 3 | 1 | 160 | 0 | 4 | 1 - 1 | 1 | 0 | 0 | 0 | 0 | 2 | 4 | 1 - 1 | 2 | 64 
| 1 | 1 | 64 | 1 | 4 | 1 - 2 | 1 | 0 | 0 | 0 | 0 | 3 | 4 | 1 - 2 | 2 | 248 | 5 | 1 | 248 | 0 | 4 | 1 + 0 | 1 | 0 | 0 | 0 | 0 | 2 | 3 | 1 + 0 | 2 | 160 | 3 | 1 | 160 | 0 | 3 | 1 + 1 | 1 | 0 | 0 | 0 | 0 | 2 | 3 | 1 + 1 | 2 | 64 | 1 | 1 | 64 | 1 | 3 | 1 + 2 | 1 | 0 | 0 | 0 | 0 | 3 | 3 | 1 + 2 | 2 | 248 | 5 | 1 | 248 | 0 | 3 | 1 (6 rows) 4:INSERT INTO crash_master_before_cleanup_phase VALUES(21, 1, 'c'), (26, 1, 'c'); INSERT 2 diff --git a/src/test/isolation2/input/uao/test_pg_appendonly_version.source b/src/test/isolation2/input/uao/test_pg_appendonly_version.source new file mode 100644 index 00000000000..e4830ba208a --- /dev/null +++ b/src/test/isolation2/input/uao/test_pg_appendonly_version.source @@ -0,0 +1,29 @@ +-- Validate pg_appendonly.version functioning by unique index creation +-- as it requires pg_appendonly.version >= AORelationVersion_GP7. + +create table @amname@_version_tbl (a int) using @amname@; + +-- unique index on AO is supported starting from version 2 (AORelationVersion_GP7) +select version from pg_appendonly where relid = '@amname@_version_tbl'::regclass; +create unique index on @amname@_version_tbl(a); +insert into @amname@_version_tbl select generate_series(1, 10); +create unique index on @amname@_version_tbl(a); +set enable_seqscan = off; +select * from @amname@_version_tbl where a = 2; + +-- modify pg_appendonly.version to older such like 1 (AORelationVersion_GP6) +set allow_system_table_mods = on; +update pg_appendonly set version = 1 where relid = '@amname@_version_tbl'::regclass; + +-- unique index on AO isn't supported on version < AORelationVersion_GP7 +select version from pg_appendonly where relid = '@amname@_version_tbl'::regclass; +create unique index on @amname@_version_tbl(a); + +-- alter table with reorganize to verify pg_appendonly being rewritten +alter table @amname@_version_tbl set with (reorganize = true); +select version from pg_appendonly where relid = '@amname@_version_tbl'::regclass; +create unique index on 
@amname@_version_tbl(a); +select * from @amname@_version_tbl where a = 3; + +drop table @amname@_version_tbl; +reset allow_system_table_mods; diff --git a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule index 2b8a3e12946..83823c5f31c 100644 --- a/src/test/isolation2/isolation2_schedule +++ b/src/test/isolation2/isolation2_schedule @@ -153,6 +153,11 @@ test: uao/vacuum_while_vacuum_row test: uao/vacuum_cleanup_row test: uao/bitmapindex_rescan_row test: uao/limit_indexscan_inits_row +test: uao/create_index_allows_readonly_row +test: uao/test_pg_appendonly_version_row +# Refer to the case comment for why it is commented out. +# test: uao/bad_buffer_on_temp_ao_row + test: reorganize_after_ao_vacuum_skip_drop truncate_after_ao_vacuum_skip_drop mark_all_aoseg_await_drop # below test(s) inject faults so each of them need to be in a separate group test: segwalrep/master_wal_switch @@ -204,6 +209,10 @@ test: uao/vacuum_while_vacuum_column test: uao/vacuum_cleanup_column test: uao/bitmapindex_rescan_column test: uao/limit_indexscan_inits_column +test: uao/create_index_allows_readonly_column +test: uao/test_pg_appendonly_version_column +# Refer to the case comment for why it is commented out. 
+# test: uao/bad_buffer_on_temp_ao_column # this case contains fault injection, must be put in a separate test group test: terminate_in_gang_creation diff --git a/src/test/isolation2/output/uao/compaction_utility_insert.source b/src/test/isolation2/output/uao/compaction_utility_insert.source index 8b40bc4763e..ba037d2ee6c 100644 --- a/src/test/isolation2/output/uao/compaction_utility_insert.source +++ b/src/test/isolation2/output/uao/compaction_utility_insert.source @@ -14,7 +14,7 @@ INSERT 1 SELECT *, segno, tupcount, state FROM gp_ao_or_aocs_seg('foo'); segment_id | segno | tupcount | modcount | formatversion | state | segno | tupcount | state ------------+-------+----------+----------+---------------+-------+-------+----------+------- - 0 | 0 | 2 | 2 | 4 | 1 | 0 | 2 | 1 + 0 | 0 | 2 | 2 | 3 | 1 | 0 | 2 | 1 (1 row) DELETE FROM foo WHERE a = 2; DELETE 1 diff --git a/src/test/isolation2/output/uao/max_concurrency.source b/src/test/isolation2/output/uao/max_concurrency.source index bead35a6647..9423f38f5e6 100644 --- a/src/test/isolation2/output/uao/max_concurrency.source +++ b/src/test/isolation2/output/uao/max_concurrency.source @@ -904,133 +904,133 @@ SELECT * FROM ao; SELECT * FROM gp_ao_or_aocs_seg('ao') ORDER BY segno; segment_id | segno | tupcount | modcount | formatversion | state ------------+-------+----------+----------+---------------+------- - 1 | 1 | 1 | 1 | 4 | 1 - 1 | 2 | 1 | 1 | 4 | 1 - 1 | 3 | 1 | 1 | 4 | 1 - 1 | 4 | 1 | 1 | 4 | 1 - 1 | 5 | 1 | 1 | 4 | 1 - 1 | 6 | 1 | 1 | 4 | 1 - 1 | 7 | 1 | 1 | 4 | 1 - 1 | 8 | 1 | 1 | 4 | 1 - 1 | 9 | 1 | 1 | 4 | 1 - 1 | 10 | 1 | 1 | 4 | 1 - 1 | 11 | 1 | 1 | 4 | 1 - 1 | 12 | 1 | 1 | 4 | 1 - 1 | 13 | 1 | 1 | 4 | 1 - 1 | 14 | 1 | 1 | 4 | 1 - 1 | 15 | 1 | 1 | 4 | 1 - 1 | 16 | 1 | 1 | 4 | 1 - 1 | 17 | 1 | 1 | 4 | 1 - 1 | 18 | 1 | 1 | 4 | 1 - 1 | 19 | 1 | 1 | 4 | 1 - 1 | 20 | 1 | 1 | 4 | 1 - 1 | 21 | 1 | 1 | 4 | 1 - 1 | 22 | 1 | 1 | 4 | 1 - 1 | 23 | 1 | 1 | 4 | 1 - 1 | 24 | 1 | 1 | 4 | 1 - 1 | 25 | 1 | 1 | 4 | 1 - 1 | 26 
| 1 | 1 | 4 | 1 - 1 | 27 | 1 | 1 | 4 | 1 - 1 | 28 | 1 | 1 | 4 | 1 - 1 | 29 | 1 | 1 | 4 | 1 - 1 | 30 | 1 | 1 | 4 | 1 - 1 | 31 | 1 | 1 | 4 | 1 - 1 | 32 | 1 | 1 | 4 | 1 - 1 | 33 | 1 | 1 | 4 | 1 - 1 | 34 | 1 | 1 | 4 | 1 - 1 | 35 | 1 | 1 | 4 | 1 - 1 | 36 | 1 | 1 | 4 | 1 - 1 | 37 | 1 | 1 | 4 | 1 - 1 | 38 | 1 | 1 | 4 | 1 - 1 | 39 | 1 | 1 | 4 | 1 - 1 | 40 | 1 | 1 | 4 | 1 - 1 | 41 | 1 | 1 | 4 | 1 - 1 | 42 | 1 | 1 | 4 | 1 - 1 | 43 | 1 | 1 | 4 | 1 - 1 | 44 | 1 | 1 | 4 | 1 - 1 | 45 | 1 | 1 | 4 | 1 - 1 | 46 | 1 | 1 | 4 | 1 - 1 | 47 | 1 | 1 | 4 | 1 - 1 | 48 | 1 | 1 | 4 | 1 - 1 | 49 | 1 | 1 | 4 | 1 - 1 | 50 | 1 | 1 | 4 | 1 - 1 | 51 | 1 | 1 | 4 | 1 - 1 | 52 | 1 | 1 | 4 | 1 - 1 | 53 | 1 | 1 | 4 | 1 - 1 | 54 | 1 | 1 | 4 | 1 - 1 | 55 | 1 | 1 | 4 | 1 - 1 | 56 | 1 | 1 | 4 | 1 - 1 | 57 | 1 | 1 | 4 | 1 - 1 | 58 | 1 | 1 | 4 | 1 - 1 | 59 | 1 | 1 | 4 | 1 - 1 | 60 | 1 | 1 | 4 | 1 - 1 | 61 | 1 | 1 | 4 | 1 - 1 | 62 | 1 | 1 | 4 | 1 - 1 | 63 | 1 | 1 | 4 | 1 - 1 | 64 | 1 | 1 | 4 | 1 - 1 | 65 | 1 | 1 | 4 | 1 - 1 | 66 | 1 | 1 | 4 | 1 - 1 | 67 | 1 | 1 | 4 | 1 - 1 | 68 | 1 | 1 | 4 | 1 - 1 | 69 | 1 | 1 | 4 | 1 - 1 | 70 | 1 | 1 | 4 | 1 - 1 | 71 | 1 | 1 | 4 | 1 - 1 | 72 | 1 | 1 | 4 | 1 - 1 | 73 | 1 | 1 | 4 | 1 - 1 | 74 | 1 | 1 | 4 | 1 - 1 | 75 | 1 | 1 | 4 | 1 - 1 | 76 | 1 | 1 | 4 | 1 - 1 | 77 | 1 | 1 | 4 | 1 - 1 | 78 | 1 | 1 | 4 | 1 - 1 | 79 | 1 | 1 | 4 | 1 - 1 | 80 | 1 | 1 | 4 | 1 - 1 | 81 | 1 | 1 | 4 | 1 - 1 | 82 | 1 | 1 | 4 | 1 - 1 | 83 | 1 | 1 | 4 | 1 - 1 | 84 | 1 | 1 | 4 | 1 - 1 | 85 | 1 | 1 | 4 | 1 - 1 | 86 | 1 | 1 | 4 | 1 - 1 | 87 | 1 | 1 | 4 | 1 - 1 | 88 | 1 | 1 | 4 | 1 - 1 | 89 | 1 | 1 | 4 | 1 - 1 | 90 | 1 | 1 | 4 | 1 - 1 | 91 | 1 | 1 | 4 | 1 - 1 | 92 | 1 | 1 | 4 | 1 - 1 | 93 | 1 | 1 | 4 | 1 - 1 | 94 | 1 | 1 | 4 | 1 - 1 | 95 | 1 | 1 | 4 | 1 - 1 | 96 | 1 | 1 | 4 | 1 - 1 | 97 | 1 | 1 | 4 | 1 - 1 | 98 | 1 | 1 | 4 | 1 - 1 | 99 | 1 | 1 | 4 | 1 - 1 | 100 | 1 | 1 | 4 | 1 - 1 | 101 | 1 | 1 | 4 | 1 - 1 | 102 | 1 | 1 | 4 | 1 - 1 | 103 | 1 | 1 | 4 | 1 - 1 | 104 | 1 | 1 | 4 | 1 - 1 | 105 | 1 | 1 | 4 | 1 - 
1 | 106 | 1 | 1 | 4 | 1 - 1 | 107 | 1 | 1 | 4 | 1 - 1 | 108 | 1 | 1 | 4 | 1 - 1 | 109 | 1 | 1 | 4 | 1 - 1 | 110 | 1 | 1 | 4 | 1 - 1 | 111 | 1 | 1 | 4 | 1 - 1 | 112 | 1 | 1 | 4 | 1 - 1 | 113 | 1 | 1 | 4 | 1 - 1 | 114 | 1 | 1 | 4 | 1 - 1 | 115 | 1 | 1 | 4 | 1 - 1 | 116 | 1 | 1 | 4 | 1 - 1 | 117 | 1 | 1 | 4 | 1 - 1 | 118 | 1 | 1 | 4 | 1 - 1 | 119 | 1 | 1 | 4 | 1 - 1 | 120 | 1 | 1 | 4 | 1 - 1 | 121 | 1 | 1 | 4 | 1 - 1 | 122 | 1 | 1 | 4 | 1 - 1 | 123 | 1 | 1 | 4 | 1 - 1 | 124 | 1 | 1 | 4 | 1 - 1 | 125 | 1 | 1 | 4 | 1 - 1 | 126 | 1 | 1 | 4 | 1 - 1 | 127 | 1 | 1 | 4 | 1 + 1 | 1 | 1 | 1 | 3 | 1 + 1 | 2 | 1 | 1 | 3 | 1 + 1 | 3 | 1 | 1 | 3 | 1 + 1 | 4 | 1 | 1 | 3 | 1 + 1 | 5 | 1 | 1 | 3 | 1 + 1 | 6 | 1 | 1 | 3 | 1 + 1 | 7 | 1 | 1 | 3 | 1 + 1 | 8 | 1 | 1 | 3 | 1 + 1 | 9 | 1 | 1 | 3 | 1 + 1 | 10 | 1 | 1 | 3 | 1 + 1 | 11 | 1 | 1 | 3 | 1 + 1 | 12 | 1 | 1 | 3 | 1 + 1 | 13 | 1 | 1 | 3 | 1 + 1 | 14 | 1 | 1 | 3 | 1 + 1 | 15 | 1 | 1 | 3 | 1 + 1 | 16 | 1 | 1 | 3 | 1 + 1 | 17 | 1 | 1 | 3 | 1 + 1 | 18 | 1 | 1 | 3 | 1 + 1 | 19 | 1 | 1 | 3 | 1 + 1 | 20 | 1 | 1 | 3 | 1 + 1 | 21 | 1 | 1 | 3 | 1 + 1 | 22 | 1 | 1 | 3 | 1 + 1 | 23 | 1 | 1 | 3 | 1 + 1 | 24 | 1 | 1 | 3 | 1 + 1 | 25 | 1 | 1 | 3 | 1 + 1 | 26 | 1 | 1 | 3 | 1 + 1 | 27 | 1 | 1 | 3 | 1 + 1 | 28 | 1 | 1 | 3 | 1 + 1 | 29 | 1 | 1 | 3 | 1 + 1 | 30 | 1 | 1 | 3 | 1 + 1 | 31 | 1 | 1 | 3 | 1 + 1 | 32 | 1 | 1 | 3 | 1 + 1 | 33 | 1 | 1 | 3 | 1 + 1 | 34 | 1 | 1 | 3 | 1 + 1 | 35 | 1 | 1 | 3 | 1 + 1 | 36 | 1 | 1 | 3 | 1 + 1 | 37 | 1 | 1 | 3 | 1 + 1 | 38 | 1 | 1 | 3 | 1 + 1 | 39 | 1 | 1 | 3 | 1 + 1 | 40 | 1 | 1 | 3 | 1 + 1 | 41 | 1 | 1 | 3 | 1 + 1 | 42 | 1 | 1 | 3 | 1 + 1 | 43 | 1 | 1 | 3 | 1 + 1 | 44 | 1 | 1 | 3 | 1 + 1 | 45 | 1 | 1 | 3 | 1 + 1 | 46 | 1 | 1 | 3 | 1 + 1 | 47 | 1 | 1 | 3 | 1 + 1 | 48 | 1 | 1 | 3 | 1 + 1 | 49 | 1 | 1 | 3 | 1 + 1 | 50 | 1 | 1 | 3 | 1 + 1 | 51 | 1 | 1 | 3 | 1 + 1 | 52 | 1 | 1 | 3 | 1 + 1 | 53 | 1 | 1 | 3 | 1 + 1 | 54 | 1 | 1 | 3 | 1 + 1 | 55 | 1 | 1 | 3 | 1 + 1 | 56 | 1 | 1 | 3 | 1 + 1 | 57 | 1 | 1 | 3 | 1 + 1 | 58 | 1 
| 1 | 3 | 1 + 1 | 59 | 1 | 1 | 3 | 1 + 1 | 60 | 1 | 1 | 3 | 1 + 1 | 61 | 1 | 1 | 3 | 1 + 1 | 62 | 1 | 1 | 3 | 1 + 1 | 63 | 1 | 1 | 3 | 1 + 1 | 64 | 1 | 1 | 3 | 1 + 1 | 65 | 1 | 1 | 3 | 1 + 1 | 66 | 1 | 1 | 3 | 1 + 1 | 67 | 1 | 1 | 3 | 1 + 1 | 68 | 1 | 1 | 3 | 1 + 1 | 69 | 1 | 1 | 3 | 1 + 1 | 70 | 1 | 1 | 3 | 1 + 1 | 71 | 1 | 1 | 3 | 1 + 1 | 72 | 1 | 1 | 3 | 1 + 1 | 73 | 1 | 1 | 3 | 1 + 1 | 74 | 1 | 1 | 3 | 1 + 1 | 75 | 1 | 1 | 3 | 1 + 1 | 76 | 1 | 1 | 3 | 1 + 1 | 77 | 1 | 1 | 3 | 1 + 1 | 78 | 1 | 1 | 3 | 1 + 1 | 79 | 1 | 1 | 3 | 1 + 1 | 80 | 1 | 1 | 3 | 1 + 1 | 81 | 1 | 1 | 3 | 1 + 1 | 82 | 1 | 1 | 3 | 1 + 1 | 83 | 1 | 1 | 3 | 1 + 1 | 84 | 1 | 1 | 3 | 1 + 1 | 85 | 1 | 1 | 3 | 1 + 1 | 86 | 1 | 1 | 3 | 1 + 1 | 87 | 1 | 1 | 3 | 1 + 1 | 88 | 1 | 1 | 3 | 1 + 1 | 89 | 1 | 1 | 3 | 1 + 1 | 90 | 1 | 1 | 3 | 1 + 1 | 91 | 1 | 1 | 3 | 1 + 1 | 92 | 1 | 1 | 3 | 1 + 1 | 93 | 1 | 1 | 3 | 1 + 1 | 94 | 1 | 1 | 3 | 1 + 1 | 95 | 1 | 1 | 3 | 1 + 1 | 96 | 1 | 1 | 3 | 1 + 1 | 97 | 1 | 1 | 3 | 1 + 1 | 98 | 1 | 1 | 3 | 1 + 1 | 99 | 1 | 1 | 3 | 1 + 1 | 100 | 1 | 1 | 3 | 1 + 1 | 101 | 1 | 1 | 3 | 1 + 1 | 102 | 1 | 1 | 3 | 1 + 1 | 103 | 1 | 1 | 3 | 1 + 1 | 104 | 1 | 1 | 3 | 1 + 1 | 105 | 1 | 1 | 3 | 1 + 1 | 106 | 1 | 1 | 3 | 1 + 1 | 107 | 1 | 1 | 3 | 1 + 1 | 108 | 1 | 1 | 3 | 1 + 1 | 109 | 1 | 1 | 3 | 1 + 1 | 110 | 1 | 1 | 3 | 1 + 1 | 111 | 1 | 1 | 3 | 1 + 1 | 112 | 1 | 1 | 3 | 1 + 1 | 113 | 1 | 1 | 3 | 1 + 1 | 114 | 1 | 1 | 3 | 1 + 1 | 115 | 1 | 1 | 3 | 1 + 1 | 116 | 1 | 1 | 3 | 1 + 1 | 117 | 1 | 1 | 3 | 1 + 1 | 118 | 1 | 1 | 3 | 1 + 1 | 119 | 1 | 1 | 3 | 1 + 1 | 120 | 1 | 1 | 3 | 1 + 1 | 121 | 1 | 1 | 3 | 1 + 1 | 122 | 1 | 1 | 3 | 1 + 1 | 123 | 1 | 1 | 3 | 1 + 1 | 124 | 1 | 1 | 3 | 1 + 1 | 125 | 1 | 1 | 3 | 1 + 1 | 126 | 1 | 1 | 3 | 1 + 1 | 127 | 1 | 1 | 3 | 1 (127 rows) ALTER RESOURCE GROUP admin_group SET CONCURRENCY 20; diff --git a/src/test/isolation2/output/uao/max_concurrency2.source b/src/test/isolation2/output/uao/max_concurrency2.source index 49592999be2..a83f9de1bdc 100644 --- 
a/src/test/isolation2/output/uao/max_concurrency2.source +++ b/src/test/isolation2/output/uao/max_concurrency2.source @@ -918,133 +918,133 @@ SELECT * FROM ao; SELECT * FROM gp_ao_or_aocs_seg('ao') ORDER BY segno; segment_id | segno | tupcount | modcount | formatversion | state ------------+-------+----------+----------+---------------+------- - 1 | 1 | 1 | 1 | 4 | 1 - 1 | 2 | 1 | 1 | 4 | 1 - 1 | 3 | 1 | 1 | 4 | 1 - 1 | 4 | 1 | 1 | 4 | 1 - 1 | 5 | 1 | 1 | 4 | 1 - 1 | 6 | 1 | 1 | 4 | 1 - 1 | 7 | 1 | 1 | 4 | 1 - 1 | 8 | 1 | 1 | 4 | 1 - 1 | 9 | 1 | 1 | 4 | 1 - 1 | 10 | 1 | 1 | 4 | 1 - 1 | 11 | 1 | 1 | 4 | 1 - 1 | 12 | 1 | 1 | 4 | 1 - 1 | 13 | 1 | 1 | 4 | 1 - 1 | 14 | 1 | 1 | 4 | 1 - 1 | 15 | 1 | 1 | 4 | 1 - 1 | 16 | 1 | 1 | 4 | 1 - 1 | 17 | 1 | 1 | 4 | 1 - 1 | 18 | 1 | 1 | 4 | 1 - 1 | 19 | 1 | 1 | 4 | 1 - 1 | 20 | 1 | 1 | 4 | 1 - 1 | 21 | 1 | 1 | 4 | 1 - 1 | 22 | 1 | 1 | 4 | 1 - 1 | 23 | 1 | 1 | 4 | 1 - 1 | 24 | 1 | 1 | 4 | 1 - 1 | 25 | 1 | 1 | 4 | 1 - 1 | 26 | 1 | 1 | 4 | 1 - 1 | 27 | 1 | 1 | 4 | 1 - 1 | 28 | 1 | 1 | 4 | 1 - 1 | 29 | 1 | 1 | 4 | 1 - 1 | 30 | 1 | 1 | 4 | 1 - 1 | 31 | 1 | 1 | 4 | 1 - 1 | 32 | 1 | 1 | 4 | 1 - 1 | 33 | 1 | 1 | 4 | 1 - 1 | 34 | 1 | 1 | 4 | 1 - 1 | 35 | 1 | 1 | 4 | 1 - 1 | 36 | 1 | 1 | 4 | 1 - 1 | 37 | 1 | 1 | 4 | 1 - 1 | 38 | 1 | 1 | 4 | 1 - 1 | 39 | 1 | 1 | 4 | 1 - 1 | 40 | 1 | 1 | 4 | 1 - 1 | 41 | 1 | 1 | 4 | 1 - 1 | 42 | 1 | 1 | 4 | 1 - 1 | 43 | 1 | 1 | 4 | 1 - 1 | 44 | 1 | 1 | 4 | 1 - 1 | 45 | 1 | 1 | 4 | 1 - 1 | 46 | 1 | 1 | 4 | 1 - 1 | 47 | 1 | 1 | 4 | 1 - 1 | 48 | 1 | 1 | 4 | 1 - 1 | 49 | 1 | 1 | 4 | 1 - 1 | 50 | 1 | 1 | 4 | 1 - 1 | 51 | 1 | 1 | 4 | 1 - 1 | 52 | 1 | 1 | 4 | 1 - 1 | 53 | 1 | 1 | 4 | 1 - 1 | 54 | 1 | 1 | 4 | 1 - 1 | 55 | 1 | 1 | 4 | 1 - 1 | 56 | 1 | 1 | 4 | 1 - 1 | 57 | 1 | 1 | 4 | 1 - 1 | 58 | 1 | 1 | 4 | 1 - 1 | 59 | 1 | 1 | 4 | 1 - 1 | 60 | 1 | 1 | 4 | 1 - 1 | 61 | 1 | 1 | 4 | 1 - 1 | 62 | 1 | 1 | 4 | 1 - 1 | 63 | 1 | 1 | 4 | 1 - 1 | 64 | 1 | 1 | 4 | 1 - 1 | 65 | 1 | 1 | 4 | 1 - 1 | 66 | 1 | 1 | 4 | 1 - 1 | 67 | 1 
| 1 | 4 | 1 - 1 | 68 | 1 | 1 | 4 | 1 - 1 | 69 | 1 | 1 | 4 | 1 - 1 | 70 | 1 | 1 | 4 | 1 - 1 | 71 | 1 | 1 | 4 | 1 - 1 | 72 | 1 | 1 | 4 | 1 - 1 | 73 | 1 | 1 | 4 | 1 - 1 | 74 | 1 | 1 | 4 | 1 - 1 | 75 | 1 | 1 | 4 | 1 - 1 | 76 | 1 | 1 | 4 | 1 - 1 | 77 | 1 | 1 | 4 | 1 - 1 | 78 | 1 | 1 | 4 | 1 - 1 | 79 | 1 | 1 | 4 | 1 - 1 | 80 | 1 | 1 | 4 | 1 - 1 | 81 | 1 | 1 | 4 | 1 - 1 | 82 | 1 | 1 | 4 | 1 - 1 | 83 | 1 | 1 | 4 | 1 - 1 | 84 | 1 | 1 | 4 | 1 - 1 | 85 | 1 | 1 | 4 | 1 - 1 | 86 | 1 | 1 | 4 | 1 - 1 | 87 | 1 | 1 | 4 | 1 - 1 | 88 | 1 | 1 | 4 | 1 - 1 | 89 | 1 | 1 | 4 | 1 - 1 | 90 | 1 | 1 | 4 | 1 - 1 | 91 | 1 | 1 | 4 | 1 - 1 | 92 | 1 | 1 | 4 | 1 - 1 | 93 | 1 | 1 | 4 | 1 - 1 | 94 | 1 | 1 | 4 | 1 - 1 | 95 | 1 | 1 | 4 | 1 - 1 | 96 | 1 | 1 | 4 | 1 - 1 | 97 | 1 | 1 | 4 | 1 - 1 | 98 | 1 | 1 | 4 | 1 - 1 | 99 | 1 | 1 | 4 | 1 - 1 | 100 | 1 | 1 | 4 | 1 - 1 | 101 | 1 | 1 | 4 | 1 - 1 | 102 | 1 | 1 | 4 | 1 - 1 | 103 | 1 | 1 | 4 | 1 - 1 | 104 | 1 | 1 | 4 | 1 - 1 | 105 | 1 | 1 | 4 | 1 - 1 | 106 | 1 | 1 | 4 | 1 - 1 | 107 | 1 | 1 | 4 | 1 - 1 | 108 | 1 | 1 | 4 | 1 - 1 | 109 | 1 | 1 | 4 | 1 - 1 | 110 | 1 | 1 | 4 | 1 - 1 | 111 | 1 | 1 | 4 | 1 - 1 | 112 | 1 | 1 | 4 | 1 - 1 | 113 | 1 | 1 | 4 | 1 - 1 | 114 | 1 | 1 | 4 | 1 - 1 | 115 | 1 | 1 | 4 | 1 - 1 | 116 | 1 | 1 | 4 | 1 - 1 | 117 | 1 | 1 | 4 | 1 - 1 | 118 | 1 | 1 | 4 | 1 - 1 | 119 | 1 | 1 | 4 | 1 - 1 | 120 | 1 | 1 | 4 | 1 - 1 | 121 | 1 | 1 | 4 | 1 - 1 | 122 | 1 | 1 | 4 | 1 - 1 | 123 | 1 | 1 | 4 | 1 - 1 | 124 | 1 | 1 | 4 | 1 - 1 | 125 | 1 | 1 | 4 | 1 - 1 | 126 | 1 | 1 | 4 | 1 - 1 | 127 | 1 | 1 | 4 | 1 + 1 | 1 | 1 | 1 | 3 | 1 + 1 | 2 | 1 | 1 | 3 | 1 + 1 | 3 | 1 | 1 | 3 | 1 + 1 | 4 | 1 | 1 | 3 | 1 + 1 | 5 | 1 | 1 | 3 | 1 + 1 | 6 | 1 | 1 | 3 | 1 + 1 | 7 | 1 | 1 | 3 | 1 + 1 | 8 | 1 | 1 | 3 | 1 + 1 | 9 | 1 | 1 | 3 | 1 + 1 | 10 | 1 | 1 | 3 | 1 + 1 | 11 | 1 | 1 | 3 | 1 + 1 | 12 | 1 | 1 | 3 | 1 + 1 | 13 | 1 | 1 | 3 | 1 + 1 | 14 | 1 | 1 | 3 | 1 + 1 | 15 | 1 | 1 | 3 | 1 + 1 | 16 | 1 | 1 | 3 | 1 + 1 | 17 | 1 | 1 | 3 | 1 + 1 | 18 | 1 | 1 | 3 | 1 + 1 | 19 | 1 | 1 | 
3 | 1 + 1 | 20 | 1 | 1 | 3 | 1 + 1 | 21 | 1 | 1 | 3 | 1 + 1 | 22 | 1 | 1 | 3 | 1 + 1 | 23 | 1 | 1 | 3 | 1 + 1 | 24 | 1 | 1 | 3 | 1 + 1 | 25 | 1 | 1 | 3 | 1 + 1 | 26 | 1 | 1 | 3 | 1 + 1 | 27 | 1 | 1 | 3 | 1 + 1 | 28 | 1 | 1 | 3 | 1 + 1 | 29 | 1 | 1 | 3 | 1 + 1 | 30 | 1 | 1 | 3 | 1 + 1 | 31 | 1 | 1 | 3 | 1 + 1 | 32 | 1 | 1 | 3 | 1 + 1 | 33 | 1 | 1 | 3 | 1 + 1 | 34 | 1 | 1 | 3 | 1 + 1 | 35 | 1 | 1 | 3 | 1 + 1 | 36 | 1 | 1 | 3 | 1 + 1 | 37 | 1 | 1 | 3 | 1 + 1 | 38 | 1 | 1 | 3 | 1 + 1 | 39 | 1 | 1 | 3 | 1 + 1 | 40 | 1 | 1 | 3 | 1 + 1 | 41 | 1 | 1 | 3 | 1 + 1 | 42 | 1 | 1 | 3 | 1 + 1 | 43 | 1 | 1 | 3 | 1 + 1 | 44 | 1 | 1 | 3 | 1 + 1 | 45 | 1 | 1 | 3 | 1 + 1 | 46 | 1 | 1 | 3 | 1 + 1 | 47 | 1 | 1 | 3 | 1 + 1 | 48 | 1 | 1 | 3 | 1 + 1 | 49 | 1 | 1 | 3 | 1 + 1 | 50 | 1 | 1 | 3 | 1 + 1 | 51 | 1 | 1 | 3 | 1 + 1 | 52 | 1 | 1 | 3 | 1 + 1 | 53 | 1 | 1 | 3 | 1 + 1 | 54 | 1 | 1 | 3 | 1 + 1 | 55 | 1 | 1 | 3 | 1 + 1 | 56 | 1 | 1 | 3 | 1 + 1 | 57 | 1 | 1 | 3 | 1 + 1 | 58 | 1 | 1 | 3 | 1 + 1 | 59 | 1 | 1 | 3 | 1 + 1 | 60 | 1 | 1 | 3 | 1 + 1 | 61 | 1 | 1 | 3 | 1 + 1 | 62 | 1 | 1 | 3 | 1 + 1 | 63 | 1 | 1 | 3 | 1 + 1 | 64 | 1 | 1 | 3 | 1 + 1 | 65 | 1 | 1 | 3 | 1 + 1 | 66 | 1 | 1 | 3 | 1 + 1 | 67 | 1 | 1 | 3 | 1 + 1 | 68 | 1 | 1 | 3 | 1 + 1 | 69 | 1 | 1 | 3 | 1 + 1 | 70 | 1 | 1 | 3 | 1 + 1 | 71 | 1 | 1 | 3 | 1 + 1 | 72 | 1 | 1 | 3 | 1 + 1 | 73 | 1 | 1 | 3 | 1 + 1 | 74 | 1 | 1 | 3 | 1 + 1 | 75 | 1 | 1 | 3 | 1 + 1 | 76 | 1 | 1 | 3 | 1 + 1 | 77 | 1 | 1 | 3 | 1 + 1 | 78 | 1 | 1 | 3 | 1 + 1 | 79 | 1 | 1 | 3 | 1 + 1 | 80 | 1 | 1 | 3 | 1 + 1 | 81 | 1 | 1 | 3 | 1 + 1 | 82 | 1 | 1 | 3 | 1 + 1 | 83 | 1 | 1 | 3 | 1 + 1 | 84 | 1 | 1 | 3 | 1 + 1 | 85 | 1 | 1 | 3 | 1 + 1 | 86 | 1 | 1 | 3 | 1 + 1 | 87 | 1 | 1 | 3 | 1 + 1 | 88 | 1 | 1 | 3 | 1 + 1 | 89 | 1 | 1 | 3 | 1 + 1 | 90 | 1 | 1 | 3 | 1 + 1 | 91 | 1 | 1 | 3 | 1 + 1 | 92 | 1 | 1 | 3 | 1 + 1 | 93 | 1 | 1 | 3 | 1 + 1 | 94 | 1 | 1 | 3 | 1 + 1 | 95 | 1 | 1 | 3 | 1 + 1 | 96 | 1 | 1 | 3 | 1 + 1 | 97 | 1 | 1 | 3 | 1 + 1 | 98 | 1 | 1 | 3 | 1 + 1 | 99 | 1 | 1 | 
3 | 1 + 1 | 100 | 1 | 1 | 3 | 1 + 1 | 101 | 1 | 1 | 3 | 1 + 1 | 102 | 1 | 1 | 3 | 1 + 1 | 103 | 1 | 1 | 3 | 1 + 1 | 104 | 1 | 1 | 3 | 1 + 1 | 105 | 1 | 1 | 3 | 1 + 1 | 106 | 1 | 1 | 3 | 1 + 1 | 107 | 1 | 1 | 3 | 1 + 1 | 108 | 1 | 1 | 3 | 1 + 1 | 109 | 1 | 1 | 3 | 1 + 1 | 110 | 1 | 1 | 3 | 1 + 1 | 111 | 1 | 1 | 3 | 1 + 1 | 112 | 1 | 1 | 3 | 1 + 1 | 113 | 1 | 1 | 3 | 1 + 1 | 114 | 1 | 1 | 3 | 1 + 1 | 115 | 1 | 1 | 3 | 1 + 1 | 116 | 1 | 1 | 3 | 1 + 1 | 117 | 1 | 1 | 3 | 1 + 1 | 118 | 1 | 1 | 3 | 1 + 1 | 119 | 1 | 1 | 3 | 1 + 1 | 120 | 1 | 1 | 3 | 1 + 1 | 121 | 1 | 1 | 3 | 1 + 1 | 122 | 1 | 1 | 3 | 1 + 1 | 123 | 1 | 1 | 3 | 1 + 1 | 124 | 1 | 1 | 3 | 1 + 1 | 125 | 1 | 1 | 3 | 1 + 1 | 126 | 1 | 1 | 3 | 1 + 1 | 127 | 1 | 1 | 3 | 1 (127 rows) ALTER RESOURCE GROUP admin_group SET CONCURRENCY 20; diff --git a/src/test/isolation2/output/uao/select_after_vacuum.source b/src/test/isolation2/output/uao/select_after_vacuum.source index 3a01ec94561..5227d1fe7c1 100644 --- a/src/test/isolation2/output/uao/select_after_vacuum.source +++ b/src/test/isolation2/output/uao/select_after_vacuum.source @@ -109,10 +109,10 @@ INSERT 1 0: SELECT * FROM gp_ao_or_aocs_seg('ao'); segment_id | segno | tupcount | modcount | formatversion | state ------------+-------+----------+----------+---------------+------- - 0 | 1 | 7098 | 22 | 4 | 2 - 0 | 2 | 6069 | 0 | 4 | 1 - 1 | 1 | 6762 | 22 | 4 | 2 - 1 | 2 | 5923 | 1 | 4 | 1 - 2 | 1 | 7140 | 22 | 4 | 2 - 2 | 2 | 6342 | 0 | 4 | 1 + 0 | 1 | 7098 | 22 | 3 | 2 + 0 | 2 | 6069 | 0 | 3 | 1 + 1 | 1 | 6762 | 22 | 3 | 2 + 1 | 2 | 5923 | 1 | 3 | 1 + 2 | 1 | 7140 | 22 | 3 | 2 + 2 | 2 | 6342 | 0 | 3 | 1 (6 rows) diff --git a/src/test/isolation2/output/uao/test_pg_appendonly_version.source b/src/test/isolation2/output/uao/test_pg_appendonly_version.source new file mode 100644 index 00000000000..bfdbb067cea --- /dev/null +++ b/src/test/isolation2/output/uao/test_pg_appendonly_version.source @@ -0,0 +1,63 @@ +-- Validate pg_appendonly.version functioning by unique 
index creation +-- as it requires pg_appendonly.version >= AORelationVersion_GP7. + +create table @amname@_version_tbl (a int) using @amname@; +CREATE + +-- unique index on AO is supported starting from version 2 (AORelationVersion_GP7) +select version from pg_appendonly where relid = '@amname@_version_tbl'::regclass; + version +--------- + 2 +(1 row) +create unique index on @amname@_version_tbl(a); +CREATE +insert into @amname@_version_tbl select generate_series(1, 10); +INSERT 10 +create unique index on @amname@_version_tbl(a); +CREATE +set enable_seqscan = off; +SET +select * from @amname@_version_tbl where a = 2; + a +--- + 2 +(1 row) + +-- modify pg_appendonly.version to older such like 1 (AORelationVersion_GP6) +set allow_system_table_mods = on; +SET +update pg_appendonly set version = 1 where relid = '@amname@_version_tbl'::regclass; +UPDATE 1 + +-- unique index on AO isn't supported on version < AORelationVersion_GP7 +select version from pg_appendonly where relid = '@amname@_version_tbl'::regclass; + version +--------- + 1 +(1 row) +create unique index on @amname@_version_tbl(a); +ERROR: append-only tables with older relation versions do not support unique indexes +DETAIL: version found = 1, minimum version required = 2 +HINT: ALTER TABLE SET WITH (REORGANIZE = true) before creating the unique index + +-- alter table with reorganize to verify pg_appendonly being rewritten +alter table @amname@_version_tbl set with (reorganize = true); +ALTER +select version from pg_appendonly where relid = '@amname@_version_tbl'::regclass; + version +--------- + 2 +(1 row) +create unique index on @amname@_version_tbl(a); +CREATE +select * from @amname@_version_tbl where a = 3; + a +--- + 3 +(1 row) + +drop table @amname@_version_tbl; +DROP +reset allow_system_table_mods; +RESET diff --git a/src/test/isolation2/output/uao/vacuum_cleanup.source b/src/test/isolation2/output/uao/vacuum_cleanup.source index 430853cc420..9f4fb529184 100644 --- 
a/src/test/isolation2/output/uao/vacuum_cleanup.source +++ b/src/test/isolation2/output/uao/vacuum_cleanup.source @@ -114,12 +114,12 @@ VACUUM 1: SELECT * FROM gp_ao_or_aocs_seg('ao_@amname@_vacuum_cleanup3'); segment_id | segno | tupcount | modcount | formatversion | state ------------+-------+----------+----------+---------------+------- - 0 | 1 | 0 | 2 | 4 | 1 - 0 | 2 | 0 | 0 | 4 | 1 - 2 | 1 | 0 | 2 | 4 | 1 - 2 | 2 | 0 | 0 | 4 | 1 - 1 | 1 | 0 | 2 | 4 | 1 - 1 | 2 | 0 | 0 | 4 | 1 + 0 | 1 | 0 | 2 | 3 | 1 + 0 | 2 | 0 | 0 | 3 | 1 + 2 | 1 | 0 | 2 | 3 | 1 + 2 | 2 | 0 | 0 | 3 | 1 + 1 | 1 | 0 | 2 | 3 | 1 + 1 | 2 | 0 | 0 | 3 | 1 (6 rows) 2: commit; @@ -150,7 +150,7 @@ UPDATE 11 0U: select * from show_aoseg('vacuum_concurrent_test_@amname@') order by segno; segno | tupcount | modcount | formatversion | state -------+----------+----------+---------------+------- - 1 | 22 | 2 | 4 | 1 + 1 | 22 | 2 | 3 | 1 (1 row) -- start a reader before VACUUM @@ -169,8 +169,8 @@ VACUUM 0U: select * from show_aoseg('vacuum_concurrent_test_@amname@') order by segno; segno | tupcount | modcount | formatversion | state -------+----------+----------+---------------+------- - 1 | 22 | 2 | 4 | 2 - 2 | 11 | 0 | 4 | 1 + 1 | 22 | 2 | 3 | 2 + 2 | 11 | 0 | 3 | 1 (2 rows) 1: select * from vacuum_concurrent_test_@amname@ where b = 5; @@ -209,8 +209,8 @@ BEGIN 0U: select * from show_aoseg('vacuum_concurrent_test_@amname@') order by segno; segno | tupcount | modcount | formatversion | state -------+----------+----------+---------------+------- - 1 | 22 | 2 | 4 | 2 - 2 | 11 | 0 | 4 | 1 + 1 | 22 | 2 | 3 | 2 + 2 | 11 | 0 | 3 | 1 (2 rows) vacuum vacuum_concurrent_test_@amname@; @@ -220,8 +220,8 @@ VACUUM 0U: select * from show_aoseg('vacuum_concurrent_test_@amname@') order by segno; segno | tupcount | modcount | formatversion | state -------+----------+----------+---------------+------- - 1 | 0 | 2 | 4 | 1 - 2 | 11 | 0 | 4 | 1 + 1 | 0 | 2 | 3 | 1 + 2 | 11 | 0 | 3 | 1 (2 rows) 2: select * from 
vacuum_concurrent_test_@amname@ where b = 7; diff --git a/src/test/isolation2/output/uao/vacuum_self_serializable.source b/src/test/isolation2/output/uao/vacuum_self_serializable.source index cf109c338e5..fd70b62d4a1 100644 --- a/src/test/isolation2/output/uao/vacuum_self_serializable.source +++ b/src/test/isolation2/output/uao/vacuum_self_serializable.source @@ -19,9 +19,9 @@ SELECT COUNT(*) FROM ao; SELECT *, segno, tupcount FROM gp_ao_or_aocs_seg('ao'); segment_id | segno | tupcount | modcount | formatversion | state | segno | tupcount ------------+-------+----------+----------+---------------+-------+-------+---------- - 0 | 1 | 38 | 2 | 4 | 1 | 1 | 38 - 1 | 1 | 37 | 2 | 4 | 1 | 1 | 37 - 2 | 1 | 25 | 2 | 4 | 1 | 1 | 25 + 0 | 1 | 38 | 2 | 3 | 1 | 1 | 38 + 1 | 1 | 37 | 2 | 3 | 1 | 1 | 37 + 2 | 1 | 25 | 2 | 3 | 1 | 1 | 25 (3 rows) VACUUM ao; VACUUM @@ -29,9 +29,9 @@ VACUUM SELECT *, segno, tupcount FROM gp_ao_or_aocs_seg('ao') where state = 1 and tupcount > 0; segment_id | segno | tupcount | modcount | formatversion | state | segno | tupcount ------------+-------+----------+----------+---------------+-------+-------+---------- - 0 | 2 | 26 | 0 | 4 | 1 | 2 | 26 - 1 | 2 | 30 | 0 | 4 | 1 | 2 | 30 - 2 | 2 | 14 | 0 | 4 | 1 | 2 | 14 + 0 | 2 | 26 | 0 | 3 | 1 | 2 | 26 + 1 | 2 | 30 | 0 | 3 | 1 | 2 | 30 + 2 | 2 | 14 | 0 | 3 | 1 | 2 | 14 (3 rows) SELECT COUNT(*) FROM ao; count diff --git a/src/test/regress/output/gp_tablespace.source b/src/test/regress/output/gp_tablespace.source index fbad4a38b11..e8678813e70 100644 --- a/src/test/regress/output/gp_tablespace.source +++ b/src/test/regress/output/gp_tablespace.source @@ -166,9 +166,9 @@ select count(*) from ao_ul_ctas; select * from gp_toolkit.__gp_aoseg('ao_ul_ctas'); segment_id | segno | eof | tupcount | varblockcount | eof_uncompressed | modcount | formatversion | state ------------+-------+-------+----------+---------------+------------------+----------+---------------+------- - 2 | 0 | 84336 | 3247 | 3 | 84336 | 1 | 4 | 
1 - 1 | 0 | 87816 | 3385 | 3 | 87816 | 1 | 4 | 1 - 0 | 0 | 87368 | 3368 | 3 | 87368 | 1 | 4 | 1 + 2 | 0 | 84336 | 3247 | 3 | 84336 | 1 | 3 | 1 + 1 | 0 | 87816 | 3385 | 3 | 87816 | 1 | 3 | 1 + 0 | 0 | 87368 | 3368 | 3 | 87368 | 1 | 3 | 1 (3 rows) select count(*) from aoco_ul_ctas; @@ -180,12 +180,12 @@ select count(*) from aoco_ul_ctas; select * from gp_toolkit.__gp_aocsseg('aoco_ul_ctas'); segment_id | segno | column_num | physical_segno | tupcount | eof | eof_uncompressed | modcount | formatversion | state ------------+-------+------------+----------------+----------+-------+------------------+----------+---------------+------- - 0 | 0 | 0 | 0 | 3368 | 13512 | 13512 | 1 | 4 | 1 - 0 | 0 | 1 | 128 | 3368 | 26608 | 26608 | 1 | 4 | 1 - 1 | 0 | 0 | 0 | 3385 | 13584 | 13584 | 1 | 4 | 1 - 1 | 0 | 1 | 128 | 3385 | 26760 | 26760 | 1 | 4 | 1 - 2 | 0 | 0 | 0 | 3247 | 13032 | 13032 | 1 | 4 | 1 - 2 | 0 | 1 | 128 | 3247 | 25656 | 25656 | 1 | 4 | 1 + 0 | 0 | 0 | 0 | 3368 | 13512 | 13512 | 1 | 3 | 1 + 0 | 0 | 1 | 128 | 3368 | 26608 | 26608 | 1 | 3 | 1 + 1 | 0 | 0 | 0 | 3385 | 13584 | 13584 | 1 | 3 | 1 + 1 | 0 | 1 | 128 | 3385 | 26760 | 26760 | 1 | 3 | 1 + 2 | 0 | 0 | 0 | 3247 | 13032 | 13032 | 1 | 3 | 1 + 2 | 0 | 1 | 128 | 3247 | 25656 | 25656 | 1 | 3 | 1 (6 rows) -- Check that init fork exists on master From 26fcd5a2e8606309355bbed76e683e537fc4455d Mon Sep 17 00:00:00 2001 From: Soumyadeep Chakraborty Date: Mon, 21 Nov 2022 09:20:40 -0800 Subject: [PATCH 13/19] ao/co: Smoke tests for subtxs and unique indexes Though no special code was written for unique indexes in the context of subtransactions, add smoke tests all the same: * For added coverage * For illustrative purposes --- src/backend/access/appendonly/README.md | 7 +- .../isolation2/expected/ao_unique_index.out | 52 ++++++++++++++ .../isolation2/expected/aocs_unique_index.out | 52 ++++++++++++++ src/test/isolation2/sql/ao_unique_index.sql | 33 +++++++++ src/test/isolation2/sql/aocs_unique_index.sql | 33 +++++++++ 
.../uao_dml_unique_index_delete.source | 40 +++++++++++ .../uao_dml_unique_index_update.source | 56 +++++++++++++++ .../uao_dml_unique_index_delete.source | 47 +++++++++++++ .../uao_dml_unique_index_update.source | 70 +++++++++++++++++++ 9 files changed, 388 insertions(+), 2 deletions(-) diff --git a/src/backend/access/appendonly/README.md b/src/backend/access/appendonly/README.md index 70ac7d8b902..31fb78f1a75 100644 --- a/src/backend/access/appendonly/README.md +++ b/src/backend/access/appendonly/README.md @@ -185,8 +185,11 @@ to protect the scan over pg_aoseg. To answer uniqueness checks for AO/AOCO tables, we have a complication. Unlike heap, in AO/CO we don't store the xmin/xmax fields in the tuples. So, we have to rely on block directory rows that "cover" the data rows to satisfy index lookups. -The xmin/xmax of the block directory row(s) help determine tuple visibility for -uniqueness checks. +Since the block directory is maintained as a heap table, visibility checks on it +are identical to any other heap table: the xmin/xmax of the block directory +row(s) will be leveraged. This means we don't have to write any special +visibility checking code ourselves, nor do we need to worry about transactions +vs subtransactions. Since block directory rows are written usually much after the data row has been inserted, there are windows in which there is no block directory row on disk diff --git a/src/test/isolation2/expected/ao_unique_index.out b/src/test/isolation2/expected/ao_unique_index.out index 090e34c8575..2abe0c61d42 100644 --- a/src/test/isolation2/expected/ao_unique_index.out +++ b/src/test/isolation2/expected/ao_unique_index.out @@ -405,8 +405,60 @@ CONTEXT: COPY unique_index_ao_row, line 1 -- now that tx 1 was aborted, tx 2 is successful. 2<: <... 
completed> COPY 3 +1: END; +END DROP TABLE unique_index_ao_row; DROP + +-------------------------------------------------------------------------------- +-------------------- Smoke tests for subtransactions --------------------------- +-------------------------------------------------------------------------------- +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row DISTRIBUTED REPLICATED; +CREATE + +1: BEGIN; +BEGIN +1: SAVEPOINT a; +SAVEPOINT +1: INSERT INTO unique_index_ao_row VALUES(1); +INSERT 1 + +-- concurrent tx inserting conflicting row should block. +2: BEGIN; +BEGIN +2&: INSERT INTO unique_index_ao_row VALUES(1); +-- concurrent tx inserting non-conflicting row should be successful. +3: INSERT INTO unique_index_ao_row VALUES(2); +INSERT 1 + +-- conflict should be detected within the same subtx. +1: INSERT INTO unique_index_ao_row VALUES(1); +ERROR: duplicate key value violates unique constraint "unique_index_ao_row_a_key" (seg2 192.168.0.148:7004 pid=3396954) +DETAIL: Key (a)=(1) already exists. +-- the concurrent tx should now succeed. +2<: <... completed> +INSERT 1 +2: ABORT; +ABORT + +-- after rolling back to the savepoint, we should be able to re-insert the key +1: ROLLBACK TO SAVEPOINT a; +ROLLBACK +1: INSERT INTO unique_index_ao_row VALUES(1); +INSERT 1 +1: COMMIT; +COMMIT + +SELECT * FROM unique_index_ao_row; + a +--- + 1 + 2 +(2 rows) + +DROP TABLE unique_index_ao_row; +DROP + RESET gp_appendonly_enable_unique_index; RESET diff --git a/src/test/isolation2/expected/aocs_unique_index.out b/src/test/isolation2/expected/aocs_unique_index.out index dfb72c87966..083a0340ff4 100644 --- a/src/test/isolation2/expected/aocs_unique_index.out +++ b/src/test/isolation2/expected/aocs_unique_index.out @@ -405,8 +405,60 @@ CONTEXT: COPY unique_index_ao_column, line 1 -- now that tx 1 was aborted, tx 2 is successful. 2<: <... 
completed> COPY 3 +1: END; +END DROP TABLE unique_index_ao_column; DROP + +-------------------------------------------------------------------------------- +-------------------- Smoke tests for subtransactions --------------------------- +-------------------------------------------------------------------------------- +CREATE TABLE unique_index_ao_column (a INT unique) USING ao_column DISTRIBUTED REPLICATED; +CREATE + +1: BEGIN; +BEGIN +1: SAVEPOINT a; +SAVEPOINT +1: INSERT INTO unique_index_ao_column VALUES(1); +INSERT 1 + +-- concurrent tx inserting conflicting row should block. +2: BEGIN; +BEGIN +2&: INSERT INTO unique_index_ao_column VALUES(1); +-- concurrent tx inserting non-conflicting row should be successful. +3: INSERT INTO unique_index_ao_column VALUES(2); +INSERT 1 + +-- conflict should be detected within the same subtx. +1: INSERT INTO unique_index_ao_column VALUES(1); +ERROR: duplicate key value violates unique constraint "unique_index_ao_column_a_key" (seg1 192.168.0.148:7003 pid=3397768) +DETAIL: Key (a)=(1) already exists. +-- the concurrent tx should now succeed. +2<: <... completed> +INSERT 1 +2: ABORT; +ABORT + +-- after rolling back to the savepoint, we should be able to re-insert the key +1: ROLLBACK TO SAVEPOINT a; +ROLLBACK +1: INSERT INTO unique_index_ao_column VALUES(1); +INSERT 1 +1: COMMIT; +COMMIT + +SELECT * FROM unique_index_ao_column; + a +--- + 1 + 2 +(2 rows) + +DROP TABLE unique_index_ao_column; +DROP + RESET gp_appendonly_enable_unique_index; RESET diff --git a/src/test/isolation2/sql/ao_unique_index.sql b/src/test/isolation2/sql/ao_unique_index.sql index a06f1bbf5c9..f5b95b0d800 100644 --- a/src/test/isolation2/sql/ao_unique_index.sql +++ b/src/test/isolation2/sql/ao_unique_index.sql @@ -270,6 +270,39 @@ CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row 1: COPY unique_index_ao_row FROM PROGRAM 'seq 1 1'; -- now that tx 1 was aborted, tx 2 is successful. 
2<: +1: END; DROP TABLE unique_index_ao_row; + +-------------------------------------------------------------------------------- +-------------------- Smoke tests for subtransactions --------------------------- +-------------------------------------------------------------------------------- +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row + DISTRIBUTED REPLICATED; + +1: BEGIN; +1: SAVEPOINT a; +1: INSERT INTO unique_index_ao_row VALUES(1); + +-- concurrent tx inserting conflicting row should block. +2: BEGIN; +2&: INSERT INTO unique_index_ao_row VALUES(1); +-- concurrent tx inserting non-conflicting row should be successful. +3: INSERT INTO unique_index_ao_row VALUES(2); + +-- conflict should be detected within the same subtx. +1: INSERT INTO unique_index_ao_row VALUES(1); +-- the concurrent tx should now succeed. +2<: +2: ABORT; + +-- after rolling back to the savepoint, we should be able to re-insert the key +1: ROLLBACK TO SAVEPOINT a; +1: INSERT INTO unique_index_ao_row VALUES(1); +1: COMMIT; + +SELECT * FROM unique_index_ao_row; + +DROP TABLE unique_index_ao_row; + RESET gp_appendonly_enable_unique_index; diff --git a/src/test/isolation2/sql/aocs_unique_index.sql b/src/test/isolation2/sql/aocs_unique_index.sql index 80e9d9389c1..b35d34ff39f 100644 --- a/src/test/isolation2/sql/aocs_unique_index.sql +++ b/src/test/isolation2/sql/aocs_unique_index.sql @@ -270,6 +270,39 @@ CREATE TABLE unique_index_ao_column (a INT unique) USING ao_column 1: COPY unique_index_ao_column FROM PROGRAM 'seq 1 1'; -- now that tx 1 was aborted, tx 2 is successful. 
2<: +1: END; DROP TABLE unique_index_ao_column; + +-------------------------------------------------------------------------------- +-------------------- Smoke tests for subtransactions --------------------------- +-------------------------------------------------------------------------------- +CREATE TABLE unique_index_ao_column (a INT unique) USING ao_column + DISTRIBUTED REPLICATED; + +1: BEGIN; +1: SAVEPOINT a; +1: INSERT INTO unique_index_ao_column VALUES(1); + +-- concurrent tx inserting conflicting row should block. +2: BEGIN; +2&: INSERT INTO unique_index_ao_column VALUES(1); +-- concurrent tx inserting non-conflicting row should be successful. +3: INSERT INTO unique_index_ao_column VALUES(2); + +-- conflict should be detected within the same subtx. +1: INSERT INTO unique_index_ao_column VALUES(1); +-- the concurrent tx should now succeed. +2<: +2: ABORT; + +-- after rolling back to the savepoint, we should be able to re-insert the key +1: ROLLBACK TO SAVEPOINT a; +1: INSERT INTO unique_index_ao_column VALUES(1); +1: COMMIT; + +SELECT * FROM unique_index_ao_column; + +DROP TABLE unique_index_ao_column; + RESET gp_appendonly_enable_unique_index; diff --git a/src/test/regress/input/uao_dml/uao_dml_unique_index_delete.source b/src/test/regress/input/uao_dml/uao_dml_unique_index_delete.source index ab4c2c9501e..4fcdf691b3f 100644 --- a/src/test/regress/input/uao_dml/uao_dml_unique_index_delete.source +++ b/src/test/regress/input/uao_dml/uao_dml_unique_index_delete.source @@ -71,3 +71,43 @@ DELETE FROM uao_unique_index_delete WHERE a = 2; SELECT * FROM uao_unique_index_delete; DROP TABLE uao_unique_index_delete; + +-- Case 7: Deleting tx deletes a key inserted in the same subtx----------------- +CREATE TABLE uao_unique_index_delete (a INT unique); +BEGIN; +SAVEPOINT a; +INSERT INTO uao_unique_index_delete VALUES (1); +DELETE FROM uao_unique_index_delete WHERE a = 1; +COMMIT; +SELECT * FROM uao_unique_index_delete; + +DROP TABLE uao_unique_index_delete; + +-- 
Case 8: Deleting tx deletes a key deleted in the same subtx------------------ +CREATE TABLE uao_unique_index_delete (a INT unique); +BEGIN; +SAVEPOINT a; +INSERT INTO uao_unique_index_delete VALUES (1); +DELETE FROM uao_unique_index_delete WHERE a = 1; +-- should be a no-op +DELETE FROM uao_unique_index_delete WHERE a = 1; +COMMIT; +SELECT * FROM uao_unique_index_delete; + +DROP TABLE uao_unique_index_delete; + +-- Case 9: Deleting tx deletes a key whose earlier delete was rolled back------- +CREATE TABLE uao_unique_index_delete (a INT unique); +INSERT INTO uao_unique_index_delete VALUES (1); +BEGIN; +SAVEPOINT a; +DELETE FROM uao_unique_index_delete WHERE a = 1; +ROLLBACK TO SAVEPOINT a; +-- should be able to delete it again. +DELETE FROM uao_unique_index_delete WHERE a = 1; +-- should be able to insert it now since it has been deleted +INSERT INTO uao_unique_index_delete VALUES (1); +COMMIT; +SELECT * FROM uao_unique_index_delete; + +DROP TABLE uao_unique_index_delete; diff --git a/src/test/regress/input/uao_dml/uao_dml_unique_index_update.source b/src/test/regress/input/uao_dml/uao_dml_unique_index_update.source index c1e4eb90cc9..07f40c179a8 100644 --- a/src/test/regress/input/uao_dml/uao_dml_unique_index_update.source +++ b/src/test/regress/input/uao_dml/uao_dml_unique_index_update.source @@ -116,3 +116,59 @@ INSERT INTO uao_unique_index_update SELECT generate_series(1,5); UPDATE uao_unique_index_update SET a=6 WHERE a>2; DROP TABLE uao_unique_index_update; + +-- Case 11: Updating tx updates a key inserted in the same subtx---------------- +CREATE TABLE uao_unique_index_update (a INT unique); +BEGIN; +SAVEPOINT a; +INSERT INTO uao_unique_index_update VALUES(1); +UPDATE uao_unique_index_update SET a=2 WHERE a=1; +COMMIT; +SELECT * FROM uao_unique_index_update; + +DROP TABLE uao_unique_index_update; + +-- Case 12: Updating tx updates a key updated in the same subtx----------------- +CREATE TABLE uao_unique_index_update (a INT unique); +BEGIN; +SAVEPOINT a; 
+INSERT INTO uao_unique_index_update VALUES(1); +UPDATE uao_unique_index_update SET a=2 WHERE a=1; +-- should be a no-op +UPDATE uao_unique_index_update SET a=2 WHERE a=1; +-- should succeed +UPDATE uao_unique_index_update SET a=3 WHERE a=2; +COMMIT; +SELECT * FROM uao_unique_index_update; + +DROP TABLE uao_unique_index_update; + +-- Case 13: Updating tx updates a key whose earlier update was rolled back------ +CREATE TABLE uao_unique_index_update (a INT unique); +INSERT INTO uao_unique_index_update VALUES(1); +BEGIN; +SAVEPOINT a; +UPDATE uao_unique_index_update SET a=2 WHERE a=1; +ROLLBACK TO SAVEPOINT a; +-- should be able to run the update again as we have rolled back. +UPDATE uao_unique_index_update SET a=2 WHERE a=1; +COMMIT; +SELECT * FROM uao_unique_index_update; + +DROP TABLE uao_unique_index_update; + +-- Case 14: Updating tx updates a key to a key inserted in the same subtx------- +CREATE TABLE uao_unique_index_update (a INT unique); +INSERT INTO uao_unique_index_update VALUES (1); +BEGIN; +SAVEPOINT a; +INSERT INTO uao_unique_index_update VALUES (2); +-- should raise a conflict with the key inserted inside the same subtx. +UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +ROLLBACK TO SAVEPOINT a; +-- should be able to run the update again as we have rolled back. 
+UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +COMMIT; +SELECT * FROM uao_unique_index_update; + +DROP TABLE uao_unique_index_update; diff --git a/src/test/regress/output/uao_dml/uao_dml_unique_index_delete.source b/src/test/regress/output/uao_dml/uao_dml_unique_index_delete.source index 2e8cc8af1ec..39639389e93 100644 --- a/src/test/regress/output/uao_dml/uao_dml_unique_index_delete.source +++ b/src/test/regress/output/uao_dml/uao_dml_unique_index_delete.source @@ -88,3 +88,50 @@ SELECT * FROM uao_unique_index_delete; (1 row) DROP TABLE uao_unique_index_delete; +-- Case 7: Deleting tx deletes a key inserted in the same subtx----------------- +CREATE TABLE uao_unique_index_delete (a INT unique); +BEGIN; +SAVEPOINT a; +INSERT INTO uao_unique_index_delete VALUES (1); +DELETE FROM uao_unique_index_delete WHERE a = 1; +COMMIT; +SELECT * FROM uao_unique_index_delete; + a +--- +(0 rows) + +DROP TABLE uao_unique_index_delete; +-- Case 8: Deleting tx deletes a key deleted in the same subtx------------------ +CREATE TABLE uao_unique_index_delete (a INT unique); +BEGIN; +SAVEPOINT a; +INSERT INTO uao_unique_index_delete VALUES (1); +DELETE FROM uao_unique_index_delete WHERE a = 1; +-- should be a no-op +DELETE FROM uao_unique_index_delete WHERE a = 1; +COMMIT; +SELECT * FROM uao_unique_index_delete; + a +--- +(0 rows) + +DROP TABLE uao_unique_index_delete; +-- Case 9: Deleting tx deletes a key whose earlier delete was rolled back------- +CREATE TABLE uao_unique_index_delete (a INT unique); +INSERT INTO uao_unique_index_delete VALUES (1); +BEGIN; +SAVEPOINT a; +DELETE FROM uao_unique_index_delete WHERE a = 1; +ROLLBACK TO SAVEPOINT a; +-- should be able to delete it again. 
+DELETE FROM uao_unique_index_delete WHERE a = 1; +-- should be able to insert it now since it has been deleted +INSERT INTO uao_unique_index_delete VALUES (1); +COMMIT; +SELECT * FROM uao_unique_index_delete; + a +--- + 1 +(1 row) + +DROP TABLE uao_unique_index_delete; diff --git a/src/test/regress/output/uao_dml/uao_dml_unique_index_update.source b/src/test/regress/output/uao_dml/uao_dml_unique_index_update.source index 872e6b79d8f..69c11abe876 100644 --- a/src/test/regress/output/uao_dml/uao_dml_unique_index_update.source +++ b/src/test/regress/output/uao_dml/uao_dml_unique_index_update.source @@ -159,3 +159,73 @@ UPDATE uao_unique_index_update SET a=6 WHERE a>2; ERROR: duplicate key value violates unique constraint "uao_unique_index_update_a_key" (seg2 192.168.0.148:7004 pid=1669359) DETAIL: Key (a)=(6) already exists. DROP TABLE uao_unique_index_update; +-- Case 11: Updating tx updates a key inserted in the same subtx---------------- +CREATE TABLE uao_unique_index_update (a INT unique); +BEGIN; +SAVEPOINT a; +INSERT INTO uao_unique_index_update VALUES(1); +UPDATE uao_unique_index_update SET a=2 WHERE a=1; +COMMIT; +SELECT * FROM uao_unique_index_update; + a +--- + 2 +(1 row) + +DROP TABLE uao_unique_index_update; +-- Case 12: Updating tx updates a key updated in the same subtx----------------- +CREATE TABLE uao_unique_index_update (a INT unique); +BEGIN; +SAVEPOINT a; +INSERT INTO uao_unique_index_update VALUES(1); +UPDATE uao_unique_index_update SET a=2 WHERE a=1; +-- should be a no-op +UPDATE uao_unique_index_update SET a=2 WHERE a=1; +-- should succeed +UPDATE uao_unique_index_update SET a=3 WHERE a=2; +COMMIT; +SELECT * FROM uao_unique_index_update; + a +--- + 3 +(1 row) + +DROP TABLE uao_unique_index_update; +-- Case 13: Updating tx updates a key whose earlier update was rolled back------ +CREATE TABLE uao_unique_index_update (a INT unique); +INSERT INTO uao_unique_index_update VALUES(1); +BEGIN; +SAVEPOINT a; +UPDATE uao_unique_index_update SET a=2 WHERE 
a=1; +ROLLBACK TO SAVEPOINT a; +-- should be able to run the update again as we have rolled back. +UPDATE uao_unique_index_update SET a=2 WHERE a=1; +COMMIT; +SELECT * FROM uao_unique_index_update; + a +--- + 2 +(1 row) + +DROP TABLE uao_unique_index_update; +-- Case 14: Updating tx updates a key to a key inserted in the same subtx------- +CREATE TABLE uao_unique_index_update (a INT unique); +INSERT INTO uao_unique_index_update VALUES (1); +BEGIN; +SAVEPOINT a; +INSERT INTO uao_unique_index_update VALUES (2); +-- should raise a conflict with the key inserted inside the same subtx. +UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +ERROR: duplicate key value violates unique constraint "uao_unique_index_update_a_key" (seg0 192.168.0.148:7002 pid=3411438) +DETAIL: Key (a)=(2) already exists. +ROLLBACK TO SAVEPOINT a; +-- should be able to run the update again as we have rolled back. +UPDATE uao_unique_index_update SET a = 2 WHERE a = 1; +COMMIT; +SELECT * FROM uao_unique_index_update; + a +--- + 2 +(1 row) + +DROP TABLE uao_unique_index_update; From 927c503b0d8430e94a8cb1bc958260277c4fb479 Mon Sep 17 00:00:00 2001 From: Soumyadeep Chakraborty Date: Mon, 21 Nov 2022 09:24:04 -0800 Subject: [PATCH 14/19] ao/co: Retire dev guc hiding unique index feature Now that we have support for unique indexes on AO/CO tables, drop the dev GUC hiding the feature. Update tests accordingly. 
--- src/test/isolation2/expected/ao_blkdir.out | 8 - .../isolation2/expected/ao_unique_index.out | 68 +++++++- .../isolation2/expected/aocs_unique_index.out | 68 +++++++- .../input/uao/ao_unique_index_vacuum.source | 73 +++++++++ .../output/uao/ao_unique_index_vacuum.source | 124 ++++++++++++++ src/test/isolation2/sql/ao_blkdir.sql | 4 - src/test/isolation2/sql/ao_unique_index.sql | 42 ++++- src/test/isolation2/sql/aocs_unique_index.sql | 42 ++++- .../regress/expected/alter_table_aocs.out | 5 - src/test/regress/expected/qp_with_clause.out | 18 --- .../input/uao_ddl/alter_ao_table_index.source | 3 - .../uao_dml/ao_unique_index_build.source | 86 ++++++++++ .../uao_dml_unique_index_delete.source | 2 - .../uao_dml_unique_index_update.source | 2 - .../uao_ddl/alter_ao_table_index.source | 3 - .../uao_dml/ao_unique_index_build.source | 152 ++++++++++++++++++ .../uao_dml_unique_index_delete.source | 1 - .../uao_dml_unique_index_update.source | 1 - src/test/regress/sql/alter_table_aocs.sql | 3 - src/test/regress/sql/qp_with_clause.sql | 20 --- 20 files changed, 639 insertions(+), 86 deletions(-) create mode 100644 src/test/isolation2/input/uao/ao_unique_index_vacuum.source create mode 100644 src/test/isolation2/output/uao/ao_unique_index_vacuum.source create mode 100644 src/test/regress/input/uao_dml/ao_unique_index_build.source create mode 100644 src/test/regress/output/uao_dml/ao_unique_index_build.source diff --git a/src/test/isolation2/expected/ao_blkdir.out b/src/test/isolation2/expected/ao_blkdir.out index f44e4632343..c798a23c8f6 100644 --- a/src/test/isolation2/expected/ao_blkdir.out +++ b/src/test/isolation2/expected/ao_blkdir.out @@ -233,8 +233,6 @@ SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id -- Unique index white box tests DROP TABLE ao_blkdir_test; DROP -SET gp_appendonly_enable_unique_index TO ON; -SET CREATE TABLE ao_blkdir_test(i int UNIQUE, j int) USING ao_row DISTRIBUTED BY (i); CREATE @@ -316,8 +314,6 @@ COMMIT DROP 
TABLE ao_blkdir_test; DROP -RESET gp_appendonly_enable_unique_index; -RESET -------------------------------------------------------------------------------- -- AOCO tables @@ -721,8 +717,6 @@ SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_ -- Unique index white box tests DROP TABLE aoco_blkdir_test; DROP -SET gp_appendonly_enable_unique_index TO ON; -SET CREATE TABLE aoco_blkdir_test(h int, i int UNIQUE, j int) USING ao_column DISTRIBUTED BY (i); CREATE @@ -823,5 +817,3 @@ DETAIL: Key (i)=(2) already exists. DROP TABLE aoco_blkdir_test; DROP -RESET gp_appendonly_enable_unique_index; -RESET diff --git a/src/test/isolation2/expected/ao_unique_index.out b/src/test/isolation2/expected/ao_unique_index.out index 2abe0c61d42..b6a49a20a37 100644 --- a/src/test/isolation2/expected/ao_unique_index.out +++ b/src/test/isolation2/expected/ao_unique_index.out @@ -6,9 +6,6 @@ -- us to predict block directory entries without having to worry about the -- table's distribution. -SET gp_appendonly_enable_unique_index TO ON; -SET - -- Case 1: Conflict with committed transaction---------------------------------- CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row DISTRIBUTED REPLICATED; CREATE @@ -460,5 +457,66 @@ SELECT * FROM unique_index_ao_row; DROP TABLE unique_index_ao_row; DROP -RESET gp_appendonly_enable_unique_index; -RESET +-------------------------------------------------------------------------------- +-------------------- Smoke tests for repeatable read --------------------------- +-------------------------------------------------------------------------------- + +-- Test that shows that unique index checks transcend transaction isolation +-- boundaries. + +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row DISTRIBUTED REPLICATED; +CREATE + +-- Begin two txs with tx level snapshot taken early. 
+1: BEGIN ISOLATION LEVEL REPEATABLE READ; +BEGIN +1: SELECT * FROM unique_index_ao_row; + a +--- +(0 rows) +2: BEGIN ISOLATION LEVEL REPEATABLE READ; +BEGIN +2: SELECT * FROM unique_index_ao_row; + a +--- +(0 rows) + +-- Now begin a concurrent transaction which inserts a key. +3: BEGIN; +BEGIN +3: INSERT INTO unique_index_ao_row VALUES(1); +INSERT 1 + +-- And another transaction inserts a key and commits. +INSERT INTO unique_index_ao_row VALUES(2); +INSERT 1 + +-- Tx should block on insert of conflicting key, even though it can't "see" the +-- conflicting key due to its isolation level. +1: SELECT * FROM unique_index_ao_row; + a +--- +(0 rows) +1&: INSERT INTO unique_index_ao_row VALUES(1); + +3: ABORT; +ABORT +1<: <... completed> +INSERT 1 +1: ABORT; +ABORT + +-- Tx should raise a conflict, even though it can't "see" the conflicting key +-- due to its isolation level. +2: SELECT * FROM unique_index_ao_row; + a +--- +(0 rows) +2: INSERT INTO unique_index_ao_row VALUES(2); +ERROR: duplicate key value violates unique constraint "unique_index_ao_row_a_key" (seg1 192.168.0.148:7003 pid=3417060) +DETAIL: Key (a)=(2) already exists. +2: ABORT; +ABORT + +DROP TABLE unique_index_ao_row; +DROP diff --git a/src/test/isolation2/expected/aocs_unique_index.out b/src/test/isolation2/expected/aocs_unique_index.out index 083a0340ff4..8aa5e0e8522 100644 --- a/src/test/isolation2/expected/aocs_unique_index.out +++ b/src/test/isolation2/expected/aocs_unique_index.out @@ -6,9 +6,6 @@ -- us to predict block directory entries without having to worry about the -- table's distribution. 
-SET gp_appendonly_enable_unique_index TO ON; -SET - -- Case 1: Conflict with committed transaction---------------------------------- CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column DISTRIBUTED REPLICATED; CREATE @@ -460,5 +457,66 @@ SELECT * FROM unique_index_ao_column; DROP TABLE unique_index_ao_column; DROP -RESET gp_appendonly_enable_unique_index; -RESET +-------------------------------------------------------------------------------- +-------------------- Smoke tests for repeatable read --------------------------- +-------------------------------------------------------------------------------- + +-- Test that shows that unique index checks transcend transaction isolation +-- boundaries. + +CREATE TABLE unique_index_ao_column (a INT unique) USING ao_column DISTRIBUTED REPLICATED; +CREATE + +-- Begin two txs with tx level snapshot taken early. +1: BEGIN ISOLATION LEVEL REPEATABLE READ; +BEGIN +1: SELECT * FROM unique_index_ao_column; + a +--- +(0 rows) +2: BEGIN ISOLATION LEVEL REPEATABLE READ; +BEGIN +2: SELECT * FROM unique_index_ao_column; + a +--- +(0 rows) + +-- Now begin a concurrent transaction which inserts a key. +3: BEGIN; +BEGIN +3: INSERT INTO unique_index_ao_column VALUES(1); +INSERT 1 + +-- And another transaction inserts a key and commits. +INSERT INTO unique_index_ao_column VALUES(2); +INSERT 1 + +-- Tx should block on insert of conflicting key, even though it can't "see" the +-- conflicting key due to its isolation level. +1: SELECT * FROM unique_index_ao_column; + a +--- +(0 rows) +1&: INSERT INTO unique_index_ao_column VALUES(1); + +3: ABORT; +ABORT +1<: <... completed> +INSERT 1 +1: ABORT; +ABORT + +-- Tx should raise a conflict, even though it can't "see" the conflicting key +-- due to its isolation level. 
+2: SELECT * FROM unique_index_ao_column; + a +--- +(0 rows) +2: INSERT INTO unique_index_ao_column VALUES(2); +ERROR: duplicate key value violates unique constraint "unique_index_ao_column_a_key" (seg0 192.168.0.148:7002 pid=3417500) +DETAIL: Key (a)=(2) already exists. +2: ABORT; +ABORT + +DROP TABLE unique_index_ao_column; +DROP diff --git a/src/test/isolation2/input/uao/ao_unique_index_vacuum.source b/src/test/isolation2/input/uao/ao_unique_index_vacuum.source new file mode 100644 index 00000000000..00daf441a68 --- /dev/null +++ b/src/test/isolation2/input/uao/ao_unique_index_vacuum.source @@ -0,0 +1,73 @@ +-- Test cases to cover VACUUM and concurrent INSERT behavior on append-optimized +-- tables with unique indexes. + +-- Case 1: Basic case with a few deleted tuples--------------------------------- +CREATE TABLE unique_index_vacuum_@amname@(i int UNIQUE) USING @amname@ + DISTRIBUTED REPLICATED; +INSERT INTO unique_index_vacuum_@amname@ SELECT generate_series(1, 5); +DELETE FROM unique_index_vacuum_@amname@ WHERE i = 5; +-- should succeed (and not raise conflicts for rows [1,4] while moving rows [1,4]) +VACUUM unique_index_vacuum_@amname@; +-- There should be 1 visible blkdir row with all 4 visible tuples +SELECT (gp_toolkit.__gp_aoblkdir('unique_index_vacuum_@amname@')).* + FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; +DROP TABLE unique_index_vacuum_@amname@; + +-- Case 2: Concurrent case showcasing that a placeholder block directory row is +-- not necessary to be inserted for the rows transferred to a new segment by +-- a VACUUM operation. 
+CREATE TABLE unique_index_vacuum_@amname@(i int UNIQUE) USING @amname@ + DISTRIBUTED REPLICATED; +INSERT INTO unique_index_vacuum_@amname@ SELECT generate_series(1, 5); +DELETE FROM unique_index_vacuum_@amname@ WHERE i = 5; + +SELECT gp_inject_fault('appendonly_insert', 'suspend', '', '', 'unique_index_vacuum_@amname@', 2, 2, 0, dbid) + FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + +1&: VACUUM unique_index_vacuum_@amname@; + +-- Wait until tuple with key i = 1 has been moved by the vacuum operation +SELECT gp_wait_until_triggered_fault('appendonly_insert', 2, dbid) + FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; +-- Even though a new index entry has been written for the moved tuple with key +-- i = 1, the old index entry (pointing to the old segfile) will still be live +-- and will always be used in detecting the conflict (chosen over the new index +-- entry and its associated block directory entry). +INSERT INTO unique_index_vacuum_@amname@ VALUES(1); + +-- Inserting a key not moved yet should also result in a conflict. +INSERT INTO unique_index_vacuum_@amname@ VALUES(2); + +SELECT gp_inject_fault('appendonly_insert', 'reset', dbid) + FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + +1<: +DROP TABLE unique_index_vacuum_@amname@; + +-- Case 3: Concurrent case with a conflicting insert where the vacuum is hung +-- just after it has bulk deleted the old index entries. +CREATE TABLE unique_index_vacuum_@amname@(i int UNIQUE) USING @amname@ + DISTRIBUTED REPLICATED; +INSERT INTO unique_index_vacuum_@amname@ SELECT generate_series(1, 5); +DELETE FROM unique_index_vacuum_@amname@ WHERE i = 5; + +SELECT gp_inject_fault('vacuum_ao_after_index_delete', 'suspend', dbid) + FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + +1&: VACUUM unique_index_vacuum_@amname@; + +-- Wait until all old index entries have been deleted by the VACUUM. 
+SELECT gp_wait_until_triggered_fault('vacuum_ao_after_index_delete', 1, dbid) + FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + +-- Now trying to insert key = 1 will also be detected as a conflict, even +-- though the old index entries are no longer present. We have the new index +-- entries (and the new block directory rows) to thank, which have already been +-- persisted at end of insert, within the VACUUM. +2: INSERT INTO unique_index_vacuum_@amname@ VALUES (1); + +SELECT gp_inject_fault('vacuum_ao_after_index_delete', 'reset', dbid) + FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + +1<: +DROP TABLE unique_index_vacuum_@amname@; diff --git a/src/test/isolation2/output/uao/ao_unique_index_vacuum.source b/src/test/isolation2/output/uao/ao_unique_index_vacuum.source new file mode 100644 index 00000000000..9e7f70b1ced --- /dev/null +++ b/src/test/isolation2/output/uao/ao_unique_index_vacuum.source @@ -0,0 +1,124 @@ +-- Test cases to cover VACUUM and concurrent INSERT behavior on append-optimized +-- tables with unique indexes. 
+ +-- Case 1: Basic case with a few deleted tuples--------------------------------- +CREATE TABLE unique_index_vacuum_@amname@(i int UNIQUE) USING @amname@ DISTRIBUTED REPLICATED; +CREATE +INSERT INTO unique_index_vacuum_@amname@ SELECT generate_series(1, 5); +INSERT 5 +DELETE FROM unique_index_vacuum_@amname@ WHERE i = 5; +DELETE 1 +-- should succeed (and not raise conflicts for rows [1,4] while moving rows [1,4]) +VACUUM unique_index_vacuum_@amname@; +VACUUM +-- There should be 1 visible blkdir row with all 4 visible tuples +SELECT (gp_toolkit.__gp_aoblkdir('unique_index_vacuum_@amname@')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; + tupleid | segno | columngroup_no | entry_no | first_row_no | file_offset | row_count +---------+-------+----------------+----------+--------------+-------------+----------- + (0,3) | 2 | 0 | 0 | 1 | 0 | 4 +(1 row) +DROP TABLE unique_index_vacuum_@amname@; +DROP + +-- Case 2: Concurrent case showcasing that a placeholder block directory row is +-- not necessary to be inserted for the rows transferred to a new segment by +-- a VACUUM operation. 
+CREATE TABLE unique_index_vacuum_@amname@(i int UNIQUE) USING @amname@ DISTRIBUTED REPLICATED; +CREATE +INSERT INTO unique_index_vacuum_@amname@ SELECT generate_series(1, 5); +INSERT 5 +DELETE FROM unique_index_vacuum_@amname@ WHERE i = 5; +DELETE 1 + +SELECT gp_inject_fault('appendonly_insert', 'suspend', '', '', 'unique_index_vacuum_@amname@', 2, 2, 0, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) + +1&: VACUUM unique_index_vacuum_@amname@; + +-- Wait until tuple with key i = 1 has been moved by the vacuum operation +SELECT gp_wait_until_triggered_fault('appendonly_insert', 2, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + gp_wait_until_triggered_fault +------------------------------- + Success: + Success: + Success: +(3 rows) +-- Even though a new index entry has been written for the moved tuple with key +-- i = 1, the old index entry (pointing to the old segfile) will still be live +-- and will always be used in detecting the conflict (chosen over the new index +-- entry and its associated block directory entry). +INSERT INTO unique_index_vacuum_@amname@ VALUES(1); +ERROR: duplicate key value violates unique constraint "unique_index_vacuum_@amname@_i_key" (seg1 192.168.0.148:7003 pid=3197772) +DETAIL: Key (i)=(1) already exists. + +-- Inserting a key not moved yet should also result in a conflict. +INSERT INTO unique_index_vacuum_@amname@ VALUES(2); +ERROR: duplicate key value violates unique constraint "unique_index_vacuum_@amname@_i_key" (seg1 192.168.0.148:7003 pid=3197772) +DETAIL: Key (i)=(2) already exists. + +SELECT gp_inject_fault('appendonly_insert', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) + +1<: <... 
completed> +VACUUM +DROP TABLE unique_index_vacuum_@amname@; +DROP + +-- Case 3: Concurrent case with a conflicting insert where the vacuum is hung +-- just after it has bulk deleted the old index entries. +CREATE TABLE unique_index_vacuum_@amname@(i int UNIQUE) USING @amname@ DISTRIBUTED REPLICATED; +CREATE +INSERT INTO unique_index_vacuum_@amname@ SELECT generate_series(1, 5); +INSERT 5 +DELETE FROM unique_index_vacuum_@amname@ WHERE i = 5; +DELETE 1 + +SELECT gp_inject_fault('vacuum_ao_after_index_delete', 'suspend', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) + +1&: VACUUM unique_index_vacuum_@amname@; + +-- Wait until all old index entries have been deleted by the VACUUM. +SELECT gp_wait_until_triggered_fault('vacuum_ao_after_index_delete', 1, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + gp_wait_until_triggered_fault +------------------------------- + Success: + Success: + Success: +(3 rows) + +-- Now trying to insert key = 1 will also be detected as a conflict, even +-- though the old index entries are no longer present. We have the new index +-- entries (and the new block directory rows) to thank, which have already been +-- persisted at end of insert, within the VACUUM. +2: INSERT INTO unique_index_vacuum_@amname@ VALUES (1); +ERROR: duplicate key value violates unique constraint "unique_index_vacuum_@amname@_i_key" (seg1 192.168.0.148:7003 pid=3197808) +DETAIL: Key (i)=(1) already exists. + +SELECT gp_inject_fault('vacuum_ao_after_index_delete', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + gp_inject_fault +----------------- + Success: + Success: + Success: +(3 rows) + +1<: <... 
completed> +VACUUM +DROP TABLE unique_index_vacuum_@amname@; +DROP diff --git a/src/test/isolation2/sql/ao_blkdir.sql b/src/test/isolation2/sql/ao_blkdir.sql index 217244d6466..f7e3074dba3 100644 --- a/src/test/isolation2/sql/ao_blkdir.sql +++ b/src/test/isolation2/sql/ao_blkdir.sql @@ -43,7 +43,6 @@ WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; -- Unique index white box tests DROP TABLE ao_blkdir_test; -SET gp_appendonly_enable_unique_index TO ON; CREATE TABLE ao_blkdir_test(i int UNIQUE, j int) USING ao_row DISTRIBUTED BY (i); SELECT gp_inject_fault('appendonly_insert', 'suspend', '', '', 'ao_blkdir_test', 1, 1, 0, dbid) @@ -94,7 +93,6 @@ WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; DROP TABLE ao_blkdir_test; -RESET gp_appendonly_enable_unique_index; -------------------------------------------------------------------------------- -- AOCO tables @@ -139,7 +137,6 @@ WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; -- Unique index white box tests DROP TABLE aoco_blkdir_test; -SET gp_appendonly_enable_unique_index TO ON; CREATE TABLE aoco_blkdir_test(h int, i int UNIQUE, j int) USING ao_column DISTRIBUTED BY (i); SELECT gp_inject_fault('appendonly_insert', 'suspend', '', '', 'aoco_blkdir_test', 1, 1, 0, dbid) @@ -202,4 +199,3 @@ FROM gp_segment_configuration WHERE role = 'p' AND content = 0; 4: INSERT INTO aoco_blkdir_test VALUES (2, 2); DROP TABLE aoco_blkdir_test; -RESET gp_appendonly_enable_unique_index; diff --git a/src/test/isolation2/sql/ao_unique_index.sql b/src/test/isolation2/sql/ao_unique_index.sql index f5b95b0d800..8035916f95e 100644 --- a/src/test/isolation2/sql/ao_unique_index.sql +++ b/src/test/isolation2/sql/ao_unique_index.sql @@ -6,8 +6,6 @@ -- us to predict block directory entries without having to worry about the -- table's distribution. 
-SET gp_appendonly_enable_unique_index TO ON; - -- Case 1: Conflict with committed transaction---------------------------------- CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row DISTRIBUTED REPLICATED; @@ -305,4 +303,42 @@ SELECT * FROM unique_index_ao_row; DROP TABLE unique_index_ao_row; -RESET gp_appendonly_enable_unique_index; +-------------------------------------------------------------------------------- +-------------------- Smoke tests for repeatable read --------------------------- +-------------------------------------------------------------------------------- + +-- Test that shows that unique index checks transcend transaction isolation +-- boundaries. + +CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row + DISTRIBUTED REPLICATED; + +-- Begin two txs with tx level snapshot taken early. +1: BEGIN ISOLATION LEVEL REPEATABLE READ; +1: SELECT * FROM unique_index_ao_row; +2: BEGIN ISOLATION LEVEL REPEATABLE READ; +2: SELECT * FROM unique_index_ao_row; + +-- Now begin a concurrent transaction which inserts a key. +3: BEGIN; +3: INSERT INTO unique_index_ao_row VALUES(1); + +-- And another transaction inserts a key and commits. +INSERT INTO unique_index_ao_row VALUES(2); + +-- Tx should block on insert of conflicting key, even though it can't "see" the +-- conflicting key due to its isolation level. +1: SELECT * FROM unique_index_ao_row; +1&: INSERT INTO unique_index_ao_row VALUES(1); + +3: ABORT; +1<: +1: ABORT; + +-- Tx should raise a conflict, even though it can't "see" the conflicting key +-- due to its isolation level. 
+2: SELECT * FROM unique_index_ao_row; +2: INSERT INTO unique_index_ao_row VALUES(2); +2: ABORT; + +DROP TABLE unique_index_ao_row; diff --git a/src/test/isolation2/sql/aocs_unique_index.sql b/src/test/isolation2/sql/aocs_unique_index.sql index b35d34ff39f..4cd59262ce3 100644 --- a/src/test/isolation2/sql/aocs_unique_index.sql +++ b/src/test/isolation2/sql/aocs_unique_index.sql @@ -6,8 +6,6 @@ -- us to predict block directory entries without having to worry about the -- table's distribution. -SET gp_appendonly_enable_unique_index TO ON; - -- Case 1: Conflict with committed transaction---------------------------------- CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column DISTRIBUTED REPLICATED; @@ -305,4 +303,42 @@ SELECT * FROM unique_index_ao_column; DROP TABLE unique_index_ao_column; -RESET gp_appendonly_enable_unique_index; +-------------------------------------------------------------------------------- +-------------------- Smoke tests for repeatable read --------------------------- +-------------------------------------------------------------------------------- + +-- Test that shows that unique index checks transcend transaction isolation +-- boundaries. + +CREATE TABLE unique_index_ao_column (a INT unique) USING ao_column + DISTRIBUTED REPLICATED; + +-- Begin two txs with tx level snapshot taken early. +1: BEGIN ISOLATION LEVEL REPEATABLE READ; +1: SELECT * FROM unique_index_ao_column; +2: BEGIN ISOLATION LEVEL REPEATABLE READ; +2: SELECT * FROM unique_index_ao_column; + +-- Now begin a concurrent transaction which inserts a key. +3: BEGIN; +3: INSERT INTO unique_index_ao_column VALUES(1); + +-- And another transaction inserts a key and commits. +INSERT INTO unique_index_ao_column VALUES(2); + +-- Tx should block on insert of conflicting key, even though it can't "see" the +-- conflicting key due to its isolation level. 
+1: SELECT * FROM unique_index_ao_column; +1&: INSERT INTO unique_index_ao_column VALUES(1); + +3: ABORT; +1<: +1: ABORT; + +-- Tx should raise a conflict, even though it can't "see" the conflicting key +-- due to its isolation level. +2: SELECT * FROM unique_index_ao_column; +2: INSERT INTO unique_index_ao_column VALUES(2); +2: ABORT; + +DROP TABLE unique_index_ao_column; diff --git a/src/test/regress/expected/alter_table_aocs.out b/src/test/regress/expected/alter_table_aocs.out index cfcccc2da95..091d0f5e250 100644 --- a/src/test/regress/expected/alter_table_aocs.out +++ b/src/test/regress/expected/alter_table_aocs.out @@ -527,11 +527,6 @@ select attstattarget from pg_attribute where attrelid = 'aocs_addcol.addcol1'::r -- test alter distribution policy alter table addcol1 set distributed randomly; alter table addcol1 set distributed by (a); --- test some constraints (unique indexes do not work for unique and pkey) -alter table addcol1 add constraint tunique unique(a); -ERROR: append-only tables do not support unique indexes -alter table addcol1 add constraint tpkey primary key(a); -ERROR: append-only tables do not support unique indexes alter table addcol1 add constraint tcheck check (a is not null); -- test changing the storage type of a column alter table addcol1 alter column f_renamed type varchar(7); diff --git a/src/test/regress/expected/qp_with_clause.out b/src/test/regress/expected/qp_with_clause.out index 773b213d86a..10c02bae0c4 100644 --- a/src/test/regress/expected/qp_with_clause.out +++ b/src/test/regress/expected/qp_with_clause.out @@ -6392,15 +6392,6 @@ CREATE TABLE countrylanguage_ao ( isofficial boolean NOT NULL, percentage real NOT NULL ) with (appendonly=true) distributed by (countrycode,language); -ALTER TABLE ONLY city_ao - ADD CONSTRAINT city_ao_pkey PRIMARY KEY (id); -ERROR: append-only tables do not support unique indexes -ALTER TABLE ONLY country_ao - ADD CONSTRAINT country_ao_pkey PRIMARY KEY (code); -ERROR: append-only tables do not 
support unique indexes -ALTER TABLE ONLY countrylanguage_ao - ADD CONSTRAINT countrylanguage_ao_pkey PRIMARY KEY (countrycode, "language"); -ERROR: append-only tables do not support unique indexes create index bitmap_city_ao_countrycode on city_ao using bitmap(countrycode); create index bitmap_country_ao_gf on country_ao using bitmap(governmentform); create index bitmap_country_ao_region on country_ao using bitmap(region); @@ -7294,15 +7285,6 @@ CREATE TABLE countrylanguage_co ( isofficial boolean NOT NULL, percentage real NOT NULL ) with (appendonly=true,orientation=column) distributed by (countrycode,language); -ALTER TABLE ONLY city_co - ADD CONSTRAINT city_co_pkey PRIMARY KEY (id); -ERROR: append-only tables do not support unique indexes -ALTER TABLE ONLY country_co - ADD CONSTRAINT country_co_pkey PRIMARY KEY (code); -ERROR: append-only tables do not support unique indexes -ALTER TABLE ONLY countrylanguage_co - ADD CONSTRAINT countrylanguage_co_pkey PRIMARY KEY (countrycode, "language"); -ERROR: append-only tables do not support unique indexes create index bitmap_city_co_countrycode on city_co using bitmap(countrycode); create index bitmap_country_co_gf on country_co using bitmap(governmentform); create index bitmap_country_co_region on country_co using bitmap(region); diff --git a/src/test/regress/input/uao_ddl/alter_ao_table_index.source b/src/test/regress/input/uao_ddl/alter_ao_table_index.source index da52b70f89f..2d41b579e3c 100644 --- a/src/test/regress/input/uao_ddl/alter_ao_table_index.source +++ b/src/test/regress/input/uao_ddl/alter_ao_table_index.source @@ -51,6 +51,3 @@ select relfrozenxid from pg_class c, pg_namespace n where select relfrozenxid from gp_dist_random('pg_class') c, pg_namespace n where c.relnamespace = n.oid and relname = 'sto_alt_uao3_idx' and n.nspname = 'alter_ao_table_index_@amname@'; - --- Verify that unique index is not allowed -CREATE UNIQUE INDEX uni_index ON sto_alt_uao3_idx (text_col); diff --git 
a/src/test/regress/input/uao_dml/ao_unique_index_build.source b/src/test/regress/input/uao_dml/ao_unique_index_build.source new file mode 100644 index 00000000000..30ee292aa00 --- /dev/null +++ b/src/test/regress/input/uao_dml/ao_unique_index_build.source @@ -0,0 +1,86 @@ +-- Test cases to cover CREATE UNIQUE INDEX on AO/CO tables. + +SET default_table_access_method TO @amname@; +-- To force index scans for smoke tests +SET enable_seqscan TO off; +SET optimizer TO off; + +-- Case 1: Build with no conflicting rows. +CREATE TABLE unique_index_build_@amname@(i int) DISTRIBUTED REPLICATED; +INSERT INTO unique_index_build_@amname@ VALUES(1); +-- should succeed +CREATE UNIQUE INDEX on unique_index_build_@amname@(i); +-- post-build smoke test +EXPLAIN SELECT * FROM unique_index_build_@amname@ WHERE i = 1; +SELECT * FROM unique_index_build_@amname@ WHERE i = 1; +INSERT INTO unique_index_build_@amname@ VALUES(1); + +DROP TABLE unique_index_build_@amname@; + +-- Case 2: Build with conflicting row. +CREATE TABLE unique_index_build_@amname@(i int) DISTRIBUTED REPLICATED; +INSERT INTO unique_index_build_@amname@ VALUES(1); +INSERT INTO unique_index_build_@amname@ VALUES(1); +-- should ERROR out +CREATE UNIQUE INDEX on unique_index_build_@amname@(i); + +DROP TABLE unique_index_build_@amname@; + +-- Case 3: Build with conflict on aborted row. +CREATE TABLE unique_index_build_@amname@(i int) DISTRIBUTED REPLICATED; +BEGIN; +INSERT INTO unique_index_build_@amname@ VALUES(1); +ABORT; +INSERT INTO unique_index_build_@amname@ VALUES(1); +-- should succeed +CREATE UNIQUE INDEX on unique_index_build_@amname@(i); +-- post-build smoke test +EXPLAIN SELECT * FROM unique_index_build_@amname@ WHERE i = 1; +SELECT * FROM unique_index_build_@amname@ WHERE i = 1; +INSERT INTO unique_index_build_@amname@ VALUES(1); + +DROP TABLE unique_index_build_@amname@; + +-- Case 4: Build with conflict on deleted row. 
+CREATE TABLE unique_index_build_@amname@(i int) DISTRIBUTED REPLICATED; +INSERT INTO unique_index_build_@amname@ VALUES(1); +DELETE FROM unique_index_build_@amname@; +INSERT INTO unique_index_build_@amname@ VALUES(1); +-- should succeed +CREATE UNIQUE INDEX on unique_index_build_@amname@(i); +-- post-build smoke test +EXPLAIN SELECT * FROM unique_index_build_@amname@ WHERE i = 1; +SELECT * FROM unique_index_build_@amname@ WHERE i = 1; +INSERT INTO unique_index_build_@amname@ VALUES(1); + +DROP TABLE unique_index_build_@amname@; + +-- Case 5: Partial unique index build +CREATE TABLE unique_index_build_@amname@(i int) DISTRIBUTED REPLICATED; +INSERT INTO unique_index_build_@amname@ VALUES(1); +INSERT INTO unique_index_build_@amname@ VALUES(1); +INSERT INTO unique_index_build_@amname@ VALUES(2); +INSERT INTO unique_index_build_@amname@ VALUES(6); +INSERT INTO unique_index_build_@amname@ VALUES(6); +-- should fail as conflict lies in indexed portion of data +CREATE UNIQUE INDEX on unique_index_build_@amname@(i) WHERE i < 5; +-- removing conflict should make index build succeed +DELETE FROM unique_index_build_@amname@ WHERE i = 1; +CREATE UNIQUE INDEX on unique_index_build_@amname@(i) WHERE i < 5; +-- post build smoke tests: +-- should succeed as it lies in non-indexed portion +INSERT INTO unique_index_build_@amname@ VALUES(6); +-- should fail as conflict lies in indexed portion of data +INSERT INTO unique_index_build_@amname@ VALUES(2); +-- should succeed as there is no conflicting key that exists +INSERT INTO unique_index_build_@amname@ VALUES(3); +SELECT * FROM unique_index_build_@amname@ WHERE i = 1; +SELECT * FROM unique_index_build_@amname@ WHERE i = 2; +SELECT * FROM unique_index_build_@amname@ WHERE i = 3; +SELECT * FROM unique_index_build_@amname@ WHERE i = 6; + +DROP TABLE unique_index_build_@amname@; + +RESET default_table_access_method; +RESET enable_seqscan; +RESET optimizer; diff --git a/src/test/regress/input/uao_dml/uao_dml_unique_index_delete.source 
b/src/test/regress/input/uao_dml/uao_dml_unique_index_delete.source index 4fcdf691b3f..6f88d3224c9 100644 --- a/src/test/regress/input/uao_dml/uao_dml_unique_index_delete.source +++ b/src/test/regress/input/uao_dml/uao_dml_unique_index_delete.source @@ -2,8 +2,6 @@ create schema uao_dml_unique_index_@amname@; set search_path=uao_dml_unique_index_@amname@; set default_table_access_method=@amname@; -SET gp_appendonly_enable_unique_index TO ON; - -- Case 1: Inserting tx inserting a deleted key--------------------------------- CREATE TABLE uao_unique_index_delete (a INT unique); INSERT INTO uao_unique_index_delete VALUES (1); diff --git a/src/test/regress/input/uao_dml/uao_dml_unique_index_update.source b/src/test/regress/input/uao_dml/uao_dml_unique_index_update.source index 07f40c179a8..fe7c16e3198 100644 --- a/src/test/regress/input/uao_dml/uao_dml_unique_index_update.source +++ b/src/test/regress/input/uao_dml/uao_dml_unique_index_update.source @@ -2,8 +2,6 @@ create schema uao_dml_unique_index_update_@amname@; set search_path=uao_dml_unique_index_update_@amname@; set default_table_access_method=@amname@; -SET gp_appendonly_enable_unique_index TO ON; - -- Case 1: Inserting tx inserting a key affected by an update-------------------- CREATE TABLE uao_unique_index_update (a INT unique); INSERT INTO uao_unique_index_update VALUES (1); diff --git a/src/test/regress/output/uao_ddl/alter_ao_table_index.source b/src/test/regress/output/uao_ddl/alter_ao_table_index.source index 92c3731b7fb..761b95176ff 100644 --- a/src/test/regress/output/uao_ddl/alter_ao_table_index.source +++ b/src/test/regress/output/uao_ddl/alter_ao_table_index.source @@ -111,6 +111,3 @@ select relfrozenxid from gp_dist_random('pg_class') c, pg_namespace n where 0 (3 rows) --- Verify that unique index is not allowed -CREATE UNIQUE INDEX uni_index ON sto_alt_uao3_idx (text_col); -ERROR: append-only tables do not support unique indexes diff --git 
a/src/test/regress/output/uao_dml/ao_unique_index_build.source b/src/test/regress/output/uao_dml/ao_unique_index_build.source new file mode 100644 index 00000000000..734d4903c0f --- /dev/null +++ b/src/test/regress/output/uao_dml/ao_unique_index_build.source @@ -0,0 +1,152 @@ +-- Test cases to cover CREATE UNIQUE INDEX on AO/CO tables. +SET default_table_access_method TO @amname@; +-- To force index scans for smoke tests +SET enable_seqscan TO off; +SET optimizer TO off; +-- Case 1: Build with no conflicting rows. +CREATE TABLE unique_index_build_@amname@(i int) DISTRIBUTED REPLICATED; +INSERT INTO unique_index_build_@amname@ VALUES(1); +-- should succeed +CREATE UNIQUE INDEX on unique_index_build_@amname@(i); +-- post-build smoke test +EXPLAIN SELECT * FROM unique_index_build_@amname@ WHERE i = 1; + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) (cost=8.19..8.19 rows=1 width=4) + -> Bitmap Heap Scan on unique_index_build_@amname@ (cost=4.18..8.19 rows=1 width=4) + Recheck Cond: (i = 1) + -> Bitmap Index Scan on unique_index_build_@amname@_i_idx (cost=0.00..4.18 rows=1 width=0) + Index Cond: (i = 1) + Optimizer: Postgres query optimizer +(6 rows) + +SELECT * FROM unique_index_build_@amname@ WHERE i = 1; + i +--- + 1 +(1 row) + +INSERT INTO unique_index_build_@amname@ VALUES(1); +ERROR: duplicate key value violates unique constraint "unique_index_build_@amname@_i_idx" (seg0 192.168.0.148:7002 pid=1421591) +DETAIL: Key (i)=(1) already exists. +DROP TABLE unique_index_build_@amname@; +-- Case 2: Build with conflicting row. 
+CREATE TABLE unique_index_build_@amname@(i int) DISTRIBUTED REPLICATED; +INSERT INTO unique_index_build_@amname@ VALUES(1); +INSERT INTO unique_index_build_@amname@ VALUES(1); +-- should ERROR out +CREATE UNIQUE INDEX on unique_index_build_@amname@(i); +ERROR: could not create unique index "unique_index_build_@amname@_i_idx" (seg0 192.168.0.148:7002 pid=1421591) +DETAIL: Key (i)=(1) is duplicated. +DROP TABLE unique_index_build_@amname@; +-- Case 3: Build with conflict on aborted row. +CREATE TABLE unique_index_build_@amname@(i int) DISTRIBUTED REPLICATED; +BEGIN; +INSERT INTO unique_index_build_@amname@ VALUES(1); +ABORT; +INSERT INTO unique_index_build_@amname@ VALUES(1); +-- should succeed +CREATE UNIQUE INDEX on unique_index_build_@amname@(i); +-- post-build smoke test +EXPLAIN SELECT * FROM unique_index_build_@amname@ WHERE i = 1; + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) (cost=8.19..8.19 rows=1 width=4) + -> Bitmap Heap Scan on unique_index_build_@amname@ (cost=4.18..8.19 rows=1 width=4) + Recheck Cond: (i = 1) + -> Bitmap Index Scan on unique_index_build_@amname@_i_idx (cost=0.00..4.18 rows=1 width=0) + Index Cond: (i = 1) + Optimizer: Postgres query optimizer +(6 rows) + +SELECT * FROM unique_index_build_@amname@ WHERE i = 1; + i +--- + 1 +(1 row) + +INSERT INTO unique_index_build_@amname@ VALUES(1); +ERROR: duplicate key value violates unique constraint "unique_index_build_@amname@_i_idx" (seg0 192.168.0.148:7002 pid=1421591) +DETAIL: Key (i)=(1) already exists. +DROP TABLE unique_index_build_@amname@; +-- Case 4: Build with conflict on deleted row. 
+CREATE TABLE unique_index_build_@amname@(i int) DISTRIBUTED REPLICATED; +INSERT INTO unique_index_build_@amname@ VALUES(1); +DELETE FROM unique_index_build_@amname@; +INSERT INTO unique_index_build_@amname@ VALUES(1); +-- should succeed +CREATE UNIQUE INDEX on unique_index_build_@amname@(i); +-- post-build smoke test +EXPLAIN SELECT * FROM unique_index_build_@amname@ WHERE i = 1; + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) (cost=8.19..8.19 rows=1 width=4) + -> Bitmap Heap Scan on unique_index_build_@amname@ (cost=4.18..8.19 rows=1 width=4) + Recheck Cond: (i = 1) + -> Bitmap Index Scan on unique_index_build_@amname@_i_idx (cost=0.00..4.18 rows=1 width=0) + Index Cond: (i = 1) + Optimizer: Postgres query optimizer +(6 rows) + +SELECT * FROM unique_index_build_@amname@ WHERE i = 1; + i +--- + 1 +(1 row) + +INSERT INTO unique_index_build_@amname@ VALUES(1); +ERROR: duplicate key value violates unique constraint "unique_index_build_@amname@_i_idx" (seg0 192.168.0.148:7002 pid=1421591) +DETAIL: Key (i)=(1) already exists. +DROP TABLE unique_index_build_@amname@; +-- Case 5: Partial unique index build +CREATE TABLE unique_index_build_@amname@(i int) DISTRIBUTED REPLICATED; +INSERT INTO unique_index_build_@amname@ VALUES(1); +INSERT INTO unique_index_build_@amname@ VALUES(1); +INSERT INTO unique_index_build_@amname@ VALUES(2); +INSERT INTO unique_index_build_@amname@ VALUES(6); +INSERT INTO unique_index_build_@amname@ VALUES(6); +-- should fail as conflict lies in indexed portion of data +CREATE UNIQUE INDEX on unique_index_build_@amname@(i) WHERE i < 5; +ERROR: could not create unique index "unique_index_build_@amname@_i_idx" (seg0 192.168.0.148:7002 pid=3690142) +DETAIL: Key (i)=(1) is duplicated. 
+-- removing conflict should make index build succeed +DELETE FROM unique_index_build_@amname@ WHERE i = 1; +CREATE UNIQUE INDEX on unique_index_build_@amname@(i) WHERE i < 5; +-- post build smoke tests: +-- should succeed as it lies in non-indexed portion +INSERT INTO unique_index_build_@amname@ VALUES(6); +-- should fail as conflict lies in indexed portion of data +INSERT INTO unique_index_build_@amname@ VALUES(2); +ERROR: duplicate key value violates unique constraint "unique_index_build_@amname@_i_idx" (seg1 192.168.0.148:7003 pid=3690143) +DETAIL: Key (i)=(2) already exists. +-- should succeed as there is no conflicting key that exists +INSERT INTO unique_index_build_@amname@ VALUES(3); +SELECT * FROM unique_index_build_@amname@ WHERE i = 1; + i +--- +(0 rows) + +SELECT * FROM unique_index_build_@amname@ WHERE i = 2; + i +--- + 2 +(1 row) + +SELECT * FROM unique_index_build_@amname@ WHERE i = 3; + i +--- + 3 +(1 row) + +SELECT * FROM unique_index_build_@amname@ WHERE i = 6; + i +--- + 6 + 6 + 6 +(3 rows) + +DROP TABLE unique_index_build_@amname@; +RESET default_table_access_method; +RESET enable_seqscan; +RESET optimizer; diff --git a/src/test/regress/output/uao_dml/uao_dml_unique_index_delete.source b/src/test/regress/output/uao_dml/uao_dml_unique_index_delete.source index 39639389e93..3d9ce087e0b 100644 --- a/src/test/regress/output/uao_dml/uao_dml_unique_index_delete.source +++ b/src/test/regress/output/uao_dml/uao_dml_unique_index_delete.source @@ -1,7 +1,6 @@ create schema uao_dml_unique_index_@amname@; set search_path=uao_dml_unique_index_@amname@; set default_table_access_method=@amname@; -SET gp_appendonly_enable_unique_index TO ON; -- Case 1: Inserting tx inserting a deleted key--------------------------------- CREATE TABLE uao_unique_index_delete (a INT unique); INSERT INTO uao_unique_index_delete VALUES (1); diff --git a/src/test/regress/output/uao_dml/uao_dml_unique_index_update.source 
b/src/test/regress/output/uao_dml/uao_dml_unique_index_update.source index 69c11abe876..62000c723bf 100644 --- a/src/test/regress/output/uao_dml/uao_dml_unique_index_update.source +++ b/src/test/regress/output/uao_dml/uao_dml_unique_index_update.source @@ -1,7 +1,6 @@ create schema uao_dml_unique_index_update_@amname@; set search_path=uao_dml_unique_index_update_@amname@; set default_table_access_method=@amname@; -SET gp_appendonly_enable_unique_index TO ON; -- Case 1: Inserting tx inserting a key affected by an update-------------------- CREATE TABLE uao_unique_index_update (a INT unique); INSERT INTO uao_unique_index_update VALUES (1); diff --git a/src/test/regress/sql/alter_table_aocs.sql b/src/test/regress/sql/alter_table_aocs.sql index 3a16ca3fa0d..89cff756f5f 100644 --- a/src/test/regress/sql/alter_table_aocs.sql +++ b/src/test/regress/sql/alter_table_aocs.sql @@ -308,9 +308,6 @@ select attstattarget from pg_attribute where attrelid = 'aocs_addcol.addcol1'::r alter table addcol1 set distributed randomly; alter table addcol1 set distributed by (a); --- test some constraints (unique indexes do not work for unique and pkey) -alter table addcol1 add constraint tunique unique(a); -alter table addcol1 add constraint tpkey primary key(a); alter table addcol1 add constraint tcheck check (a is not null); -- test changing the storage type of a column diff --git a/src/test/regress/sql/qp_with_clause.sql b/src/test/regress/sql/qp_with_clause.sql index b2eea635d68..ad018427f55 100644 --- a/src/test/regress/sql/qp_with_clause.sql +++ b/src/test/regress/sql/qp_with_clause.sql @@ -8133,16 +8133,6 @@ CREATE TABLE countrylanguage_ao ( percentage real NOT NULL ) with (appendonly=true) distributed by (countrycode,language); -ALTER TABLE ONLY city_ao - ADD CONSTRAINT city_ao_pkey PRIMARY KEY (id); - -ALTER TABLE ONLY country_ao - ADD CONSTRAINT country_ao_pkey PRIMARY KEY (code); - -ALTER TABLE ONLY countrylanguage_ao - ADD CONSTRAINT countrylanguage_ao_pkey PRIMARY KEY 
(countrycode, "language"); - - create index bitmap_city_ao_countrycode on city_ao using bitmap(countrycode); create index bitmap_country_ao_gf on country_ao using bitmap(governmentform); create index bitmap_country_ao_region on country_ao using bitmap(region); @@ -8620,16 +8610,6 @@ CREATE TABLE countrylanguage_co ( percentage real NOT NULL ) with (appendonly=true,orientation=column) distributed by (countrycode,language); -ALTER TABLE ONLY city_co - ADD CONSTRAINT city_co_pkey PRIMARY KEY (id); - -ALTER TABLE ONLY country_co - ADD CONSTRAINT country_co_pkey PRIMARY KEY (code); - -ALTER TABLE ONLY countrylanguage_co - ADD CONSTRAINT countrylanguage_co_pkey PRIMARY KEY (countrycode, "language"); - - create index bitmap_city_co_countrycode on city_co using bitmap(countrycode); create index bitmap_country_co_gf on country_co using bitmap(governmentform); create index bitmap_country_co_region on country_co using bitmap(region); From ba0aa0728c7f938efc1279bca3b413de2ca571c2 Mon Sep 17 00:00:00 2001 From: Soumyadeep Chakraborty Date: Mon, 10 Oct 2022 19:38:40 -0700 Subject: [PATCH 15/19] Support VACUUM on AO/CO tables with unique indexes AO lazy VACUUM is different from heap vacuum in the sense that ctids of data tuples change (and the index tuples need to be updated as a consequence). It leverages the scan and insert code to scan live tuples from each segfile and to move (insert) them in a target segfile. The previous commit that added support for INSERT on AO tables with unique indexes, is sufficient to entertain uniqueness checks running concurrently with vacuum. That commit did not introduce the placeholder row mechanism to the vacuum code however, as it is not necessary for correctness. This is because the old index entries will still point to the segment being compacted. 
This will be the case up until the index entries are bulk deleted, but by then the new index entries along with new block directory rows would already have been written and would be able to answer uniqueness checks. This commit has two main contributions: 1. Concurrency coverage for uniqueness checks running in parallel with VACUUM, to verify that the mechanism added in the prior INSERT commit is sufficient. 2. Running uniqueness checks in the backend running AO VACUUM results in spurious uniqueness conflicts. Live tuples being moved (inserted into a new segfile) report a unique constraint violation against themselves. Example: test=# SET debug_appendonly_print_blockdirectory TO ON; SET test=# create table bar(i int unique) using ao_row; CREATE TABLE test=# insert into bar select generate_series(1, 10); INSERT 0 10 test=# delete from bar where i = 1; DELETE 1 test=# vacuum full verbose bar; INFO: vacuuming "public.bar" INFO: compacting "public.bar" LOG: For segno = 0, rownum = 2, tid returned: (0,2) tuple (xmin, xmax) = (940, 0), snaptype = 4 ERROR: duplicate key value violates unique constraint "bar_i_key" DETAIL: Key (i)=(2) already exists. The above reports a conflict while trying to move tuple with i = 2. Remedy this by adding the capability to bypass uniqueness checks when inserting index tuples in ExecInsertIndexTuples(). This is done by a new EState member: gp_bypass_unique_check, which AO lazy VACUUM can set.
Co-authored-by: Ashwin Agrawal --- src/backend/access/aocs/aocs_compaction.c | 8 ++++++++ src/backend/access/appendonly/appendonly_compaction.c | 8 ++++++++ src/backend/commands/vacuum_ao.c | 2 ++ src/backend/executor/execIndexing.c | 10 +++++++++- src/backend/executor/execUtils.c | 2 ++ src/include/nodes/execnodes.h | 8 +++++++- src/test/isolation2/isolation2_schedule | 2 ++ 7 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/backend/access/aocs/aocs_compaction.c b/src/backend/access/aocs/aocs_compaction.c index bb801acfeb2..e80887af9ba 100644 --- a/src/backend/access/aocs/aocs_compaction.c +++ b/src/backend/access/aocs/aocs_compaction.c @@ -282,6 +282,14 @@ AOCSSegmentFileFullCompaction(Relation aorel, estate->es_opened_result_relations = lappend(estate->es_opened_result_relations, resultRelInfo); + /* + * We don't want uniqueness checks to be performed while "insert"ing tuples + * to a destination segfile during AOCSMoveTuple(). This is to ensure that + * we can avoid spurious conflicts between the moved tuple and the original + * tuple. + */ + estate->gp_bypass_unique_check = true; + while (aocs_getnext(scanDesc, ForwardScanDirection, slot)) { CHECK_FOR_INTERRUPTS(); diff --git a/src/backend/access/appendonly/appendonly_compaction.c b/src/backend/access/appendonly/appendonly_compaction.c index 7aeb7b0382c..f4c789695af 100644 --- a/src/backend/access/appendonly/appendonly_compaction.c +++ b/src/backend/access/appendonly/appendonly_compaction.c @@ -454,6 +454,14 @@ AppendOnlySegmentFileFullCompaction(Relation aorel, estate->es_opened_result_relations = lappend(estate->es_opened_result_relations, resultRelInfo); + /* + * We don't want uniqueness checks to be performed while "insert"ing tuples + * to a destination segfile during AppendOnlyMoveTuple(). This is to ensure + * that we can avoid spurious conflicts between the moved tuple and the + * original tuple. 
+ */ + estate->gp_bypass_unique_check = true; + /* * Go through all visible tuples and move them to a new segfile. */ diff --git a/src/backend/commands/vacuum_ao.c b/src/backend/commands/vacuum_ao.c index e9a4b1265d6..6231b407c4a 100644 --- a/src/backend/commands/vacuum_ao.c +++ b/src/backend/commands/vacuum_ao.c @@ -616,6 +616,8 @@ vacuum_appendonly_index(Relation indexRelation, stats = index_bulk_delete(&ivinfo, NULL, appendonly_tid_reaped, (void *) vacuumIndexState); + SIMPLE_FAULT_INJECTOR("vacuum_ao_after_index_delete"); + /* Do post-VACUUM cleanup */ stats = index_vacuum_cleanup(&ivinfo, stats); diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index 74becdc85df..1f524040fd3 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -273,6 +273,14 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo) * * If 'arbiterIndexes' is nonempty, noDupErr applies only to * those indexes. NIL means noDupErr applies to all indexes. + * + * GPDB: gp_bypass_unique_check is introduced so that routines + * such as AO vacuum which don't need to run uniqueness checks + * while inserting tuples can do so. + * + * CAUTION: this must not be called for a HOT update. + * We can't defend against that here for lack of info. + * Should we change the API to make it safer? * ---------------------------------------------------------------- */ List * @@ -388,7 +396,7 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, * For a speculative insertion (used by INSERT ... ON CONFLICT), do * the same as for a deferrable unique index. 
*/ - if (!indexRelation->rd_index->indisunique) + if (!indexRelation->rd_index->indisunique || estate->gp_bypass_unique_check) checkUnique = UNIQUE_CHECK_NO; else if (applyNoDupErr) checkUnique = UNIQUE_CHECK_PARTIAL; diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 0ce8f755548..aaf6f13434a 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -207,6 +207,8 @@ CreateExecutorState(void) estate->useMppParallelMode = false; estate->eliminateAliens = false; + estate->gp_bypass_unique_check = false; + /* * Return the executor state structure */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 9a998212fcc..c7578ffd5a5 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -712,7 +712,13 @@ typedef struct EState /* Should the executor skip past the alien plan nodes */ bool eliminateAliens; - Bitmapset *locallyExecutableSubplans; + + /* + * GPDB: gp_bypass_unique_check is introduced so that routines, such as AO + * vacuum, can avoid running uniqueness checks while inserting tuples. 
+ */ + bool gp_bypass_unique_check; + } EState; struct PlanState; diff --git a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule index 83823c5f31c..af9cd7c13ae 100644 --- a/src/test/isolation2/isolation2_schedule +++ b/src/test/isolation2/isolation2_schedule @@ -305,3 +305,5 @@ test: sync_guc # Tests for unique indexes on AO/CO tables (uses fault injector) test: ao_unique_index test: aocs_unique_index +test: uao/ao_unique_index_vacuum_row +test: uao/ao_unique_index_vacuum_column From 521ef556818559eb9518f390857ab057a6f1790c Mon Sep 17 00:00:00 2001 From: Huansong Fu Date: Mon, 10 Apr 2023 12:54:35 -0700 Subject: [PATCH 16/19] Fix flaky test fsync_ao The test was flaky w/ a diff like this: --- /tmp/build/e18b2f02/gpdb_src/src/test/isolation2/expected/fsync_ao.out 2023-04-10 11:51:22.694100534 +0000 +++ /tmp/build/e18b2f02/gpdb_src/src/test/isolation2/results/fsync_ao.out 2023-04-10 11:51:22.706101385 +0000 @@ -107,7 +107,7 @@ gp_inject_fault ------------ - Success: fault name:'ao_fsync_counter' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'triggered' num times hit:'3' + Success: fault name:'ao_fsync_counter' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'triggered' num times hit:'2' (1 row) The reason seems to be that there's one additional checkpoint being replayed by mirror. The exact reason for the additional checkpoint is unknown, but if I were to guess, most probably the mirror was slow in relaying records and it replayed a checkpoint generated from a previous test after the fault 'restartpoint_guts' was inserted. This would be hard to prevent. However, we should be able to fix the flakiness by waiting until the faults 'ao_fsync_counter' is being hit the exact same times as we expect. 
--- src/test/isolation2/expected/fsync_ao.out | 20 +++++++++++++++++--- src/test/isolation2/sql/fsync_ao.sql | 9 +++++++-- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/test/isolation2/expected/fsync_ao.out b/src/test/isolation2/expected/fsync_ao.out index 36ea980932d..5c4bd17f3ed 100644 --- a/src/test/isolation2/expected/fsync_ao.out +++ b/src/test/isolation2/expected/fsync_ao.out @@ -83,13 +83,17 @@ select gp_wait_until_triggered_fault('restartpoint_guts', 2, dbid) from gp_segme (1 row) -- Validate that the number of files fsync'ed by checkpointer (on --- mirror). `num times hit` is corresponding to the number of files --- synced by `ao_fsync_counter` fault. +-- mirror). This should match the number of files for fsync_ao and fsync_co. +select gp_wait_until_triggered_fault('ao_fsync_counter', 3, dbid) from gp_segment_configuration where content=0 and role='m'; + gp_wait_until_triggered_fault +------------------------------- + Success: +(1 row) select gp_inject_fault('ao_fsync_counter', 'status', dbid) from gp_segment_configuration where content=0 and role='m'; gp_inject_fault ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ - Success: fault name:'ao_fsync_counter' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'triggered' num times hit:'3' + Success: fault name:'ao_fsync_counter' fault type:'skip' ddl statement:'' database name:'' table name:'' start occurrence:'1' end occurrence:'-1' extra arg:'0' fault injection state:'triggered' num times hit:'3' (1 row) -- Test vacuum compaction with more than one segment file per table. 
@@ -186,6 +190,11 @@ select gp_wait_until_triggered_fault('restartpoint_guts', 3, dbid) from gp_segme (1 row) -- Expect the segment files that were updated by vacuum to be fsync'ed. +select gp_wait_until_triggered_fault('ao_fsync_counter', 12, dbid) from gp_segment_configuration where content=0 and role='m'; + gp_wait_until_triggered_fault +------------------------------- + Success: +(1 row) select gp_inject_fault('ao_fsync_counter', 'status', dbid) from gp_segment_configuration where content=0 and role='m'; gp_inject_fault ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- @@ -227,6 +236,11 @@ select gp_wait_until_triggered_fault('restartpoint_guts', 4, dbid) from gp_segme -- Expect that fsync is only performed for fsync_ao table (1 file) but -- not for fsync_co table because it was dropped after being updated. +select gp_wait_until_triggered_fault('ao_fsync_counter', 13, dbid) from gp_segment_configuration where content=0 and role='m'; + gp_wait_until_triggered_fault +------------------------------- + Success: +(1 row) select gp_inject_fault('ao_fsync_counter', 'status', dbid) from gp_segment_configuration where content=0 and role='m'; gp_inject_fault ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/src/test/isolation2/sql/fsync_ao.sql b/src/test/isolation2/sql/fsync_ao.sql index f2ebc6779d6..1e04b3ffdf4 100644 --- a/src/test/isolation2/sql/fsync_ao.sql +++ b/src/test/isolation2/sql/fsync_ao.sql @@ -58,8 +58,9 @@ select gp_wait_until_triggered_fault('restartpoint_guts', 2, dbid) from gp_segment_configuration where content=0 and role='m'; -- Validate that the number of files fsync'ed by checkpointer (on --- mirror). 
`num times hit` is corresponding to the number of files --- synced by `ao_fsync_counter` fault. +-- mirror). This should match the number of files for fsync_ao and fsync_co. +select gp_wait_until_triggered_fault('ao_fsync_counter', 3, dbid) + from gp_segment_configuration where content=0 and role='m'; select gp_inject_fault('ao_fsync_counter', 'status', dbid) from gp_segment_configuration where content=0 and role='m'; @@ -94,6 +95,8 @@ select gp_wait_until_triggered_fault('restartpoint_guts', 3, dbid) from gp_segment_configuration where content=0 and role='m'; -- Expect the segment files that were updated by vacuum to be fsync'ed. +select gp_wait_until_triggered_fault('ao_fsync_counter', 12, dbid) + from gp_segment_configuration where content=0 and role='m'; select gp_inject_fault('ao_fsync_counter', 'status', dbid) from gp_segment_configuration where content=0 and role='m'; @@ -118,6 +121,8 @@ select gp_wait_until_triggered_fault('restartpoint_guts', 4, dbid) -- Expect that fsync is only performed for fsync_ao table (1 file) but -- not for fsync_co table because it was dropped after being updated. +select gp_wait_until_triggered_fault('ao_fsync_counter', 13, dbid) + from gp_segment_configuration where content=0 and role='m'; select gp_inject_fault('ao_fsync_counter', 'status', dbid) from gp_segment_configuration where content=0 and role='m'; From ecb8c18a0f7aba62a14cdea2f03b85491ac11596 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=80=E6=8C=83?= Date: Mon, 7 Mar 2022 17:22:34 +0800 Subject: [PATCH 17/19] Vacuum index on Append-Optimized table enhancement. In the past, we rely on the visibility map to check if an index entry can be removed. Scanning VM is not such effective. With this new strategy, we do index vacuum iff after we recycle a PendingDrop segment, and think the index entry is removable only check if it points to a dropable segment, so the checking would be effective, and dropable segments also guarantee the index entry is invisible to anyone already. 
To make the new VACUUM strategy work properly, it is necessary to do following refactor for recycling dead segments logic: - Combine recycling dead segments and index vacuum together for data consistency and crash safety. - Relax the requirement for AccessExclusiveLock in VACUUM drop phase: acquire AccessShareLock for scaning segfiles; acquire ExclusiveLock for truncating dead segfiles. Co-authored-by: Haolin Wang --- .../access/appendonly/appendonly_compaction.c | 127 ++++--- src/backend/commands/vacuum.c | 34 +- src/backend/commands/vacuum_ao.c | 326 ++++++------------ src/include/access/appendonly_compaction.h | 3 +- src/include/commands/vacuum.h | 8 - .../input/uao/vacuum_cleanup.source | 148 +++++++- .../input/uao/vacuum_index_stats.source | 63 ++++ src/test/isolation2/isolation2_schedule | 2 + .../output/uao/vacuum_cleanup.source | 19 +- .../output/uao/vacuum_index_stats.source | 107 ++++++ src/test/regress/expected/brin_ao.out | 6 + .../regress/expected/brin_ao_optimizer.out | 6 + src/test/regress/expected/brin_aocs.out | 6 + .../regress/expected/brin_aocs_optimizer.out | 6 + .../expected/uao_compaction/drop_column.out | 6 + .../expected/uao_compaction/full_stats.out | 6 + .../expected/uao_compaction/index_stats.out | 6 + .../regress/expected/uao_compaction/stats.out | 8 +- .../expected/uaocs_compaction/drop_column.out | 6 + .../expected/uaocs_compaction/full_stats.out | 6 + .../expected/uaocs_compaction/index_stats.out | 6 + .../expected/uaocs_compaction/stats.out | 6 + src/test/regress/input/uao_dml/uao_dml.source | 8 +- .../regress/output/uao_dml/uao_dml.source | 6 + src/test/regress/sql/brin_ao.sql | 6 + src/test/regress/sql/brin_aocs.sql | 6 + .../sql/uao_compaction/drop_column.sql | 6 + .../regress/sql/uao_compaction/full_stats.sql | 6 + .../sql/uao_compaction/index_stats.sql | 7 + src/test/regress/sql/uao_compaction/stats.sql | 7 + .../sql/uaocs_compaction/drop_column.sql | 6 + .../sql/uaocs_compaction/full_stats.sql | 6 + 
.../sql/uaocs_compaction/index_stats.sql | 7 + .../regress/sql/uaocs_compaction/stats.sql | 7 + 34 files changed, 660 insertions(+), 329 deletions(-) create mode 100644 src/test/isolation2/input/uao/vacuum_index_stats.source create mode 100644 src/test/isolation2/output/uao/vacuum_index_stats.source diff --git a/src/backend/access/appendonly/appendonly_compaction.c b/src/backend/access/appendonly/appendonly_compaction.c index f4c789695af..8fe266a38ea 100644 --- a/src/backend/access/appendonly/appendonly_compaction.c +++ b/src/backend/access/appendonly/appendonly_compaction.c @@ -529,39 +529,28 @@ AppendOnlySegmentFileFullCompaction(Relation aorel, } /* - * Recycle AWAITING_DROP segments. - * - * This tries to acquire an AccessExclusiveLock on the table, if it's - * available. If it's not, no segments are dropped. + * Collect AWAITING_DROP segments. + * + * Acquire AccessShareLock with cutoff_xid to scan and collect dead + * segments. */ -void -AppendOptimizedRecycleDeadSegments(Relation aorel) +Bitmapset * +AppendOptimizedCollectDeadSegments(Relation aorel) { Relation pg_aoseg_rel; TupleDesc pg_aoseg_dsc; SysScanDesc aoscan; HeapTuple tuple; Snapshot appendOnlyMetaDataSnapshot = RegisterSnapshot(GetCatalogSnapshot(InvalidOid)); - bool got_accessexclusive_lock = false; TransactionId cutoff_xid = InvalidTransactionId; Oid segrelid; + Bitmapset *dead_segs = NULL; Assert(RelationIsAppendOptimized(aorel)); - /* - * The algorithm below for choosing a target segment is not concurrent-safe. - * Grab a lock to serialize. - * - * INterlocks with SetSegnoInternal() - */ - LockDatabaseObject(aorel->rd_node.dbNode, (Oid)aorel->rd_node.relNode, 0, ExclusiveLock); - GetAppendOnlyEntryAuxOids(aorel->rd_id, appendOnlyMetaDataSnapshot, &segrelid, NULL, NULL, NULL, NULL); - /* - * Now pick a segment that is not in use, and is not over the allowed - * size threshold (90% full). 
- */ + pg_aoseg_rel = heap_open(segrelid, AccessShareLock); pg_aoseg_dsc = RelationGetDescr(pg_aoseg_rel); @@ -581,7 +570,9 @@ AppendOptimizedRecycleDeadSegments(Relation aorel) pg_aoseg_dsc, &isNull)); Assert(!isNull); - state = fastgetattr(tuple, Anum_pg_aoseg_state, pg_aoseg_dsc, &isNull); + state = DatumGetInt16(fastgetattr(tuple, + Anum_pg_aoseg_state, + pg_aoseg_dsc, &isNull)); Assert(!isNull); } else @@ -601,24 +592,8 @@ AppendOptimizedRecycleDeadSegments(Relation aorel) continue; /* - * Upgrade our lock to AccessExclusiveLock for the drop. Upgrading a - * lock poses a deadlock risk, so give up if we cannot acquire the - * lock immediately. We'll retry dropping the segment on the next - * VACUUM. - */ - if (!got_accessexclusive_lock) - { - if (!ConditionalLockRelation(aorel, AccessExclusiveLock)) - { - if (Debug_appendonly_print_compaction) - elog(LOG, "could not acquire AccessExclusiveLock lock on %s to recycle segno %d", - RelationGetRelationName(aorel), segno); - break; - } - got_accessexclusive_lock = true; - } - - /* + * Cutoff XID Screening + * * It's in awaiting-drop state, but does everyone see it that way? * * Compare the tuple's xmin with the oldest-xmin horizon. We don't bother @@ -626,6 +601,22 @@ AppendOptimizedRecycleDeadSegments(Relation aorel) * should not be set. Even if the tuple was update, presumably an AO * segment that's in awaiting-drop state won't be resurrected, so even if * someone updates or locks the tuple, it's still safe to drop. + * + * We don't need to acquire AccessExclusiveLock any longer because we only + * scan pg_aoseg to collect dead segments but no truncaste happens here. + * Considering the following two cases: + * + * a) When there was a reader accessing a segment file which was changed to + * AWAITING_DROP in later VACUUM compaction, the reader's xid should be earlier + * than this tuple's xmin hence would set visible_to_all to false. 
Then the + * AWAITING_DROP segment file wouldn't be dropped in this VACUUM cleanup and + * the earlier reader could still be able to access old tuples. + * + * b) Continue above, so there was a segment file in AWAITING_DROP state, the + * subsequent transactions can't see that hence it wouldn't be touched until + * next VACUUM is arrived. Therefore no later transaction's xid could be earlier + * than this dead segment tuple's xmin hence it would be true on visible_to_all. + * Then the corresponding dead segment file could be dropped later at that time. */ xmin = HeapTupleHeaderGetXmin(tuple->t_data); if (xmin == FrozenTransactionId) @@ -641,25 +632,55 @@ AppendOptimizedRecycleDeadSegments(Relation aorel) if (!visible_to_all) continue; - /* all set! */ - if (RelationIsAoRows(aorel)) - { - AppendOnlyCompaction_DropSegmentFile(aorel, segno); - ClearFileSegInfo(aorel, segno); - } - else - { - AOCSCompaction_DropSegmentFile(aorel, segno); - ClearAOCSFileSegInfo(aorel, segno); - } + /* collect dead segnos for dropping */ + dead_segs = bms_add_member(dead_segs, segno); } systable_endscan(aoscan); - UnlockDatabaseObject(aorel->rd_node.dbNode, (Oid)aorel->rd_node.relNode, 0, ExclusiveLock); - heap_close(pg_aoseg_rel, AccessShareLock); UnregisterSnapshot(appendOnlyMetaDataSnapshot); + + return dead_segs; +} + +/* + * Drop AWAITING_DROP segments. + * + * Callers should guarantee that the segfile is no longer needed by any + * running transaction. It is not necessary to hold a lock on the segfile + * row, though. 
+ */ +static inline void +AppendOptimizedDropDeadSegment(Relation aorel, int segno) +{ + if (RelationIsAoRows(aorel)) + { + AppendOnlyCompaction_DropSegmentFile(aorel, segno); + ClearFileSegInfo(aorel, segno); + } + else + { + AOCSCompaction_DropSegmentFile(aorel, segno); + ClearAOCSFileSegInfo(aorel, segno); + } +} + +void +AppendOptimizedDropDeadSegments(Relation aorel, Bitmapset *segnos) +{ + int segno; + + /* + * drop segments in batch with concurrent-safety + */ + LockRelationForExtension(aorel, ExclusiveLock); + + segno = -1; + while ((segno = bms_next_member(segnos, segno)) >= 0) + AppendOptimizedDropDeadSegment(aorel, segno); + + UnlockRelationForExtension(aorel, ExclusiveLock); } /* @@ -691,10 +712,6 @@ AppendOptimizedTruncateToEOF(Relation aorel) GetAppendOnlyEntryAuxOids(aorel->rd_id, appendOnlyMetaDataSnapshot, &segrelid, NULL, NULL, NULL, NULL); - /* - * Now pick a segment that is not in use, and is not over the allowed - * size threshold (90% full). - */ pg_aoseg_rel = heap_open(segrelid, AccessShareLock); pg_aoseg_dsc = RelationGetDescr(pg_aoseg_rel); diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 0c8ea4aaf20..ff8397852d7 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -2558,27 +2558,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, LockRelation(rel, ShareLock); } - /* - * Do the actual work --- either FULL or "lazy" vacuum - */ - if (ao_vacuum_phase == VACOPT_AO_PRE_CLEANUP_PHASE) - { - ao_vacuum_rel_pre_cleanup(rel, params->options, params, vac_strategy); - } - else if (ao_vacuum_phase == VACOPT_AO_COMPACT_PHASE) - { - ao_vacuum_rel_compact(rel, params->options, params, vac_strategy); - } - else if (ao_vacuum_phase == VACOPT_AO_POST_CLEANUP_PHASE) - { - ao_vacuum_rel_post_cleanup(rel, params->options, params, vac_strategy); - } - else if (is_appendoptimized) - { - /* Do nothing here, we will launch the stages later */ - Assert(ao_vacuum_phase == 0); - } - else if 
((params->options & VACOPT_FULL)) + if (!is_appendoptimized && (params->options & VACOPT_FULL)) { ClusterParams cluster_params = {0}; @@ -2592,8 +2572,8 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */ cluster_rel(relid, InvalidOid, &cluster_params); } - else - table_relation_vacuum(rel, params, vac_strategy); + else /* Heap vacuum or AO/CO vacuum in specific phase */ + table_relation_vacuum(onerel, params, vac_strategy); /* Roll back any GUC changes executed by index functions */ AtEOXact_GUC(false, save_nestlevel); @@ -2611,10 +2591,13 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, PopActiveSnapshot(); CommitTransactionCommand(); + /* entrance of Append-Optimized table vacuum */ if (is_appendoptimized && ao_vacuum_phase == 0) { - int orig_options = params->options; + int orig_options = params->options; + /* orchestrate the AO vacuum phases */ + /* * Do cleanup first, to reclaim as much space as possible that * was left behind from previous VACUUMs. This runs under local @@ -2627,7 +2610,8 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, params->options = orig_options | VACOPT_AO_COMPACT_PHASE; vacuum_rel(relid, this_rangevar, params, false); - /* Do a final round of cleanup. Hopefully, this can drop the segments + /* + * Do a final round of cleanup. Hopefully, this can drop the segments * that were compacted in the previous phase. 
*/ params->options = orig_options | VACOPT_AO_POST_CLEANUP_PHASE; diff --git a/src/backend/commands/vacuum_ao.c b/src/backend/commands/vacuum_ao.c index 6231b407c4a..6aecb6d5965 100644 --- a/src/backend/commands/vacuum_ao.c +++ b/src/backend/commands/vacuum_ao.c @@ -115,6 +115,7 @@ */ #include "postgres.h" +#include "access/table.h" #include "access/aocs_compaction.h" #include "access/appendonlywriter.h" #include "access/appendonly_compaction.h" @@ -142,19 +143,10 @@ #include "utils/pg_rusage.h" #include "cdb/cdbappendonlyblockdirectory.h" -/* - * State information used during the vacuum of indexes on append-only tables - */ -typedef struct AppendOnlyIndexVacuumState -{ - AppendOnlyVisimap visiMap; - AppendOnlyBlockDirectory blockDirectory; - AppendOnlyBlockDirectoryEntry blockDirectoryEntry; -} AppendOnlyIndexVacuumState; static void vacuum_appendonly_index(Relation indexRelation, - AppendOnlyIndexVacuumState *vacuumIndexState, double rel_tuple_count, + Bitmapset *dead_segs, int elevel, BufferAccessStrategy bstrategy); @@ -162,20 +154,18 @@ static bool appendonly_tid_reaped(ItemPointer itemptr, void *state); static void vacuum_appendonly_fill_stats(Relation aorel, Snapshot snapshot, int elevel, BlockNumber *rel_pages, double *rel_tuples, - bool *relhasindex, BlockNumber *total_file_segs); -static int vacuum_appendonly_indexes(Relation aoRelation, int options, + bool *relhasindex); +static int vacuum_appendonly_indexes(Relation aoRelation, int options, Bitmapset *dead_segs, BufferAccessStrategy bstrategy); -static void scan_index(Relation indrel, - AppendOnlyIndexVacuumState *vacuumIndexState, - double num_tuples, - int elevel, BufferAccessStrategy vac_strategy); +static void ao_vacuum_rel_recycle_dead_segments(Relation onerel, VacuumParams *params, + BufferAccessStrategy bstrategy); -void -ao_vacuum_rel_pre_cleanup(Relation onerel, int options, VacuumParams *params, - BufferAccessStrategy bstrategy) +static void +ao_vacuum_rel_pre_cleanup(Relation onerel, 
VacuumParams *params, BufferAccessStrategy bstrategy) { char *relname; int elevel; + int options = params->options; if (options & VACOPT_VERBOSE) elevel = INFO; @@ -185,7 +175,13 @@ ao_vacuum_rel_pre_cleanup(Relation onerel, int options, VacuumParams *params, if (Gp_role == GP_ROLE_DISPATCH) elevel = DEBUG2; /* vacuum and analyze messages aren't interesting from the QD */ - /* + relname = RelationGetRelationName(onerel); + ereport(elevel, + (errmsg("vacuuming \"%s.%s\"", + get_namespace_name(RelationGetNamespace(onerel)), + relname))); + + /* * Truncate AWAITING_DROP segments that are no longer visible to anyone * to 0 bytes. We cannot actually remove them yet, because there might * still be index entries pointing to them. We cannot recycle the segments @@ -197,14 +193,7 @@ ao_vacuum_rel_pre_cleanup(Relation onerel, int options, VacuumParams *params, * * This could run in a local transaction. */ - - relname = RelationGetRelationName(onerel); - ereport(elevel, - (errmsg("vacuuming \"%s.%s\"", - get_namespace_name(RelationGetNamespace(onerel)), - relname))); - - AppendOptimizedRecycleDeadSegments(onerel); + ao_vacuum_rel_recycle_dead_segments(onerel, params, bstrategy); /* * Also truncate all live segments to the EOF values stored in pg_aoseg. 
@@ -214,9 +203,8 @@ ao_vacuum_rel_pre_cleanup(Relation onerel, int options, VacuumParams *params, } -void -ao_vacuum_rel_post_cleanup(Relation onerel, int options, VacuumParams *params, - BufferAccessStrategy bstrategy) +static void +ao_vacuum_rel_post_cleanup(Relation onerel, VacuumParams *params, BufferAccessStrategy bstrategy) { BlockNumber relpages; double reltuples; @@ -226,6 +214,7 @@ ao_vacuum_rel_post_cleanup(Relation onerel, int options, VacuumParams *params, */ BlockNumber total_file_segs; int elevel; + int options = params->options; TransactionId OldestXmin; TransactionId FreezeLimit; MultiXactId MultiXactCutoff; @@ -240,20 +229,21 @@ ao_vacuum_rel_post_cleanup(Relation onerel, int options, VacuumParams *params, if (Gp_role == GP_ROLE_DISPATCH) elevel = DEBUG2; /* vacuum and analyze messages aren't interesting from the QD */ - /*----- + /* * This could run in a *local* transaction: * * 1. Recycled any dead AWAITING_DROP segments, like in the * pre-cleanup phase. * * 2. Vacuum indexes. - *---- + * + * 3. Drop/Truncate dead segments. + * + * 4. Update statistics. 
*/ Assert(RelationIsAoRows(onerel) || RelationIsAoCols(onerel)); - AppendOptimizedRecycleDeadSegments(onerel); - - vacuum_appendonly_indexes(onerel, options, bstrategy); + ao_vacuum_rel_recycle_dead_segments(onerel, params, bstrategy); /* Update statistics in pg_class */ vacuum_appendonly_fill_stats(onerel, GetActiveSnapshot(), @@ -285,17 +275,16 @@ ao_vacuum_rel_post_cleanup(Relation onerel, int options, VacuumParams *params, true /* isvacuum */); } -void -ao_vacuum_rel_compact(Relation onerel, int options, VacuumParams *params, - BufferAccessStrategy bstrategy) +static void +ao_vacuum_rel_compact(Relation onerel, VacuumParams *params, BufferAccessStrategy bstrategy) { int compaction_segno; int insert_segno; List *compacted_segments = NIL; List *compacted_and_inserted_segments = NIL; - Snapshot appendOnlyMetaDataSnapshot = RegisterSnapshot(GetCatalogSnapshot(InvalidOid)); char *relname; int elevel; + int options = params->options; /* * This should run in a distributed transaction. But also allow utility @@ -372,8 +361,6 @@ ao_vacuum_rel_compact(Relation onerel, int options, VacuumParams *params, */ CommandCounterIncrement(); } - - UnregisterSnapshot(appendOnlyMetaDataSnapshot); } /* @@ -393,62 +380,73 @@ ao_vacuum_rel(Relation rel, VacuumParams *params, BufferAccessStrategy bstrategy * Do the actual work --- either FULL or "lazy" vacuum */ if (ao_vacuum_phase == VACOPT_AO_PRE_CLEANUP_PHASE) - ao_vacuum_rel_pre_cleanup(rel, params->options, params, bstrategy); + ao_vacuum_rel_pre_cleanup(rel, params, bstrategy); else if (ao_vacuum_phase == VACOPT_AO_COMPACT_PHASE) - ao_vacuum_rel_compact(rel, params->options, params, bstrategy); + ao_vacuum_rel_compact(rel, params, bstrategy); else if (ao_vacuum_phase == VACOPT_AO_POST_CLEANUP_PHASE) - ao_vacuum_rel_post_cleanup(rel, params->options, params, bstrategy); + ao_vacuum_rel_post_cleanup(rel, params, bstrategy); else /* Do nothing here, we will launch the stages later */ Assert(ao_vacuum_phase == 0); } - -static bool 
-vacuum_appendonly_index_should_vacuum(Relation aoRelation, - int options, - Snapshot snapshot, - AppendOnlyIndexVacuumState *vacuumIndexState, - double *rel_tuple_count) +/* + * Recycling AWAITING_DROP segments. + */ +static void +ao_vacuum_rel_recycle_dead_segments(Relation onerel, VacuumParams *params, BufferAccessStrategy bstrategy) { - int64 hidden_tupcount; - FileSegTotals *totals; - - Assert(RelationIsAppendOptimized(aoRelation)); - - if (Gp_role == GP_ROLE_DISPATCH) - { - if (rel_tuple_count) - { - *rel_tuple_count = 0.0; - } - return false; - } + Bitmapset *dead_segs; + int options = params->options; + bool need_drop; - if (RelationIsAoRows(aoRelation)) + dead_segs = AppendOptimizedCollectDeadSegments(onerel); + need_drop = !bms_is_empty(dead_segs); + if (need_drop) { - totals = GetSegFilesTotals(aoRelation, snapshot); + /* + * Vacuum indexes only when we do find AWAITING_DROP segments. + * + * Do index vacuuming before dropping dead segments for data + * consistency and crash safety. If dropping dead segments before + * cleaning up index tuples, the following issues may occur: + * + * a) The dead segment file becomes available as soon as the drop + * completes. Concurrent inserts may fill it with new tuples which + * might be deleted soon in the following index vacuuming; + * + * b) A crash in-between ao_vacuum_rel_recycle_dead_segments() + * and vacuum_appendonly_indexes() results in losing the opportunity + * to clean index entries fully, since the record of which index tuples + * to delete will be lost in this case. + * + * So make sure to vacuum indexes based on persistent information + * (AWAITING_DROP state in pg_aoseg) to clean up dead index tuples + * effectively. + */ + vacuum_appendonly_indexes(onerel, options, dead_segs, bstrategy); + /* + * Truncate above collected AWAITING_DROP segments to 0 bytes.
+ * AppendOptimizedCollectDeadSegments() should guarantee that + * no transaction is able to access the dead segments, due to being + * marked as AWAITING_DROP and the cutoff xid screening. + * ExclusiveLock will be held in case of concurrent VACUUM being + * on the same file. + */ + AppendOptimizedDropDeadSegments(onerel, dead_segs); } else { - Assert(RelationIsAoCols(aoRelation)); - totals = GetAOCSSSegFilesTotals(aoRelation, snapshot); - } - hidden_tupcount = AppendOnlyVisimap_GetRelationHiddenTupleCount(&vacuumIndexState->visiMap); - - if (rel_tuple_count) - { - *rel_tuple_count = (double)(totals->totaltuples - hidden_tupcount); - Assert((*rel_tuple_count) > -1.0); + /* + * If no AWAITING_DROP segments were found, we still call + * vacuum_appendonly_indexes() in the post_cleanup phase + * to update statistics. + */ + if ((options & VACUUM_AO_PHASE_MASK) == VACOPT_AO_POST_CLEANUP_PHASE) + vacuum_appendonly_indexes(onerel, options, dead_segs, bstrategy); } - pfree(totals); - - if (hidden_tupcount > 0 || (options & VACOPT_FULL) != 0) - { - return true; - } - return false; + bms_free(dead_segs); } /* @@ -456,30 +454,18 @@ vacuum_appendonly_index_should_vacuum(Relation aoRelation, * * Perform a vacuum on all indexes of an append-only relation. * - * The page and tuplecount information in vacrelstats are used, the - * nindex value is set by this function. - * * It returns the number of indexes on the relation.
*/ static int -vacuum_appendonly_indexes(Relation aoRelation, int options, +vacuum_appendonly_indexes(Relation aoRelation, int options, Bitmapset *dead_segs, BufferAccessStrategy bstrategy) { - int reindex_count = 1; int i; Relation *Irel; int nindexes; - AppendOnlyIndexVacuumState vacuumIndexState; - FileSegInfo **segmentFileInfo = NULL; /* Might be a casted AOCSFileSegInfo */ - int totalSegfiles; - Snapshot appendOnlyMetaDataSnapshot; - Oid visimaprelid; - Oid visimapidxid; Assert(RelationIsAppendOptimized(aoRelation)); - memset(&vacuumIndexState, 0, sizeof(vacuumIndexState)); - if (Debug_appendonly_print_compaction) elog(LOG, "Vacuum indexes for append-only relation %s", RelationGetRelationName(aoRelation)); @@ -490,94 +476,35 @@ vacuum_appendonly_indexes(Relation aoRelation, int options, else vac_open_indexes(aoRelation, RowExclusiveLock, &nindexes, &Irel); - appendOnlyMetaDataSnapshot = GetActiveSnapshot(); - - if (RelationIsAoRows(aoRelation)) - { - segmentFileInfo = GetAllFileSegInfo(aoRelation, - appendOnlyMetaDataSnapshot, - &totalSegfiles, - NULL); - } - else - { - Assert(RelationIsAoCols(aoRelation)); - segmentFileInfo = (FileSegInfo **) GetAllAOCSFileSegInfo(aoRelation, - appendOnlyMetaDataSnapshot, - &totalSegfiles, - NULL); - } - - GetAppendOnlyEntryAuxOids(aoRelation->rd_id, - appendOnlyMetaDataSnapshot, - NULL, NULL, NULL, - &visimaprelid, &visimapidxid); - - AppendOnlyVisimap_Init( - &vacuumIndexState.visiMap, - visimaprelid, - visimapidxid, - AccessShareLock, - appendOnlyMetaDataSnapshot); - - AppendOnlyBlockDirectory_Init_forSearch(&vacuumIndexState.blockDirectory, - appendOnlyMetaDataSnapshot, - segmentFileInfo, - totalSegfiles, - aoRelation, - 1, - RelationIsAoCols(aoRelation), - NULL); - /* Clean/scan index relation(s) */ if (Irel != NULL) { - double rel_tuple_count = 0.0; - int elevel; + int elevel; - /* just scan indexes to update statistic */ if (options & VACOPT_VERBOSE) elevel = INFO; else elevel = DEBUG2; - if 
(vacuum_appendonly_index_should_vacuum(aoRelation, options, - appendOnlyMetaDataSnapshot, - &vacuumIndexState, - &rel_tuple_count)) + /* just scan indexes to update statistic */ + if (Gp_role == GP_ROLE_DISPATCH || bms_is_empty(dead_segs)) { - Assert(rel_tuple_count > -1.0); - for (i = 0; i < nindexes; i++) { - vacuum_appendonly_index(Irel[i], &vacuumIndexState, - rel_tuple_count, - elevel, - bstrategy); + scan_index(Irel[i], Irel[i]->rd_rel->reltuples, elevel, bstrategy); } - reindex_count++; } else { for (i = 0; i < nindexes; i++) - scan_index(Irel[i], &vacuumIndexState, rel_tuple_count, elevel, bstrategy); - } - } - - AppendOnlyVisimap_Finish(&vacuumIndexState.visiMap, AccessShareLock); - AppendOnlyBlockDirectory_End_forSearch(&vacuumIndexState.blockDirectory); - - if (segmentFileInfo) - { - if (RelationIsAoRows(aoRelation)) - { - FreeAllSegFileInfo(segmentFileInfo, totalSegfiles); - } - else - { - FreeAllAOCSSegFileInfo((AOCSFileSegInfo **)segmentFileInfo, totalSegfiles); + { + vacuum_appendonly_index(Irel[i], + Irel[i]->rd_rel->reltuples, + dead_segs, + elevel, + bstrategy); + } } - pfree(segmentFileInfo); } vac_close_indexes(nindexes, Irel, NoLock); @@ -593,18 +520,17 @@ vacuum_appendonly_indexes(Relation aoRelation, int options, */ static void vacuum_appendonly_index(Relation indexRelation, - AppendOnlyIndexVacuumState *vacuumIndexState, double rel_tuple_count, + Bitmapset *dead_segs, int elevel, BufferAccessStrategy bstrategy) { - Assert(RelationIsValid(indexRelation)); - Assert(vacuumIndexState); - IndexBulkDeleteResult *stats; IndexVacuumInfo ivinfo; PGRUsage ru0; + Assert(RelationIsValid(indexRelation)); + pg_rusage_init(&ru0); ivinfo.index = indexRelation; @@ -653,70 +579,26 @@ vacuum_appendonly_index(Relation indexRelation, pfree(stats); } -static bool -appendonly_tid_reaped_check_block_directory(AppendOnlyIndexVacuumState *vacuumState, - AOTupleId *aoTupleId) -{ - if (vacuumState->blockDirectory.currentSegmentFileNum == - 
AOTupleIdGet_segmentFileNum(aoTupleId) && - AppendOnlyBlockDirectoryEntry_RangeHasRow(&vacuumState->blockDirectoryEntry, - AOTupleIdGet_rowNum(aoTupleId))) - { - return true; - } - - if (!AppendOnlyBlockDirectory_GetEntry(&vacuumState->blockDirectory, - aoTupleId, - 0, - &vacuumState->blockDirectoryEntry)) - { - return false; - } - return (vacuumState->blockDirectory.currentSegmentFileNum == - AOTupleIdGet_segmentFileNum(aoTupleId) && - AppendOnlyBlockDirectoryEntry_RangeHasRow(&vacuumState->blockDirectoryEntry, - AOTupleIdGet_rowNum(aoTupleId))); -} - /* * appendonly_tid_reaped() * - * Is a particular tid for an appendonly reaped? - * state should contain an integer list of all compacted - * segment files. + * Is a particular tid for an appendonly reaped? The input state + * is a bitmap of dropped segnos. The index entry is reaped only + * if its segno is a member of dead_segs. In this + * way, there is no need to scan the visibility map, so performance is + * good. * * This has the right signature to be an IndexBulkDeleteCallback. */ static bool appendonly_tid_reaped(ItemPointer itemptr, void *state) { - AOTupleId *aoTupleId; - AppendOnlyIndexVacuumState *vacuumState; - bool reaped; - - Assert(itemptr); - Assert(state); + Bitmapset *dead_segs = (Bitmapset *) state; + int segno = AOTupleIdGet_segmentFileNum((AOTupleId *)itemptr); - aoTupleId = (AOTupleId *)itemptr; - vacuumState = (AppendOnlyIndexVacuumState *)state; - - reaped = !appendonly_tid_reaped_check_block_directory(vacuumState, - aoTupleId); - if (!reaped) - { - /* Also check visi map */ - reaped = !AppendOnlyVisimap_IsVisible(&vacuumState->visiMap, - aoTupleId); - } - - if (Debug_appendonly_print_compaction) - ereport(DEBUG3, - (errmsg("Index vacuum %s %d", - AOTupleIdToString(aoTupleId), reaped))); - return reaped; + return bms_is_member(segno, dead_segs); } - /* * Fills in the relation statistics for an append-only relation.
* diff --git a/src/include/access/appendonly_compaction.h b/src/include/access/appendonly_compaction.h index e07c5a65a60..ce44d2a4979 100644 --- a/src/include/access/appendonly_compaction.h +++ b/src/include/access/appendonly_compaction.h @@ -21,7 +21,8 @@ #define APPENDONLY_COMPACTION_SEGNO_INVALID (-1) -extern void AppendOptimizedRecycleDeadSegments(Relation aorel); +extern Bitmapset *AppendOptimizedCollectDeadSegments(Relation aorel); +extern void AppendOptimizedDropDeadSegments(Relation aorel, Bitmapset *segnos); extern void AppendOnlyCompact(Relation aorel, int compaction_segno, int *insert_segno, diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index daeade87983..08e100da29d 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -387,14 +387,6 @@ extern void lazy_vacuum_rel_heap(Relation onerel, VacuumParams *params, BufferAccessStrategy bstrategy); /* in commands/vacuum_ao.c */ - -extern void ao_vacuum_rel_pre_cleanup(Relation onerel, int options, VacuumParams *params, - BufferAccessStrategy bstrategy); -extern void ao_vacuum_rel_compact(Relation onerel, int options, VacuumParams *params, - BufferAccessStrategy bstrategy); -extern void ao_vacuum_rel_post_cleanup(Relation onerel, int options, VacuumParams *params, - BufferAccessStrategy bstrategy); - extern void ao_vacuum_rel(Relation rel, VacuumParams *params, BufferAccessStrategy bstrategy); extern bool std_typanalyze(VacAttrStats *stats); diff --git a/src/test/isolation2/input/uao/vacuum_cleanup.source b/src/test/isolation2/input/uao/vacuum_cleanup.source index 1a7ebef0799..fd967a03195 100644 --- a/src/test/isolation2/input/uao/vacuum_cleanup.source +++ b/src/test/isolation2/input/uao/vacuum_cleanup.source @@ -27,8 +27,7 @@ 1: select age(relfrozenxid), regexp_replace(replace(relname, 'ao_@amname@_vacuum_cleanup2'::regclass::oid::text, ''), 'ao.*seg', '') from gp_dist_random('pg_class') where relkind in ('r','t','o','b','M') and (relname like '%' || 
'ao_@amname@_vacuum_cleanup2'::regclass::oid || '%') and gp_segment_id = 0; 2: end; --- Check that drop phase is skipped, but still the cleanup phase is performed --- when there are concurrent serializable transactions +-- Check that drop phase is not skipped, when there are concurrent serializable transactions. 1: create table ao_@amname@_vacuum_cleanup3(a int, b int) using @amname@; 1: insert into ao_@amname@_vacuum_cleanup3 select i, i from generate_series(1, 100) i; 1: delete from ao_@amname@_vacuum_cleanup3; @@ -41,9 +40,9 @@ -- Wait till compaction phase is completed and only then start the serializable -- transaction to ensure that only drop phase runs after the serializable --- transaction started. Because the transaction is holding a lock on the table, --- the drop phase cannot upgrade to AccessExclusiveLock and so recycling the --- segfiles is skipped. +-- transaction started. For the new VACUUM strategy, because no need to upgrade +-- to AccessExclusiveLock for drop phase, and the transaction would not access +-- AWAITING_DROP segfiles, dropping the dead segfiles could proceed accordingly. 2: select gp_wait_until_triggered_fault('vacuum_relation_open_relation_during_drop_phase', 1, 1); 2: begin isolation level serializable; 2: select * from ao_@amname@_vacuum_cleanup3; @@ -52,10 +51,145 @@ 1<: 1: select age(relfrozenxid), regexp_replace(replace(relname, 'ao_@amname@_vacuum_cleanup3'::regclass::oid::text, ''), 'ao.*seg', '') from gp_dist_random('pg_class') where relkind in ('r','t','o','b','M') and (relname like '%' || 'ao_@amname@_vacuum_cleanup3'::regclass::oid || '%') and gp_segment_id = 0; --- Validate that the drop phase was skipped. segfile 1 should be in state 2 --- (AWAITING_DROP) +-- Validate that the drop phase wasn't skipped in the new VACUUM strategy, +-- segfile 1 should be in state 1 (AWAITING_DEFAULT). 
This is because +-- no need to acquire AccessExclusiveLock at dead segments recycling hence +-- the cleanup routine could be performed as is. -- This result is related to data distribution. -- Current hash algorithm is jump-consistent-hash. 1: SELECT * FROM gp_ao_or_aocs_seg('ao_@amname@_vacuum_cleanup3'); 2: commit; + +1q: +2q: + +-- Test VACUUM with concurrent readers: +-- a) if reader transaction started before VACUUM, VACUUM should not drop the AWAITING_DROP segment +-- which was accessing by the reader; +-- b) if reader transaction started after VACUUM, VACUUM should drop the AWAITING_DROP segments. + +create or replace function show_aoseg(tabname text) returns table + (segno int, tupcount bigint, modcount bigint, formatversion smallint, state smallint) as $$ +declare + tabrelid oid; /* in func */ + tabsegrelid oid; /* in func */ + tabsegrelname text; /* in func */ +begin + select tabname::regclass::oid into tabrelid; /* in func */ + select segrelid from pg_appendonly where relid = tabrelid into tabsegrelid; /* in func */ + select relname from pg_class where oid = tabsegrelid into tabsegrelname; /* in func */ + + return query execute 'select segno,tupcount,modcount,formatversion,state from pg_aoseg.' 
|| tabsegrelname; /* in func */ +end; /* in func */ +$$ language plpgsql; + +create table vacuum_concurrent_test_@amname@ (a int, b int, c int) using @amname@; +insert into vacuum_concurrent_test_@amname@ select 2, b, b from generate_series(1, 11) b; +create index i_b_vacuum_concurrent_reader_@amname@ on vacuum_concurrent_test_@amname@(b); +update vacuum_concurrent_test_@amname@ set b = b + 1; + +-- expect segment state is DEFAULT (state == 1) +0U: select * from show_aoseg('vacuum_concurrent_test_@amname@') order by segno; + +-- start a reader before VACUUM +1: begin; +1: select * from vacuum_concurrent_test_@amname@ where b = 10; + +vacuum vacuum_concurrent_test_@amname@; + +-- expect to see AWAITING_DROP(state == 2) tuple +0U: select * from show_aoseg('vacuum_concurrent_test_@amname@') order by segno; + +1: select * from vacuum_concurrent_test_@amname@ where b = 5; +1: select * from vacuum_concurrent_test_@amname@ order by c; +1: end; + +-- start another reader after VACUUM +2: begin; +2: select * from vacuum_concurrent_test_@amname@ where c = 2; + +-- expect to see AWAITING_DROP(state == 2) tuple for unable to drop the dead segment by the first VACUUM +0U: select * from show_aoseg('vacuum_concurrent_test_@amname@') order by segno; + +vacuum vacuum_concurrent_test_@amname@; + +-- expect no AWAITING_DROP(state == 2) tuple because it was dropped by the last VACUUM +0U: select * from show_aoseg('vacuum_concurrent_test_@amname@') order by segno; + +2: select * from vacuum_concurrent_test_@amname@ where b = 7; +2: select * from vacuum_concurrent_test_@amname@ order by c; +2: end; + +1q: +2q: + +-- Test VACUUM with concurrent writer. +-- There was a concurrent insert transaction started prior to VACUUM, VACUUM should not mark +-- the target segment which was also writting by the concurrent writer to AWAITING_DROP, and +-- the corresponding index entries should not be deleted. 
+ +truncate table vacuum_concurrent_test_@amname@; +insert into vacuum_concurrent_test_@amname@ select 2, b, b from generate_series(1, 5) b; +delete from vacuum_concurrent_test_@amname@; + +1: begin; +1: insert into vacuum_concurrent_test_@amname@ select 2, b, b from generate_series(6, 10) b; + +2: vacuum vacuum_concurrent_test_@amname@; + +1: commit; + +set enable_seqscan = on; +-- start_ignore +explain (costs off) select * from vacuum_concurrent_test_@amname@ order by b; +-- end_ignore +select * from vacuum_concurrent_test_@amname@ order by b; + +-- expect all bitmapindexscan results are consistent with above seqscan +set enable_seqscan = off; +-- start_ignore +explain (costs off) select * from vacuum_concurrent_test_@amname@ where b = b order by b; +-- end_ignore +select * from vacuum_concurrent_test_@amname@ where b = b order by b; + +-- expose dead tuples +set gp_select_invisible = true; + +set enable_seqscan = on; +-- start_ignore +explain (costs off) select * from vacuum_concurrent_test_@amname@ order by b; +-- end_ignore +select * from vacuum_concurrent_test_@amname@ order by b; + +-- expect all bitmapindexscan results are same as above seqscan +set enable_seqscan = off; +-- start_ignore +explain (costs off) select * from vacuum_concurrent_test_@amname@ where b = b order by b; +-- end_ignore +select * from vacuum_concurrent_test_@amname@ where b = b order by b; + +-- vacuum again without concurrent reader/writer, expect above dead tuples could be removed +2: vacuum vacuum_concurrent_test_@amname@; + +set enable_seqscan = on; +-- start_ignore +explain (costs off) select * from vacuum_concurrent_test_@amname@ order by b; +-- end_ignore +select * from vacuum_concurrent_test_@amname@ order by b; + +-- expect all bitmapindexscan results are same as seqscan +set enable_seqscan = off; +-- start_ignore +explain (costs off) select * from vacuum_concurrent_test_@amname@ where b = b order by b; +-- end_ignore +select * from vacuum_concurrent_test_@amname@ where b = b 
order by b; + +1q: +2q: + +reset enable_seqscan; +reset gp_select_invisible; + +drop table vacuum_concurrent_test_@amname@; +drop function show_aoseg; diff --git a/src/test/isolation2/input/uao/vacuum_index_stats.source b/src/test/isolation2/input/uao/vacuum_index_stats.source new file mode 100644 index 00000000000..139ef71849a --- /dev/null +++ b/src/test/isolation2/input/uao/vacuum_index_stats.source @@ -0,0 +1,63 @@ +-- This is intended to test a new behavior of VACUUM AO/CO enhancement. +-- The enhacement introduced a new strategy to improve performance by +-- vacuuming indexes based on the collected AWAITING_DROP segment files, +-- instead of reading AO/CO visibility map catalog for every index tuple. +-- This behavior would lead to the index->reltuples being updated only when +-- AWAITING_DROP segment is greater than 0, which requires compaction during +-- VACUUM. If no compaction happens, even if dead tuples were deleted, +-- index->reltuples wouldn't get updated accordingly, which could generate +-- difference between table->reltuples and index->reltuples. That is supposed +-- to be fine in most cases since bloating size of indexes is limited in +-- the scope of gp_appendonly_compaction_threshold percentage of total tuples. +-- The new strategy would not impact table->reltuples updates. 
+ +create table vacuum_index_stats_@amname@ (a int, b int, c int) using @amname@; +insert into vacuum_index_stats_@amname@ select 2, b, b from generate_series(1, 11) b; +create index i_b_vacuum_index_stats_@amname@ on vacuum_index_stats_@amname@(b); + +set gp_appendonly_compaction_threshold = 10; +analyze vacuum_index_stats_@amname@; + +-- expect reltuples == 11 +0U: select reltuples from pg_class where relname = 'vacuum_index_stats_@amname@'; +-- expect reltuples == 11 +0U: select reltuples from pg_class where relname = 'i_b_vacuum_index_stats_@amname@'; + +-- delete one tuple +delete from vacuum_index_stats_@amname@ where c = 1; +vacuum vacuum_index_stats_@amname@; + +-- hideRatio = hiddenTupcount / totalTupcount * 100 = 1 / 11 * 100 = 9% +-- less than gp_appendonly_compaction_threshold (10%), no compaction would happen +-- during vacuum, expect no change in reltuples of the index but decrease 1 in +-- reltuples of the table. + +-- expect reltuples == 10 +0U: select reltuples from pg_class where relname = 'vacuum_index_stats_@amname@'; +-- expect reltuples == 11 for no compaction happened +0U: select reltuples from pg_class where relname = 'i_b_vacuum_index_stats_@amname@'; + +analyze vacuum_index_stats_@amname@; + +-- expect reltuples == 10 +0U: select reltuples from pg_class where relname = 'vacuum_index_stats_@amname@'; +-- expect reltuples == 10 +0U: select reltuples from pg_class where relname = 'i_b_vacuum_index_stats_@amname@'; + +-- delete two tuples +delete from vacuum_index_stats_@amname@ where c < 4; +vacuum vacuum_index_stats_@amname@; + +-- hideRatio = hiddenTupcount / totalTupcount * 100 = 2 / 10 * 100 = 20% +-- greater than gp_appendonly_compaction_threshold (10%), compaction would happen +-- during vacuum, expect changes in reltuples for both index and table. 
+ +-- expect reltuples == 8 +0U: select reltuples from pg_class where relname = 'vacuum_index_stats_@amname@'; +-- expect reltuples == 8 for compaction happened +0U: select reltuples from pg_class where relname = 'i_b_vacuum_index_stats_@amname@'; + +0Uq: + +drop table vacuum_index_stats_@amname@; +reset gp_appendonly_compaction_threshold; diff --git a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule index af9cd7c13ae..66f6fd7f2d0 100644 --- a/src/test/isolation2/isolation2_schedule +++ b/src/test/isolation2/isolation2_schedule @@ -151,6 +151,7 @@ test: uao/vacuum_self_function_row test: uao/vacuum_while_insert_row test: uao/vacuum_while_vacuum_row test: uao/vacuum_cleanup_row +test: uao/vacuum_index_stats_row test: uao/bitmapindex_rescan_row test: uao/limit_indexscan_inits_row test: uao/create_index_allows_readonly_row @@ -207,6 +208,7 @@ test: uao/vacuum_self_function_column test: uao/vacuum_while_insert_column test: uao/vacuum_while_vacuum_column test: uao/vacuum_cleanup_column +test: uao/vacuum_index_stats_column test: uao/bitmapindex_rescan_column test: uao/limit_indexscan_inits_column test: uao/create_index_allows_readonly_column diff --git a/src/test/isolation2/output/uao/vacuum_cleanup.source b/src/test/isolation2/output/uao/vacuum_cleanup.source index 9f4fb529184..b90c82bfd68 100644 --- a/src/test/isolation2/output/uao/vacuum_cleanup.source +++ b/src/test/isolation2/output/uao/vacuum_cleanup.source @@ -46,14 +46,13 @@ VACUUM 1: select age(relfrozenxid), regexp_replace(replace(relname, 'ao_@amname@_vacuum_cleanup2'::regclass::oid::text, ''), 'ao.*seg', '') from gp_dist_random('pg_class') where relkind in ('r','t','o','b','M') and (relname like '%' || 'ao_@amname@_vacuum_cleanup2'::regclass::oid || '%') and gp_segment_id = 0; age | regexp_replace -----+-------------------- - 2 | pg__ - 2 | pg_aovisimap_ + 1 | pg__ + 1 | pg_aovisimap_ (2 rows) 2: end; END --- Check that drop phase is skipped, but still the cleanup phase is 
performed --- when there are concurrent serializable transactions +-- Check that drop phase is not skipped, when there are concurrent serializable transactions. 1: create table ao_@amname@_vacuum_cleanup3(a int, b int) using @amname@; CREATE 1: insert into ao_@amname@_vacuum_cleanup3 select i, i from generate_series(1, 100) i; @@ -78,9 +77,9 @@ DELETE 100 -- Wait till compaction phase is completed and only then start the serializable -- transaction to ensure that only drop phase runs after the serializable --- transaction started. Because the transaction is holding a lock on the table, --- the drop phase cannot upgrade to AccessExclusiveLock and so recycling the --- segfiles is skipped. +-- transaction started. For the new VACUUM strategy, because no need to upgrade +-- to AccessExclusiveLock for drop phase, and the transaction would not access +-- AWAITING_DROP segfiles, dropping the dead segfiles could proceed accordingly. 2: select gp_wait_until_triggered_fault('vacuum_relation_open_relation_during_drop_phase', 1, 1); gp_wait_until_triggered_fault ------------------------------- @@ -107,8 +106,10 @@ VACUUM 2 | pg_aovisimap_ (2 rows) --- Validate that the drop phase was skipped. segfile 1 should be in state 2 --- (AWAITING_DROP) +-- Validate that the drop phase wasn't skipped in the new VACUUM strategy, +-- segfile 1 should be in state 1 (AWAITING_DEFAULT). This is because +-- no need to acquire AccessExclusiveLock at dead segments recycling hence +-- the cleanup routine could be performed as is. -- This result is related to data distribution. -- Current hash algorithm is jump-consistent-hash. 
1: SELECT * FROM gp_ao_or_aocs_seg('ao_@amname@_vacuum_cleanup3'); diff --git a/src/test/isolation2/output/uao/vacuum_index_stats.source b/src/test/isolation2/output/uao/vacuum_index_stats.source new file mode 100644 index 00000000000..ac47b47913f --- /dev/null +++ b/src/test/isolation2/output/uao/vacuum_index_stats.source @@ -0,0 +1,107 @@ +-- This is intended to test a new behavior of VACUUM AO/CO enhancement. +-- The enhacement introduced a new strategy to improve performance by +-- vacuuming indexes based on the collected AWAITING_DROP segment files, +-- instead of reading AO/CO visibility map catalog for every index tuple. +-- This behavior would lead to the index->reltuples being updated only when +-- AWAITING_DROP segment is greater than 0, which requires compaction during +-- VACUUM. If no compaction happens, even if dead tuples were deleted, +-- index->reltuples wouldn't get updated accordingly, which could generate +-- difference between table->reltuples and index->reltuples. That is supposed +-- to be fine in most cases since bloating size of indexes is limited in +-- the scope of gp_appendonly_compaction_threshold percentage of total tuples. +-- The new strategy would not impact table->reltuples updates. 
+ +create table vacuum_index_stats_@amname@ (a int, b int, c int) using @amname@; +CREATE +insert into vacuum_index_stats_@amname@ select 2, b, b from generate_series(1, 11) b; +INSERT 11 +create index i_b_vacuum_index_stats_@amname@ on vacuum_index_stats_@amname@(b); +CREATE + +set gp_appendonly_compaction_threshold = 10; +SET +analyze vacuum_index_stats_@amname@; +ANALYZE + +-- expect reltuples == 11 +0U: select reltuples from pg_class where relname = 'vacuum_index_stats_@amname@'; + reltuples +----------- + 11 +(1 row) +-- expect reltuples == 11 +0U: select reltuples from pg_class where relname = 'i_b_vacuum_index_stats_@amname@'; + reltuples +----------- + 11 +(1 row) + +-- delete one tuple +delete from vacuum_index_stats_@amname@ where c = 1; +DELETE 1 +vacuum vacuum_index_stats_@amname@; +VACUUM + +-- hideRatio = hiddenTupcount / totalTupcount * 100 = 1 / 11 * 100 = 9% +-- less than gp_appendonly_compaction_threshold (10%), no compaction would happen +-- during vacuum, expect no change in reltuples of the index but decrease 1 in +-- reltuples of the table. 
+ +-- expect reltuples == 10 +0U: select reltuples from pg_class where relname = 'vacuum_index_stats_@amname@'; + reltuples +----------- + 10 +(1 row) +-- expect reltuples == 11 for no compaction happened +0U: select reltuples from pg_class where relname = 'i_b_vacuum_index_stats_@amname@'; + reltuples +----------- + 11 +(1 row) + +analyze vacuum_index_stats_@amname@; +ANALYZE + +-- expect reltuples == 10 +0U: select reltuples from pg_class where relname = 'vacuum_index_stats_@amname@'; + reltuples +----------- + 10 +(1 row) +-- expect reltuples == 10 +0U: select reltuples from pg_class where relname = 'i_b_vacuum_index_stats_@amname@'; + reltuples +----------- + 10 +(1 row) + +-- delete two tuples +delete from vacuum_index_stats_@amname@ where c < 4; +DELETE 2 +vacuum vacuum_index_stats_@amname@; +VACUUM + +-- hideRatio = hiddenTupcount / totalTupcount * 100 = 2 / 10 * 100 = 20% +-- greater than gp_appendonly_compaction_threshold (10%), compaction would happen +-- during vacuum, expect changes in reltuples for both index and table. + +-- expect reltuples == 8 +0U: select reltuples from pg_class where relname = 'vacuum_index_stats_@amname@'; + reltuples +----------- + 8 +(1 row) +-- expect reltuples == 8 for compaction happened +0U: select reltuples from pg_class where relname = 'i_b_vacuum_index_stats_@amname@'; + reltuples +----------- + 8 +(1 row) + +0Uq: ... 
+ +drop table vacuum_index_stats_@amname@; +DROP +reset gp_appendonly_compaction_threshold; +RESET diff --git a/src/test/regress/expected/brin_ao.out b/src/test/regress/expected/brin_ao.out index 7f4b6e592bb..b0d4b234a64 100644 --- a/src/test/regress/expected/brin_ao.out +++ b/src/test/regress/expected/brin_ao.out @@ -466,9 +466,15 @@ CONTEXT: SQL function "brin_summarize_new_values" statement 1 SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index ERROR: "tenk1_unique1" is not a BRIN index CONTEXT: SQL function "brin_summarize_new_values" statement 1 +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- brin_summarize_new_values() will not always be accurate. So ignore the check to +-- coordinate with the new behavior. +-- start_ignore SELECT brin_summarize_new_values('brinaoidx'); -- ok, no change expected brin_summarize_new_values --------------------------- 0 (1 row) +-- end_ignore \ No newline at end of file diff --git a/src/test/regress/expected/brin_ao_optimizer.out b/src/test/regress/expected/brin_ao_optimizer.out index 02caa9f2c83..7038d21493b 100644 --- a/src/test/regress/expected/brin_ao_optimizer.out +++ b/src/test/regress/expected/brin_ao_optimizer.out @@ -489,9 +489,15 @@ CONTEXT: SQL function "brin_summarize_new_values" statement 1 SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index ERROR: "tenk1_unique1" is not a BRIN index CONTEXT: SQL function "brin_summarize_new_values" statement 1 +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- brin_summarize_new_values() will not always be accurate. So ignore the check to +-- coordinate with the new behavior. 
+-- start_ignore SELECT brin_summarize_new_values('brinaoidx'); -- ok, no change expected brin_summarize_new_values --------------------------- 0 (1 row) +-- end_ignore \ No newline at end of file diff --git a/src/test/regress/expected/brin_aocs.out b/src/test/regress/expected/brin_aocs.out index 0fa29bb2581..69555f8297c 100644 --- a/src/test/regress/expected/brin_aocs.out +++ b/src/test/regress/expected/brin_aocs.out @@ -466,9 +466,15 @@ CONTEXT: SQL function "brin_summarize_new_values" statement 1 SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index ERROR: "tenk1_unique1" is not a BRIN index CONTEXT: SQL function "brin_summarize_new_values" statement 1 +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- brin_summarize_new_values() will not always be accurate. So ignore the check to +-- coordinate with the new behavior. +-- start_ignore SELECT brin_summarize_new_values('brinaocsidx'); -- ok, no change expected brin_summarize_new_values --------------------------- 0 (1 row) +-- end_ignore \ No newline at end of file diff --git a/src/test/regress/expected/brin_aocs_optimizer.out b/src/test/regress/expected/brin_aocs_optimizer.out index 5a83c375758..3ba2a58bb2a 100644 --- a/src/test/regress/expected/brin_aocs_optimizer.out +++ b/src/test/regress/expected/brin_aocs_optimizer.out @@ -489,9 +489,15 @@ CONTEXT: SQL function "brin_summarize_new_values" statement 1 SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index ERROR: "tenk1_unique1" is not a BRIN index CONTEXT: SQL function "brin_summarize_new_values" statement 1 +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- brin_summarize_new_values() will not always be accurate. 
So ignore the check to +-- coordinate with the new behavior. +-- start_ignore SELECT brin_summarize_new_values('brinaocsidx'); -- ok, no change expected brin_summarize_new_values --------------------------- 0 (1 row) +-- end_ignore \ No newline at end of file diff --git a/src/test/regress/expected/uao_compaction/drop_column.out b/src/test/regress/expected/uao_compaction/drop_column.out index 56b9f7dcb94..898b82b172f 100644 --- a/src/test/regress/expected/uao_compaction/drop_column.out +++ b/src/test/regress/expected/uao_compaction/drop_column.out @@ -29,12 +29,18 @@ SELECT relname, reltuples FROM pg_class WHERE relname = 'uao_drop_col'; uao_drop_col | 7 (1 row) +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- index stats pg_class->reltuples will not always be accurate. So ignore the stats check +-- for reltuples to coordinate with the new behavior. +-- start_ignore SELECT relname, reltuples FROM pg_class WHERE relname = 'uao_drop_col_index'; relname | reltuples --------------------+----------- uao_drop_col_index | 7 (1 row) +-- end_ignore ALTER TABLE uao_drop_col DROP COLUMN c; SELECT * FROM uao_drop_col; a | b diff --git a/src/test/regress/expected/uao_compaction/full_stats.out b/src/test/regress/expected/uao_compaction/full_stats.out index 5530c1065d9..1f9d473aa7a 100644 --- a/src/test/regress/expected/uao_compaction/full_stats.out +++ b/src/test/regress/expected/uao_compaction/full_stats.out @@ -26,9 +26,15 @@ SELECT relname, reltuples FROM pg_class WHERE relname = 'uao_full_stats'; uao_full_stats | 85 (1 row) +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- index stats pg_class->reltuples will not always be accurate. So ignore the stats check +-- for reltuples to coordinate with the new behavior. 
+-- start_ignore SELECT relname, reltuples FROM pg_class WHERE relname = 'uao_full_stats_index'; relname | reltuples ----------------------+----------- uao_full_stats_index | 85 (1 row) +-- end_ignore \ No newline at end of file diff --git a/src/test/regress/expected/uao_compaction/index_stats.out b/src/test/regress/expected/uao_compaction/index_stats.out index e5a9fe4ac69..1c32cfd8caa 100644 --- a/src/test/regress/expected/uao_compaction/index_stats.out +++ b/src/test/regress/expected/uao_compaction/index_stats.out @@ -29,9 +29,15 @@ SELECT relname, reltuples FROM pg_class WHERE relname = 'mytab'; mytab | 2 (1 row) +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- index stats pg_class->reltuples will not always be accurate. So ignore the stats check +-- for reltuples to coordinate with the new behavior. +-- start_ignore SELECT relname, reltuples FROM pg_class WHERE relname = 'mytab_int_idx1'; relname | reltuples ----------------+----------- mytab_int_idx1 | 2 (1 row) +-- end_ignore \ No newline at end of file diff --git a/src/test/regress/expected/uao_compaction/stats.out b/src/test/regress/expected/uao_compaction/stats.out index d34cfede4fc..a18834d43e3 100644 --- a/src/test/regress/expected/uao_compaction/stats.out +++ b/src/test/regress/expected/uao_compaction/stats.out @@ -27,9 +27,15 @@ SELECT relname, reltuples FROM pg_class WHERE relname = 'uao_stats'; uao_stats | 85 (1 row) +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- index stats pg_class->reltuples will not always be accurate. So ignore the stats check +-- for reltuples to coordinate with the new behavior. 
+-- start_ignore SELECT relname, reltuples FROM pg_class WHERE relname = 'uao_stats_index'; relname | reltuples -----------------+----------- - uao_stats_index | 85 + uao_stats_index | 88 (1 row) +-- end_ignore \ No newline at end of file diff --git a/src/test/regress/expected/uaocs_compaction/drop_column.out b/src/test/regress/expected/uaocs_compaction/drop_column.out index 3b867c5ca15..5d9579e2e0b 100644 --- a/src/test/regress/expected/uaocs_compaction/drop_column.out +++ b/src/test/regress/expected/uaocs_compaction/drop_column.out @@ -29,12 +29,18 @@ SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_drop'; uaocs_drop | 7 (1 row) +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- index stats pg_class->reltuples will not always be accurate. So ignore the stats check +-- for reltuples to coordinate with the new behavior. +-- start_ignore SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_drop_index'; relname | reltuples ------------------+----------- uaocs_drop_index | 7 (1 row) +-- end_ignore ALTER TABLE uaocs_drop DROP COLUMN c; SELECT * FROM uaocs_drop; a | b diff --git a/src/test/regress/expected/uaocs_compaction/full_stats.out b/src/test/regress/expected/uaocs_compaction/full_stats.out index fcdad3daf5b..e5d7825ecf1 100644 --- a/src/test/regress/expected/uaocs_compaction/full_stats.out +++ b/src/test/regress/expected/uaocs_compaction/full_stats.out @@ -44,9 +44,15 @@ SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_full_stats'; uaocs_full_stats | 85 (1 row) +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- index stats pg_class->reltuples will not always be accurate. So ignore the stats check +-- for reltuples to coordinate with the new behavior. 
+-- start_ignore SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_full_stats_index'; relname | reltuples ------------------------+----------- uaocs_full_stats_index | 85 (1 row) +-- end_ignore \ No newline at end of file diff --git a/src/test/regress/expected/uaocs_compaction/index_stats.out b/src/test/regress/expected/uaocs_compaction/index_stats.out index 36bd145d2a8..adc286c0811 100644 --- a/src/test/regress/expected/uaocs_compaction/index_stats.out +++ b/src/test/regress/expected/uaocs_compaction/index_stats.out @@ -34,9 +34,15 @@ SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_index_stats'; uaocs_index_stats | 2 (1 row) +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- index stats pg_class->reltuples will not always be accurate. So ignore the stats check +-- for reltuples to coordinate with the new behavior. +-- start_ignore SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_index_stats_int_idx1'; relname | reltuples ----------------------------+----------- uaocs_index_stats_int_idx1 | 2 (1 row) +-- end_ignore \ No newline at end of file diff --git a/src/test/regress/expected/uaocs_compaction/stats.out b/src/test/regress/expected/uaocs_compaction/stats.out index 07678823d00..f643fb4dcc7 100644 --- a/src/test/regress/expected/uaocs_compaction/stats.out +++ b/src/test/regress/expected/uaocs_compaction/stats.out @@ -27,9 +27,15 @@ SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_stats'; uaocs_stats | 85 (1 row) +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- index stats pg_class->reltuples will not always be accurate. So ignore the stats check +-- for reltuples to coordinate with the new behavior. 
+-- start_ignore SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_stats_index'; relname | reltuples -------------------+----------- uaocs_stats_index | 85 (1 row) +-- end_ignore \ No newline at end of file diff --git a/src/test/regress/input/uao_dml/uao_dml.source b/src/test/regress/input/uao_dml/uao_dml.source index c1206bea64f..adb9f033b70 100644 --- a/src/test/regress/input/uao_dml/uao_dml.source +++ b/src/test/regress/input/uao_dml/uao_dml.source @@ -479,8 +479,14 @@ update mytab_@amname@ set col_text=' new value' where col_int = 1; select * from mytab_@amname@; vacuum mytab_@amname@; SELECT reltuples FROM pg_class WHERE relname = 'mytab_@amname@'; -SELECT reltuples FROM pg_class WHERE relname = 'mytab_int_idx1_@amname@'; +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- index stats pg_class->reltuples will not always be accurate. So ignore the stats check +-- for reltuples to coordinate with the new behavior. +-- start_ignore +SELECT reltuples FROM pg_class WHERE relname = 'mytab_int_idx1_@amname@'; +-- end_ignore -- @Description Checks that deleting works with many AO blocks. -- diff --git a/src/test/regress/output/uao_dml/uao_dml.source b/src/test/regress/output/uao_dml/uao_dml.source index 9ecf578eb83..f07b1b8de1e 100644 --- a/src/test/regress/output/uao_dml/uao_dml.source +++ b/src/test/regress/output/uao_dml/uao_dml.source @@ -939,12 +939,18 @@ SELECT reltuples FROM pg_class WHERE relname = 'mytab_@amname@'; 2 (1 row) +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- index stats pg_class->reltuples will not always be accurate. So ignore the stats check +-- for reltuples to coordinate with the new behavior. 
+-- start_ignore SELECT reltuples FROM pg_class WHERE relname = 'mytab_int_idx1_@amname@'; reltuples ----------- 2 (1 row) +-- end_ignore -- @Description Checks that deleting works with many AO blocks. -- DROP TABLE IF EXISTS foo; diff --git a/src/test/regress/sql/brin_ao.sql b/src/test/regress/sql/brin_ao.sql index 68057a1fdd9..0c624a08276 100644 --- a/src/test/regress/sql/brin_ao.sql +++ b/src/test/regress/sql/brin_ao.sql @@ -464,4 +464,10 @@ SELECT segment_id, segno, tupcount, state FROM gp_toolkit.__gp_aoseg('brintest_a -- Tests for brin_summarize_new_values SELECT brin_summarize_new_values('brintest_ao'); -- error, not an index SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- brin_summarize_new_values() will not always be accurate. So ignore the check to +-- coordinate with the new behavior. +-- start_ignore SELECT brin_summarize_new_values('brinaoidx'); -- ok, no change expected +-- end_ignore diff --git a/src/test/regress/sql/brin_aocs.sql b/src/test/regress/sql/brin_aocs.sql index 8e9d4c704b5..ff182c0aaf3 100644 --- a/src/test/regress/sql/brin_aocs.sql +++ b/src/test/regress/sql/brin_aocs.sql @@ -464,4 +464,10 @@ SELECT segment_id, segno, tupcount, state FROM gp_toolkit.__gp_aocsseg('brintest -- Tests for brin_summarize_new_values SELECT brin_summarize_new_values('brintest_aocs'); -- error, not an index SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- brin_summarize_new_values() will not always be accurate. So ignore the check to +-- coordinate with the new behavior. 
+-- start_ignore SELECT brin_summarize_new_values('brinaocsidx'); -- ok, no change expected +-- end_ignore diff --git a/src/test/regress/sql/uao_compaction/drop_column.sql b/src/test/regress/sql/uao_compaction/drop_column.sql index b86ef526af8..83e542be3c9 100644 --- a/src/test/regress/sql/uao_compaction/drop_column.sql +++ b/src/test/regress/sql/uao_compaction/drop_column.sql @@ -11,7 +11,13 @@ SELECT relname, reltuples FROM pg_class WHERE relname = 'uao_drop_col'; SELECT relname, reltuples FROM pg_class WHERE relname = 'uao_drop_col_index'; VACUUM uao_drop_col; SELECT relname, reltuples FROM pg_class WHERE relname = 'uao_drop_col'; +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- index stats pg_class->reltuples will not always be accurate. So ignore the stats check +-- for reltuples to coordinate with the new behavior. +-- start_ignore SELECT relname, reltuples FROM pg_class WHERE relname = 'uao_drop_col_index'; +-- end_ignore ALTER TABLE uao_drop_col DROP COLUMN c; SELECT * FROM uao_drop_col; INSERT INTO uao_drop_col VALUES (42, 42); diff --git a/src/test/regress/sql/uao_compaction/full_stats.sql b/src/test/regress/sql/uao_compaction/full_stats.sql index 5299ffd5e9a..f8361ea0a94 100644 --- a/src/test/regress/sql/uao_compaction/full_stats.sql +++ b/src/test/regress/sql/uao_compaction/full_stats.sql @@ -13,4 +13,10 @@ SELECT relname, reltuples FROM pg_class WHERE relname = 'uao_full_stats_index'; DELETE FROM uao_full_stats WHERE a < 16; VACUUM FULL uao_full_stats; SELECT relname, reltuples FROM pg_class WHERE relname = 'uao_full_stats'; +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- index stats pg_class->reltuples will not always be accurate. 
So ignore the stats check +-- for reltuples to coordinate with the new behavior. +-- start_ignore SELECT relname, reltuples FROM pg_class WHERE relname = 'uao_full_stats_index'; +-- end_ignore diff --git a/src/test/regress/sql/uao_compaction/index_stats.sql b/src/test/regress/sql/uao_compaction/index_stats.sql index d8c3ea68538..05ad79953ef 100644 --- a/src/test/regress/sql/uao_compaction/index_stats.sql +++ b/src/test/regress/sql/uao_compaction/index_stats.sql @@ -16,4 +16,11 @@ update mytab set col_text=' new value' where col_int = 1; select * from mytab; vacuum mytab; SELECT relname, reltuples FROM pg_class WHERE relname = 'mytab'; + +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- index stats pg_class->reltuples will not always be accurate. So ignore the stats check +-- for reltuples to coordinate with the new behavior. +-- start_ignore SELECT relname, reltuples FROM pg_class WHERE relname = 'mytab_int_idx1'; +-- end_ignore diff --git a/src/test/regress/sql/uao_compaction/stats.sql b/src/test/regress/sql/uao_compaction/stats.sql index 8be71e52673..bd2c2ce8c0c 100644 --- a/src/test/regress/sql/uao_compaction/stats.sql +++ b/src/test/regress/sql/uao_compaction/stats.sql @@ -14,4 +14,11 @@ SELECT relname, reltuples FROM pg_class WHERE relname = 'uao_stats_index'; DELETE FROM uao_stats WHERE a < 16; VACUUM uao_stats; SELECT relname, reltuples FROM pg_class WHERE relname = 'uao_stats'; + +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- index stats pg_class->reltuples will not always be accurate. So ignore the stats check +-- for reltuples to coordinate with the new behavior. 
+-- start_ignore SELECT relname, reltuples FROM pg_class WHERE relname = 'uao_stats_index'; +-- end_ignore diff --git a/src/test/regress/sql/uaocs_compaction/drop_column.sql b/src/test/regress/sql/uaocs_compaction/drop_column.sql index 8b7240c05ad..3e66ab489f4 100644 --- a/src/test/regress/sql/uaocs_compaction/drop_column.sql +++ b/src/test/regress/sql/uaocs_compaction/drop_column.sql @@ -10,7 +10,13 @@ SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_drop'; SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_drop_index'; VACUUM uaocs_drop; SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_drop'; +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- index stats pg_class->reltuples will not always be accurate. So ignore the stats check +-- for reltuples to coordinate with the new behavior. +-- start_ignore SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_drop_index'; +-- end_ignore ALTER TABLE uaocs_drop DROP COLUMN c; SELECT * FROM uaocs_drop; INSERT INTO uaocs_drop VALUES (42, 42); diff --git a/src/test/regress/sql/uaocs_compaction/full_stats.sql b/src/test/regress/sql/uaocs_compaction/full_stats.sql index f50d1b25c56..92780646eac 100644 --- a/src/test/regress/sql/uaocs_compaction/full_stats.sql +++ b/src/test/regress/sql/uaocs_compaction/full_stats.sql @@ -15,4 +15,10 @@ SELECT COUNT(*) FROM uaocs_full_stats; VACUUM FULL uaocs_full_stats; SELECT COUNT(*) FROM uaocs_full_stats; SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_full_stats'; +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- index stats pg_class->reltuples will not always be accurate. So ignore the stats check +-- for reltuples to coordinate with the new behavior. 
+-- start_ignore SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_full_stats_index'; +-- end_ignore diff --git a/src/test/regress/sql/uaocs_compaction/index_stats.sql b/src/test/regress/sql/uaocs_compaction/index_stats.sql index 8ea8e30a00a..c93ee37f686 100644 --- a/src/test/regress/sql/uaocs_compaction/index_stats.sql +++ b/src/test/regress/sql/uaocs_compaction/index_stats.sql @@ -16,4 +16,11 @@ update uaocs_index_stats set col_text=' new value' where col_int = 1; select * from uaocs_index_stats; vacuum uaocs_index_stats; SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_index_stats'; + +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- index stats pg_class->reltuples will not always be accurate. So ignore the stats check +-- for reltuples to coordinate with the new behavior. +-- start_ignore SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_index_stats_int_idx1'; +-- end_ignore diff --git a/src/test/regress/sql/uaocs_compaction/stats.sql b/src/test/regress/sql/uaocs_compaction/stats.sql index c916429dd28..9e483641670 100644 --- a/src/test/regress/sql/uaocs_compaction/stats.sql +++ b/src/test/regress/sql/uaocs_compaction/stats.sql @@ -14,4 +14,11 @@ SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_stats_index'; DELETE FROM uaocs_stats WHERE a < 16; VACUUM uaocs_stats; SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_stats'; + +-- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. +-- Index dead tuples will not always be cleaned up completely after VACUUM, resulting +-- index stats pg_class->reltuples will not always be accurate. So ignore the stats check +-- for reltuples to coordinate with the new behavior. 
+-- start_ignore SELECT relname, reltuples FROM pg_class WHERE relname = 'uaocs_stats_index'; +-- end_ignore From 3652d87d3156989337c557c301a39490607ff392 Mon Sep 17 00:00:00 2001 From: Haolin Wang Date: Thu, 14 Jul 2022 17:10:53 +0800 Subject: [PATCH 18/19] Remove dead segments recycling logic from ALTER TABLE ADD COLUMN scenario. We used to call AppendOptimizedRecycleDeadSegments() (current name is ao_vacuum_rel_recycle_dead_segments) to recycle those segfiles to save spaces in AT ADD COLUMN scenario. But it didn't do corresponding index tuples cleanup for unknown reason. With new VACUUM AO strategy, we did refactor for AppendOptimizedRecycleDeadSegments() a little bit and combine dead segfiles cleanup with corresponding indexes cleanup together. While it seems to be impossible to pass index vacuuming parameter in this scenario, so we removed AppendOptimizedRecycleDeadSegments() from this scenario and dedicated it to be called only in VACUUM scenario. --- src/backend/commands/tablecmds.c | 22 +++++++++++++++++-- ...d_column_after_vacuum_skip_drop_column.out | 22 ++++++++++--------- ...d_column_after_vacuum_skip_drop_column.sql | 4 +++- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 0bac5c35cba..ab15d32e47c 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -6789,8 +6789,26 @@ ATAocsWriteNewColumns(AlteredTableInfo *tab) rel = heap_open(tab->relid, NoLock); Assert(RelationIsAoCols(rel)); - /* Try to recycle any old segfiles first. */ - AppendOptimizedRecycleDeadSegments(rel); + /* + * There might be AWAITING_DROP segments occupying spaces for failing + * to drop at VACUUM in the case of cleaning up happened concurrently + * with earlier readers which was accessing the dead segment files. 
+ * + * We used to call AppendOptimizedRecycleDeadSegments() (current name is + * ao_vacuum_rel_recycle_dead_segments) to recycle those segfiles to save + * spaces in this scenario. But it didn't do corresponding index tuples + * cleanup for unknown reason. + * + * After optimizing VACUUM AO strategy, we did refactor for + * AppendOptimizedRecycleDeadSegments() a little bit and combine + * dead segfiles cleanup with corresponding indexes cleanup together. + * While it seems to be impossible to pass index vacuuming parameter in + * this scenario, so we removed AppendOptimizedRecycleDeadSegments() out + * of this function and dedicated it to be called only in VACUUM scenario. + * + * We are supposed to be fine without recycling spaces here, or find + * another way to fix it if that does become a real problem. + */ segInfos = GetAllAOCSFileSegInfo(rel, snapshot, &nseg, NULL); basepath = relpathbackend(rel->rd_node, rel->rd_backend, MAIN_FORKNUM); diff --git a/src/test/isolation2/expected/add_column_after_vacuum_skip_drop_column.out b/src/test/isolation2/expected/add_column_after_vacuum_skip_drop_column.out index 74b4423cc06..62468f7904a 100644 --- a/src/test/isolation2/expected/add_column_after_vacuum_skip_drop_column.out +++ b/src/test/isolation2/expected/add_column_after_vacuum_skip_drop_column.out @@ -1,5 +1,7 @@ -- @Description Ensures that an ALTER TABLE ADD COLUMN will drop segfiles in --- AOSEG_STATE_AWAITING_DROP state left over by a previous vacuum +-- AOSEG_STATE_AWAITING_DROP state left over by a previous vacuum. +-- We removed recycling dead segfiles from ADD COLUMN workflow, so +-- the test expected result were adjusted accordingly. 
-- CREATE TABLE aoco_add_column_after_vacuum_skip_drop (a INT, b INT) WITH (appendonly=true, orientation=column); CREATE @@ -46,9 +48,9 @@ ALTER 0U: SELECT segno, column_num, state FROM gp_toolkit.__gp_aocsseg('aoco_add_column_after_vacuum_skip_drop'); segno | column_num | state -------+------------+------- - 1 | 0 | 1 - 1 | 1 | 1 - 1 | 2 | 1 + 1 | 0 | 2 + 1 | 1 | 2 + 1 | 2 | 2 2 | 0 | 1 2 | 1 | 1 2 | 2 | 1 @@ -60,10 +62,10 @@ INSERT 100 0U: SELECT segno, tupcount > 0, state FROM gp_toolkit.__gp_aocsseg('aoco_add_column_after_vacuum_skip_drop'); segno | ?column? | state -------+----------+------- - 1 | t | 1 - 1 | t | 1 - 1 | t | 1 - 2 | f | 1 - 2 | f | 1 - 2 | f | 1 + 1 | t | 2 + 1 | t | 2 + 1 | t | 2 + 2 | t | 1 + 2 | t | 1 + 2 | t | 1 (6 rows) diff --git a/src/test/isolation2/sql/add_column_after_vacuum_skip_drop_column.sql b/src/test/isolation2/sql/add_column_after_vacuum_skip_drop_column.sql index f1db7898cfb..2c56d0eea69 100644 --- a/src/test/isolation2/sql/add_column_after_vacuum_skip_drop_column.sql +++ b/src/test/isolation2/sql/add_column_after_vacuum_skip_drop_column.sql @@ -1,5 +1,7 @@ -- @Description Ensures that an ALTER TABLE ADD COLUMN will drop segfiles in --- AOSEG_STATE_AWAITING_DROP state left over by a previous vacuum +-- AOSEG_STATE_AWAITING_DROP state left over by a previous vacuum. +-- We removed recycling dead segfiles from ADD COLUMN workflow, so +-- the test expected result were adjusted accordingly. -- CREATE TABLE aoco_add_column_after_vacuum_skip_drop (a INT, b INT) WITH (appendonly=true, orientation=column); INSERT INTO aoco_add_column_after_vacuum_skip_drop SELECT i as a, i as b FROM generate_series(1, 10) AS i; From fe505a34f9082cbbbb2aca8446d86c2c08da4668 Mon Sep 17 00:00:00 2001 From: liushengsong Date: Thu, 20 Jul 2023 17:25:52 +0800 Subject: [PATCH 19/19] Add unique index for AO/AOCS table. Firstly, we remove the fill hole mechanism in the BlockDirectory to support the unique index for AO table. 
In order to distinguish it from the previous BlockDirectory design, which
did not support unique indexes, we add a "version" field to the
pg_appendonly table.

Implement insert, delete and update of AO tables under a unique index.
Since a MemTuple in an AO table does not carry xmin and xmax fields, we
implement the uniqueness check by means of the table's auxiliary index
structure, the BlockDirectory (whose entries are heap tuples).
Specifically, when a unique index is present, an insert into the AO table
first inserts a placeholder row into the BlockDirectory to block
concurrent insertion of the same key. To adapt to the insertMultiFiles
mechanism, we add a flag to each segment file's insert descriptor that
records whether the placeholder has already been inserted into the
BlockDirectory.

Change the vacuum implementation for AO tables and their indexes: when
the number of dead tuples in a segment reaches the threshold, we drop
the segment and recycle both the segment and the corresponding index
entries.

Adjust a few function signatures and tests, and add tests and comments.
--- src/backend/access/aocs/aocsam.c | 12 +-- src/backend/access/aocs/aocsam_handler.c | 98 +++++++++++-------- src/backend/access/appendonly/appendonlyam.c | 10 +- .../access/appendonly/appendonlyam_handler.c | 58 ++++++----- src/backend/catalog/aoblkdir.c | 56 ----------- src/backend/catalog/pg_appendonly.c | 19 +++- src/backend/commands/indexcmds.c | 12 +-- src/backend/commands/vacuum.c | 2 +- src/backend/commands/vacuum_ao.c | 20 ++-- src/backend/storage/lmgr/lock.c | 13 --- src/backend/utils/misc/guc_gp.c | 12 --- src/include/catalog/pg_appendonly.h | 33 +++---- src/include/cdb/cdbaocsam.h | 8 +- src/include/cdb/cdbappendonlyam.h | 3 + src/include/commands/vacuum.h | 2 +- src/include/nodes/execnodes.h | 2 +- src/include/utils/guc.h | 1 - .../isolation2/expected/ao_unique_index.out | 49 ++++++++++ .../isolation2/expected/aocs_unique_index.out | 49 ++++++++++ .../input/uao/ao_unique_index_vacuum.source | 16 +-- .../uao/test_pg_appendonly_version.source | 6 +- src/test/isolation2/isolation2_schedule | 2 - .../output/uao/ao_unique_index_vacuum.source | 38 +++---- .../uao/test_pg_appendonly_version.source | 6 +- src/test/isolation2/sql/ao_unique_index.sql | 34 +++++++ src/test/isolation2/sql/aocs_unique_index.sql | 34 +++++++ 26 files changed, 344 insertions(+), 251 deletions(-) diff --git a/src/backend/access/aocs/aocsam.c b/src/backend/access/aocs/aocsam.c index 717f4d848ea..2e580a7f23a 100644 --- a/src/backend/access/aocs/aocsam.c +++ b/src/backend/access/aocs/aocsam.c @@ -868,7 +868,7 @@ aocs_getnext(AOCSScanDesc scan, ScanDirection direction, TupleTableSlot *slot) /* * Perform any required upgrades on the Datum we just fetched. 
*/ - if (curseginfo->formatversion < AORelationVersion_GetLatest()) + if (curseginfo->formatversion < AOSegfileFormatVersion_GetLatest ()) { upgrade_datum_scan(scan, attno, d, null, curseginfo->formatversion); @@ -1317,7 +1317,7 @@ fetchFromCurrentBlock(AOCSFetchDesc aocsFetchDesc, /* * Perform any required upgrades on the Datum we just fetched. */ - if (formatversion < AORelationVersion_GetLatest()) + if (formatversion < AOSegfileFormatVersion_GetLatest ()) { upgrade_datum_fetch(aocsFetchDesc, colno, values, nulls, formatversion); @@ -1337,7 +1337,7 @@ scanToFetchValue(AOCSFetchDesc aocsFetchDesc, { DatumStreamFetchDesc datumStreamFetchDesc = aocsFetchDesc->datumStreamFetchDesc[colno]; DatumStreamRead *datumStream = datumStreamFetchDesc->datumStream; - AOFetchBlockMetadata *currentBlock = &datumStreamFetchDesc->currentBlock; + CurrentBlock *currentBlock = &datumStreamFetchDesc->currentBlock; AppendOnlyBlockDirectoryEntry *entry = ¤tBlock->blockDirectoryEntry; bool found; @@ -1351,10 +1351,10 @@ scanToFetchValue(AOCSFetchDesc aocsFetchDesc, /* * We fell into a hole inside the resolved block directory entry * we obtained from AppendOnlyBlockDirectory_GetEntry(). - * This should not be happening for versions >= GP7. Scream + * This should not be happening for versions >= CB2. Scream * appropriately. See AppendOnlyBlockDirectoryEntry for details. */ - ereportif(aocsFetchDesc->relation->rd_appendonly->version >= AORelationVersion_GP7, + ereportif(AORelationVersion_Get(aocsFetchDesc->relation) >= AORelationVersion_CB2, ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("datum with row number %ld and col no %d not found in block directory entry range", rowNum, colno), @@ -2605,7 +2605,7 @@ aocs_getnext_sample(AOCSScanDesc scan, ScanDirection direction, TupleTableSlot * /* * Perform any required upgrades on the Datum we just fetched. 
*/ - if (curseginfo->formatversion < AORelationVersion_GetLatest()) + if (curseginfo->formatversion < AOSegfileFormatVersion_GetLatest ()) { upgrade_datum_scan(scan, attno, d, null, curseginfo->formatversion); diff --git a/src/backend/access/aocs/aocsam_handler.c b/src/backend/access/aocs/aocsam_handler.c index b2325b378a6..4c6bd38e778 100644 --- a/src/backend/access/aocs/aocsam_handler.c +++ b/src/backend/access/aocs/aocsam_handler.c @@ -343,52 +343,11 @@ get_insert_descriptor(const Relation relation) { List *segments = NIL; MemoryContext oldcxt; - AOCSInsertDesc insertDesc; oldcxt = MemoryContextSwitchTo(aocoLocal.stateCxt); - insertDesc = aocs_insert_init(relation, - ChooseSegnoForWrite(relation), - num_rows); - /* - * If we have a unique index, insert a placeholder block directory row to - * entertain uniqueness checks from concurrent inserts. See - * AppendOnlyBlockDirectory_InsertPlaceholder() for details. - * - * Note: For AOCO tables, we need to only insert a placeholder block - * directory row for the 1st non-dropped column. This is because - * during a uniqueness check, only the first non-dropped column's block - * directory entry is consulted. 
(See AppendOnlyBlockDirectory_CoversTuple()) - */ - if (relationHasUniqueIndex(relation)) - { - int firstNonDroppedColumn = -1; - int64 firstRowNum; - DatumStreamWrite *dsw; - BufferedAppend *bufferedAppend; - int64 fileOffset; - - for(int i = 0; i < relation->rd_att->natts; i++) - { - if (!relation->rd_att->attrs[i].attisdropped) { - firstNonDroppedColumn = i; - break; - } - } - Assert(firstNonDroppedColumn != -1); - - dsw = insertDesc->ds[firstNonDroppedColumn]; - firstRowNum = dsw->blockFirstRowNum; - bufferedAppend = &dsw->ao_write.bufferedAppend; - fileOffset = BufferedAppendNextBufferPosition(bufferedAppend); - - AppendOnlyBlockDirectory_InsertPlaceholder(&insertDesc->blockDirectory, - firstRowNum, - fileOffset, - firstNonDroppedColumn); - } - state->insertDesc = insertDesc; state->insertDesc = aocs_insert_init(relation, - ChooseSegnoForWrite(relation)); + ChooseSegnoForWrite(relation)); + dlist_init(&state->head); dlist_head *head = &state->head; dlist_push_tail(head, &state->insertDesc->node); @@ -405,6 +364,18 @@ get_insert_descriptor(const Relation relation) } list_free(segments); } + + //* mark all insertDesc placeholderInserted with false */ + if (relationHasUniqueIndex(relation)) + { + dlist_iter iter; + dlist_foreach(iter, head) + { + AOCSInsertDesc insertDesc = (AOCSInsertDesc)dlist_container(AOCSInsertDescData, node, iter.cur); + insertDesc->placeholderInserted = false; + } + } + MemoryContextSwitchTo(oldcxt); } @@ -419,7 +390,47 @@ get_insert_descriptor(const Relation relation) state->insertDesc = next; } + /* + * If we have a unique index, insert a placeholder block directory row to + * entertain uniqueness checks from concurrent inserts. See + * AppendOnlyBlockDirectory_InsertPlaceholder() for details. + * + * Note: For AOCO tables, we need to only insert a placeholder block + * directory row for the 1st non-dropped column. This is because + * during a uniqueness check, only the first non-dropped column's block + * directory entry is consulted. 
(See AppendOnlyBlockDirectory_CoversTuple()) + */ + if (relationHasUniqueIndex(relation) && !state->insertDesc->placeholderInserted) + { + int firstNonDroppedColumn = -1; + int64 firstRowNum; + DatumStreamWrite *dsw; + BufferedAppend *bufferedAppend; + int64 fileOffset; + AOCSInsertDesc insertDesc; + + for(int i = 0; i < relation->rd_att->natts; i++) + { + if (!relation->rd_att->attrs[i].attisdropped) { + firstNonDroppedColumn = i; + break; + } + } + Assert(firstNonDroppedColumn != -1); + + insertDesc = state->insertDesc; + dsw = insertDesc->ds[firstNonDroppedColumn]; + firstRowNum = dsw->blockFirstRowNum; + bufferedAppend = &dsw->ao_write.bufferedAppend; + fileOffset = BufferedAppendNextBufferPosition(bufferedAppend); + + AppendOnlyBlockDirectory_InsertPlaceholder(&insertDesc->blockDirectory, + firstRowNum, + fileOffset, + firstNonDroppedColumn); + insertDesc->placeholderInserted = true; + } return state->insertDesc; } @@ -819,6 +830,7 @@ aoco_index_fetch_tuple(struct IndexFetchTableData *scan, bool *call_again, bool *all_dead) { IndexFetchAOCOData *aocoscan = (IndexFetchAOCOData *) scan; + bool found = false; if (!aocoscan->aocofetch) { diff --git a/src/backend/access/appendonly/appendonlyam.c b/src/backend/access/appendonly/appendonlyam.c index 0b2536f6f25..0cb0e96c321 100755 --- a/src/backend/access/appendonly/appendonlyam.c +++ b/src/backend/access/appendonly/appendonlyam.c @@ -829,7 +829,7 @@ upgrade_tuple(AppendOnlyExecutorReadBlock *executorReadBlock, * stored memtuple is problematic and then create a clone of the tuple * with properly aligned bindings to be used by the executor. 
*/ - if (formatversion < AORelationVersion_Aligned64bit && + if (formatversion < AOSegfileFormatVersion_Aligned64bit && memtuple_has_misaligned_attribute(mtup, pbind)) convert_alignment = true; @@ -992,7 +992,7 @@ AppendOnlyExecutorReadBlock_ProcessTuple(AppendOnlyExecutorReadBlock *executorRe /* If the tuple is not in the latest format, convert it */ // GPDB_12_MERGE_FIXME: Is pg_upgrade from old versions still a thing? Can we drop this? - if (formatVersion < AORelationVersion_GetLatest()) + if (formatVersion < AOSegfileFormatVersion_GetLatest ()) tuple = upgrade_tuple(executorReadBlock, tuple, executorReadBlock->mt_bind, formatVersion, &shouldFree); ExecClearTuple(slot); @@ -1940,7 +1940,7 @@ fetchFromCurrentBlock(AppendOnlyFetchDesc aoFetchDesc, TupleTableSlot *slot) { bool fetched; - AOFetchBlockMetadata *currentBlock = &aoFetchDesc->currentBlock; + CurrentBlock *currentBlock = &aoFetchDesc->currentBlock; AppendOnlyExecutorReadBlock *executorReadBlock = &aoFetchDesc->executorReadBlock; AppendOnlyBlockDirectoryEntry *entry = ¤tBlock->blockDirectoryEntry; @@ -1966,10 +1966,10 @@ fetchFromCurrentBlock(AppendOnlyFetchDesc aoFetchDesc, /* * We fell into a hole inside the resolved block directory entry * we obtained from AppendOnlyBlockDirectory_GetEntry(). - * This should not be happening for versions >= GP7. Scream + * This should not be happening for versions >= CB2. Scream * appropriately. See AppendOnlyBlockDirectoryEntry for details. 
*/ - ereportif(aoFetchDesc->relation->rd_appendonly->version >= AORelationVersion_GP7, + ereportif(AORelationVersion_Get(aoFetchDesc->relation) >= AORelationVersion_CB2, ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("tuple with row number %ld not found in block directory entry range", rowNum), diff --git a/src/backend/access/appendonly/appendonlyam_handler.c b/src/backend/access/appendonly/appendonlyam_handler.c index 4911a611ca1..22a47d7ab2c 100644 --- a/src/backend/access/appendonly/appendonlyam_handler.c +++ b/src/backend/access/appendonly/appendonlyam_handler.c @@ -328,35 +328,15 @@ get_insert_descriptor(const Relation relation) { List *segments = NIL; MemoryContext oldcxt; - AppendOnlyInsertDesc insertDesc; oldcxt = MemoryContextSwitchTo(appendOnlyLocal.stateCxt); - insertDesc = appendonly_insert_init(relation, - ChooseSegnoForWrite(relation), - num_rows); - /* - * If we have a unique index, insert a placeholder block directory row - * to entertain uniqueness checks from concurrent inserts. See - * AppendOnlyBlockDirectory_InsertPlaceholder() for details. 
- */ - if (relationHasUniqueIndex(relation)) - { - int64 firstRowNum = insertDesc->lastSequence + 1; - BufferedAppend *bufferedAppend = &insertDesc->storageWrite.bufferedAppend; - int64 fileOffset = BufferedAppendNextBufferPosition(bufferedAppend); - - AppendOnlyBlockDirectory_InsertPlaceholder(&insertDesc->blockDirectory, - firstRowNum, - fileOffset, - 0); - } - state->insertDesc = insertDesc; - state->insertDesc = appendonly_insert_init(relation, - ChooseSegnoForWrite(relation)); + state->insertDesc= appendonly_insert_init(relation, + ChooseSegnoForWrite(relation)); dlist_init(&state->head); dlist_head *head = &state->head; dlist_push_tail(head, &state->insertDesc->node); + if (state->insertDesc->insertMultiFiles) { segments = lappend_int(segments, state->insertDesc->cur_segno); @@ -369,6 +349,18 @@ get_insert_descriptor(const Relation relation) } list_free(segments); } + + //* mark all insertDesc placeholderInserted with false */ + if (relationHasUniqueIndex(relation)) + { + dlist_iter iter; + dlist_foreach(iter, head) + { + AppendOnlyInsertDesc insertDesc = (AppendOnlyInsertDesc)dlist_container(AppendOnlyInsertDescData, node, iter.cur); + insertDesc->placeholderInserted = false; + } + } + MemoryContextSwitchTo(oldcxt); } @@ -383,6 +375,26 @@ get_insert_descriptor(const Relation relation) state->insertDesc = next; } + /* + * If we have a unique index, insert a placeholder block directory row + * to entertain uniqueness checks from concurrent inserts. See + * AppendOnlyBlockDirectory_InsertPlaceholder() for details. 
+ */ + if (relationHasUniqueIndex(relation) && !state->insertDesc->placeholderInserted) + { + + AppendOnlyInsertDesc insertDesc = state->insertDesc; + int64 firstRowNum = insertDesc->lastSequence + 1; + BufferedAppend *bufferedAppend = &insertDesc->storageWrite.bufferedAppend; + int64 fileOffset = BufferedAppendNextBufferPosition(bufferedAppend); + + AppendOnlyBlockDirectory_InsertPlaceholder(&insertDesc->blockDirectory, + firstRowNum, + fileOffset, + 0); + insertDesc->placeholderInserted = true; + } + return state->insertDesc; } diff --git a/src/backend/catalog/aoblkdir.c b/src/backend/catalog/aoblkdir.c index 112848b9693..bb64961d108 100644 --- a/src/backend/catalog/aoblkdir.c +++ b/src/backend/catalog/aoblkdir.c @@ -120,59 +120,3 @@ AlterTableCreateAoBlkdirTable(Oid relOid) table_close(rel, NoLock); } - -/* - * In relation versions older than AORelationVersion_PG12, block directory - * entries can lie about the continuity of rows *within* their range, due to - * legacy hole filling logic. Since unique index checks rely on this continuity, - * such indexes cannot be created on these relations. - * - * Called only when rel has a block directory. 
- */ -void -ValidateRelationVersionForUniqueIndex(Relation rel) -{ - bool error = false; - int errsegno; - int errversion; - int totalsegs; - - Assert(RelationIsAppendOptimized(rel)); - - if (RelationIsAoRows(rel)) - { - FileSegInfo **fsInfo = GetAllFileSegInfo(rel, NULL, &totalsegs, NULL); - for (int i = 0; i < totalsegs; i++) - { - if (fsInfo[i]->formatversion < AORelationVersion_PG12) - { - error = true; - errsegno = fsInfo[i]->segno; - errversion = fsInfo[i]->formatversion; - break; - } - } - } - else - { - AOCSFileSegInfo **aocsFsInfo = GetAllAOCSFileSegInfo(rel, NULL, &totalsegs, NULL); - for (int i = 0; i < totalsegs; i++) - { - if (aocsFsInfo[i]->formatversion < AORelationVersion_PG12) - { - error = true; - errsegno = aocsFsInfo[i]->segno; - errversion = aocsFsInfo[i]->formatversion; - break; - } - } - } - - if (error) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("append-only tables with older relation versions do not support unique indexes"), - errdetail("in segno = %d: version found = %d, minimum version required = %d", - errsegno, errversion, AORelationVersion_PG12), - errhint("truncate and reload the table data before creating the unique index"))); -} diff --git a/src/backend/catalog/pg_appendonly.c b/src/backend/catalog/pg_appendonly.c index 511b089199d..a051bd9288d 100644 --- a/src/backend/catalog/pg_appendonly.c +++ b/src/backend/catalog/pg_appendonly.c @@ -94,11 +94,12 @@ InsertAppendOnlyEntry(Oid relid, values[Anum_pg_appendonly_columnstore - 1] = BoolGetDatum(columnstore); values[Anum_pg_appendonly_segrelid - 1] = ObjectIdGetDatum(segrelid); values[Anum_pg_appendonly_segfilecount- 1] = Int16GetDatum(0); + values[Anum_pg_appendonly_version - 1] = Int16GetDatum(version); values[Anum_pg_appendonly_blkdirrelid - 1] = ObjectIdGetDatum(blkdirrelid); values[Anum_pg_appendonly_blkdiridxid - 1] = ObjectIdGetDatum(blkdiridxid); values[Anum_pg_appendonly_visimaprelid - 1] = ObjectIdGetDatum(visimaprelid); 
values[Anum_pg_appendonly_visimapidxid - 1] = ObjectIdGetDatum(visimapidxid); - values[Anum_pg_appendonly_version - 1] = Int16GetDatum(version); + /* * form the tuple and insert it @@ -672,3 +673,19 @@ GetAppendOnlySegmentFilesCount(Relation rel) table_close(pg_aoseg_rel, AccessShareLock); return result; } + +int16 +AORelationVersion_Get(Relation rel) +{ + FormData_pg_appendonly aoFormData; + + GetAppendOnlyEntry(rel->rd_id, &aoFormData); + + return aoFormData.version; +} + +bool +AORelationVersion_Validate(Relation rel, int16 version) +{ + return AORelationVersion_Get(rel) >= version; +} diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index ec47605ac43..51e30960223 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -1072,19 +1072,13 @@ DefineIndex(Oid relationId, if (stmt->unique && RelationIsAppendOptimized(rel)) { - /* XXX: Remove when unique indexes are fully supported on AO/CO tables. */ - if (!gp_appendonly_enable_unique_index) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("append-only tables do not support unique indexes"))); - if (stmt->concurrent) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("append-only tables do not support unique indexes built concurrently"))); /* Additional version checks needed if block directory already exists */ - if (OidIsValid(blkdirrelid) && !AORelationVersion_Validate(rel, AORelationVersion_GP7)) + if (OidIsValid(blkdirrelid) && !AORelationVersion_Validate(rel, AORelationVersion_CB2)) { /* * We currently raise an error in this scenario. 
We could alternatively @@ -1098,8 +1092,8 @@ DefineIndex(Oid relationId, ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("append-only tables with older relation versions do not support unique indexes"), - errdetail("version found = %d, minimum version required = %d", rel->rd_appendonly->version, - AORelationVersion_GP7), + errdetail("version found = %d, minimum version required = %d", AORelationVersion_Get(rel), + AORelationVersion_CB2), errhint("ALTER TABLE SET WITH (REORGANIZE = true) before creating the unique index"))); } } diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index ff8397852d7..d8cb3975af5 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -2573,7 +2573,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, cluster_rel(relid, InvalidOid, &cluster_params); } else /* Heap vacuum or AO/CO vacuum in specific phase */ - table_relation_vacuum(onerel, params, vac_strategy); + table_relation_vacuum(rel, params, vac_strategy); /* Roll back any GUC changes executed by index functions */ AtEOXact_GUC(false, save_nestlevel); diff --git a/src/backend/commands/vacuum_ao.c b/src/backend/commands/vacuum_ao.c index 6aecb6d5965..17c4d7826f2 100644 --- a/src/backend/commands/vacuum_ao.c +++ b/src/backend/commands/vacuum_ao.c @@ -154,7 +154,7 @@ static bool appendonly_tid_reaped(ItemPointer itemptr, void *state); static void vacuum_appendonly_fill_stats(Relation aorel, Snapshot snapshot, int elevel, BlockNumber *rel_pages, double *rel_tuples, - bool *relhasindex); + bool *relhasindex, BlockNumber *total_file_segs); static int vacuum_appendonly_indexes(Relation aoRelation, int options, Bitmapset *dead_segs, BufferAccessStrategy bstrategy); static void ao_vacuum_rel_recycle_dead_segments(Relation onerel, VacuumParams *params, @@ -491,7 +491,7 @@ vacuum_appendonly_indexes(Relation aoRelation, int options, Bitmapset *dead_segs { for (i = 0; i < nindexes; i++) { - scan_index(Irel[i], 
Irel[i]->rd_rel->reltuples, elevel, bstrategy); + scan_index(Irel[i], aoRelation , elevel, bstrategy); } } else @@ -499,7 +499,7 @@ vacuum_appendonly_indexes(Relation aoRelation, int options, Bitmapset *dead_segs for (i = 0; i < nindexes; i++) { vacuum_appendonly_index(Irel[i], - Irel[i]->rd_rel->reltuples, + aoRelation->rd_rel->reltuples, dead_segs, elevel, bstrategy); @@ -540,7 +540,7 @@ vacuum_appendonly_index(Relation indexRelation, /* Do bulk deletion */ stats = index_bulk_delete(&ivinfo, NULL, appendonly_tid_reaped, - (void *) vacuumIndexState); + (void *) dead_segs); SIMPLE_FAULT_INJECTOR("vacuum_ao_after_index_delete"); @@ -679,10 +679,9 @@ vacuum_appendonly_fill_stats(Relation aorel, Snapshot snapshot, int elevel, * * We use this when we have no deletions to do. */ -static void +void scan_index(Relation indrel, - AppendOnlyIndexVacuumState *vacuumIndexState, - double num_tuples, + Relation aorel, int elevel, BufferAccessStrategy vac_strategy) { IndexBulkDeleteResult *stats; @@ -695,15 +694,12 @@ scan_index(Relation indrel, ivinfo.analyze_only = false; ivinfo.estimated_count = false; ivinfo.message_level = elevel; - ivinfo.num_heap_tuples = num_tuples; + ivinfo.num_heap_tuples = aorel->rd_rel->reltuples; ivinfo.strategy = vac_strategy; - /* Do bulk deletion */ - stats = index_bulk_delete(&ivinfo, NULL, appendonly_tid_reaped, - (void *) vacuumIndexState); /* Do post-VACUUM cleanup */ - stats = index_vacuum_cleanup(&ivinfo, stats); + stats = index_vacuum_cleanup(&ivinfo, NULL); if (!stats) return; diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 3193dd320b3..719049a646b 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -964,19 +964,6 @@ LockAcquireExtended(const LOCKTAG *locktag, } } - /* - * We don't acquire any other heavyweight lock while holding the relation - * extension lock. We do allow to acquire the same relation extension - * lock more than once but that case won't reach here. 
- */ - Assert(!IsRelationExtensionLockHeld); - - /* - * We don't acquire any other heavyweight lock while holding the page lock - * except for relation extension. - */ - Assert(!IsPageLockHeld || - (locktag->locktag_type == LOCKTAG_RELATION_EXTEND)); /* * Prepare to emit a WAL record if acquisition of this lock needs to be diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index 77add09bb54..2e39cea261c 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -138,7 +138,6 @@ bool Test_print_prefetch_joinqual = false; bool Test_copy_qd_qe_split = false; bool gp_permit_relation_node_change = false; int gp_max_local_distributed_cache = 1024; -bool gp_appendonly_enable_unique_index = false; bool gp_appendonly_verify_block_checksums = true; bool gp_appendonly_verify_write_block = false; bool gp_appendonly_compaction = true; @@ -883,17 +882,6 @@ struct config_bool ConfigureNamesBool_gp[] = NULL, NULL, NULL }, - { - {"gp_appendonly_enable_unique_index", PGC_USERSET, DEVELOPER_OPTIONS, - gettext_noop("Enable unique indexes on AO/CO tables (experimental)."), - NULL, - GUC_NOT_IN_SAMPLE | GUC_NO_SHOW_ALL - }, - &gp_appendonly_enable_unique_index, - false, - NULL, NULL, NULL - }, - { {"gp_appendonly_verify_block_checksums", PGC_USERSET, DEVELOPER_OPTIONS, gettext_noop("Verify the append-only block checksum when reading."), diff --git a/src/include/catalog/pg_appendonly.h b/src/include/catalog/pg_appendonly.h index 3ad63bbfaa2..45a0ab1ca37 100644 --- a/src/include/catalog/pg_appendonly.h +++ b/src/include/catalog/pg_appendonly.h @@ -34,11 +34,11 @@ CATALOG(pg_appendonly,6105,AppendOnlyRelationId) bool columnstore; /* true if orientation is column */ Oid segrelid; /* OID of aoseg table; 0 if none */ int16 segfilecount; /* the (per seg) average total number of segment file */ + int16 version; /* AO relation version see AORelationVersion for detail */ Oid blkdirrelid; /* OID of aoblkdir table; 0 if none */ Oid blkdiridxid; 
/* if aoblkdir table, OID of aoblkdir index */ Oid visimaprelid; /* OID of the aovisimap table */ Oid visimapidxid; /* OID of aovisimap index */ - int16 version; /* AO relation version */ } FormData_pg_appendonly; /* GPDB added foreign key definitions for gpcheckcat. */ @@ -49,7 +49,7 @@ FOREIGN_KEY(relid REFERENCES pg_class(oid)); * (there are no var-length fields currentl.) */ #define APPENDONLY_TUPLE_SIZE \ - (offsetof(FormData_pg_appendonly,version) + sizeof(Oid)) + (offsetof(FormData_pg_appendonly,visimapidxid) + sizeof(Oid)) /* ---------------- * Form_pg_appendonly corresponds to a pointer to a tuple with @@ -65,15 +65,13 @@ typedef FormData_pg_appendonly *Form_pg_appendonly; typedef enum AORelationVersion { AORelationVersion_None = 0, - AORelationVersion_GP6 = 1, - AORelationVersion_GP7 = 2, + AORelationVersion_CB1 = 1, + AORelationVersion_CB2 = 2, /* version after aoblkdir remove hole filling + * mechanims used for unique index */ MaxAORelationVersion } AORelationVersion; -#define AORelationVersion_GetLatest() AORelationVersion_GP7 -#define AORelationVersion_Get(relation) (relation)->rd_appendonly->version -#define AORelationVersion_Validate(relation, version) \ - (AORelationVersion_Get((relation)) >= (version)) +#define AORelationVersion_GetLatest() AORelationVersion_CB2 #define AORelationVersion_IsValid(version) \ ((version) > AORelationVersion_None && (version) < MaxAORelationVersion) @@ -125,17 +123,8 @@ static inline void AOSegfileFormatVersion_CheckValid(int version) */ #define PG82NumericConversionNeeded(version) \ ( \ - AORelationVersion_CheckValid(version), \ - (version > AORelationVersion_Original) \ -) - -/* - * Are numerics stored in old, pre-PostgreSQL 8.3 format, and need converting? 
- */ -#define PG82NumericConversionNeeded(version) \ -( \ - AORelationVersion_CheckValid(version), \ - (version < AORelationVersion_PG83) \ + AOSegfileFormatVersion_CheckValid(version), \ + (version > AOSegfileFormatVersion_Original) \ ) extern void @@ -200,4 +189,10 @@ SwapAppendonlyEntries(Oid entryRelId1, Oid entryRelId2); extern int16 GetAppendOnlySegmentFilesCount(Relation rel); +extern int16 +AORelationVersion_Get(Relation rel); + +extern bool +AORelationVersion_Validate(Relation rel, int16 version); + #endif /* PG_APPENDONLY_H */ diff --git a/src/include/cdb/cdbaocsam.h b/src/include/cdb/cdbaocsam.h index 48c5ae2cbf5..ed06d6cb522 100644 --- a/src/include/cdb/cdbaocsam.h +++ b/src/include/cdb/cdbaocsam.h @@ -77,6 +77,8 @@ typedef struct AOCSInsertDescData bool insertMultiFiles; /* insert into multi files */ dlist_node node; /* node of segfiles list */ int range; /* inserted tuples of each range */ + /* flag for insert placeholder in unique index */ + bool placeholderInserted; } AOCSInsertDescData; typedef AOCSInsertDescData *AOCSInsertDesc; @@ -372,12 +374,6 @@ extern bool aocs_fetch(AOCSFetchDesc aocsFetchDesc, AOTupleId *aoTupleId, TupleTableSlot *slot); extern void aocs_fetch_finish(AOCSFetchDesc aocsFetchDesc); - -extern AOCSUpdateDesc aocs_update_init(Relation rel, int segno); -extern void aocs_update_finish(AOCSUpdateDesc desc); -extern TM_Result aocs_update(AOCSUpdateDesc desc, TupleTableSlot *slot, - AOTupleId *oldTupleId, AOTupleId *newTupleId); - extern AOCSDeleteDesc aocs_delete_init(Relation rel); extern TM_Result aocs_delete(AOCSDeleteDesc desc, AOTupleId *aoTupleId); diff --git a/src/include/cdb/cdbappendonlyam.h b/src/include/cdb/cdbappendonlyam.h index dfeeff76453..1242b3b8ce7 100644 --- a/src/include/cdb/cdbappendonlyam.h +++ b/src/include/cdb/cdbappendonlyam.h @@ -118,6 +118,9 @@ typedef struct AppendOnlyInsertDescData bool insertMultiFiles; /* insert into multi files */ dlist_node node; /* node of segfiles list */ int range; /* inserted 
tuples of each range */ + /* flag for insert placeholder in unique index */ + bool placeholderInserted; + } AppendOnlyInsertDescData; typedef AppendOnlyInsertDescData *AppendOnlyInsertDesc; diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 08e100da29d..3fba8197b95 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -385,7 +385,7 @@ extern void analyze_rel(Oid relid, RangeVar *relation, /* in commands/vacuumlazy.c */ extern void lazy_vacuum_rel_heap(Relation onerel, VacuumParams *params, BufferAccessStrategy bstrategy); - +extern void scan_index(Relation indrel, Relation aorel, int elevel, BufferAccessStrategy bstrategy); /* in commands/vacuum_ao.c */ extern void ao_vacuum_rel(Relation rel, VacuumParams *params, BufferAccessStrategy bstrategy); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index c7578ffd5a5..2b4f7ff23b2 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -712,7 +712,7 @@ typedef struct EState /* Should the executor skip past the alien plan nodes */ bool eliminateAliens; - + Bitmapset *locallyExecutableSubplans; /* * GPDB: gp_bypass_unique_check is introduced so that routines, such as AO * vacuum, can avoid running uniqueness checks while inserting tuples. 
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index c168ab31938..0e1c15bcf64 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -293,7 +293,6 @@ extern bool Debug_bitmap_print_insert; extern bool enable_checksum_on_tables; extern int gp_max_local_distributed_cache; extern bool gp_local_distributed_cache_stats; -extern bool gp_appendonly_enable_unique_index; extern bool gp_appendonly_verify_block_checksums; extern bool gp_appendonly_verify_write_block; extern bool gp_appendonly_compaction; diff --git a/src/test/isolation2/expected/ao_unique_index.out b/src/test/isolation2/expected/ao_unique_index.out index b6a49a20a37..8748b85bd69 100644 --- a/src/test/isolation2/expected/ao_unique_index.out +++ b/src/test/isolation2/expected/ao_unique_index.out @@ -520,3 +520,52 @@ ABORT DROP TABLE unique_index_ao_row; DROP + + +-------------------------------------------------------------------------------- +----------------------- Smoke tests for ADD CONSTRAINT ------------------------ +-------------------------------------------------------------------------------- +CREATE TABLE unique_index_ao_row (a INT) USING ao_row DISTRIBUTED REPLICATED; +CREATE +INSERT INTO unique_index_ao_row SELECT * FROM generate_series(1, 5); +INSERT 5 + +ALTER table unique_index_ao_row ADD CONSTRAINT a_unique UNIQUE(a); +ALTER +-- should conflict +INSERT INTO unique_index_ao_row VALUES (1); +DETAIL: Key (a)=(1) already exists. +ERROR: duplicate key value violates unique constraint "a_unique" +ALTER table unique_index_ao_row DROP CONSTRAINT a_unique; +ALTER + +INSERT INTO unique_index_ao_row VALUES (1); +INSERT 1 +-- should failed +ALTER table unique_index_ao_row ADD CONSTRAINT a_unique UNIQUE(a); +DETAIL: Key (a)=(1) is duplicated. 
+ERROR: could not create unique index "a_unique" + +DROP TABLE unique_index_ao_row; +DROP + + +-------------------------------------------------------------------------------- +----------------------- Smoke tests for Multiple Key --------------------------- +-------------------------------------------------------------------------------- +CREATE TABLE unique_index_ao_row (a INT, b INT) USING ao_row DISTRIBUTED REPLICATED; +CREATE +INSERT INTO unique_index_ao_row SELECT i,i FROM generate_series(1, 5) i; +INSERT 5 + +CREATE UNIQUE INDEX a_b_unique ON unique_index_ao_row(a,b); +CREATE +-- should not conflict +INSERT INTO unique_index_ao_row VALUES (1,2); +INSERT 1 +-- should conflict +INSERT INTO unique_index_ao_row VALUES (1,1); +DETAIL: Key (a, b)=(1, 1) already exists. +ERROR: duplicate key value violates unique constraint "a_b_unique" +DROP TABLE unique_index_ao_row; +DROP diff --git a/src/test/isolation2/expected/aocs_unique_index.out b/src/test/isolation2/expected/aocs_unique_index.out index 8aa5e0e8522..839dc5b159d 100644 --- a/src/test/isolation2/expected/aocs_unique_index.out +++ b/src/test/isolation2/expected/aocs_unique_index.out @@ -520,3 +520,52 @@ ABORT DROP TABLE unique_index_ao_column; DROP + + +-------------------------------------------------------------------------------- +----------------------- Smoke tests for ADD CONSTRAINT ------------------------ +-------------------------------------------------------------------------------- +CREATE TABLE unique_index_ao_column (a INT) USING ao_column DISTRIBUTED REPLICATED; +CREATE +INSERT INTO unique_index_ao_column SELECT * FROM generate_series(1, 5); +INSERT 5 + +ALTER table unique_index_ao_column ADD CONSTRAINT a_unique UNIQUE(a); +ALTER +-- should conflict +INSERT INTO unique_index_ao_column VALUES (1); +DETAIL: Key (a)=(1) already exists. 
+ERROR: duplicate key value violates unique constraint "a_unique" +ALTER table unique_index_ao_column DROP CONSTRAINT a_unique; +ALTER + +INSERT INTO unique_index_ao_column VALUES (1); +INSERT 1 +-- should failed +ALTER table unique_index_ao_column ADD CONSTRAINT a_unique UNIQUE(a); +DETAIL: Key (a)=(1) is duplicated. +ERROR: could not create unique index "a_unique" + +DROP TABLE unique_index_ao_column; +DROP + + +-------------------------------------------------------------------------------- +----------------------- Smoke tests for Multiple Key --------------------------- +-------------------------------------------------------------------------------- +CREATE TABLE unique_index_ao_column (a INT, b INT) USING ao_column DISTRIBUTED REPLICATED; +CREATE +INSERT INTO unique_index_ao_column SELECT i,i FROM generate_series(1, 5) i; +INSERT 5 + +CREATE UNIQUE INDEX a_b_unique ON unique_index_ao_column(a,b); +CREATE +-- should not conflict +INSERT INTO unique_index_ao_column VALUES (1,2); +INSERT 1 +-- should conflict +INSERT INTO unique_index_ao_column VALUES (1,1); +DETAIL: Key (a, b)=(1, 1) already exists. +ERROR: duplicate key value violates unique constraint "a_b_unique" +DROP TABLE unique_index_ao_column; +DROP diff --git a/src/test/isolation2/input/uao/ao_unique_index_vacuum.source b/src/test/isolation2/input/uao/ao_unique_index_vacuum.source index 00daf441a68..4b1f3ca0b4e 100644 --- a/src/test/isolation2/input/uao/ao_unique_index_vacuum.source +++ b/src/test/isolation2/input/uao/ao_unique_index_vacuum.source @@ -44,30 +44,24 @@ SELECT gp_inject_fault('appendonly_insert', 'reset', dbid) 1<: DROP TABLE unique_index_vacuum_@amname@; --- Case 3: Concurrent case with a conflicting insert where the vacuum is hung +-- Case 3: Validate the occurrence of vacuum index. -- just after it has bulk deleted the old index entries. 
CREATE TABLE unique_index_vacuum_@amname@(i int UNIQUE) USING @amname@ DISTRIBUTED REPLICATED; INSERT INTO unique_index_vacuum_@amname@ SELECT generate_series(1, 5); DELETE FROM unique_index_vacuum_@amname@ WHERE i = 5; -SELECT gp_inject_fault('vacuum_ao_after_index_delete', 'suspend', dbid) - FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; +select gp_inject_fault_infinite('vacuum_ao_after_index_delete', 'skip', dbid) + from gp_segment_configuration where role = 'p' AND content <> -1; -1&: VACUUM unique_index_vacuum_@amname@; +VACUUM unique_index_vacuum_@amname@; -- Wait until all old index entries have been deleted by the VACUUM. SELECT gp_wait_until_triggered_fault('vacuum_ao_after_index_delete', 1, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; --- Now trying to insert key = 1 will also be detected as a conflict, even --- though the old index entries are no longer present. We have the new index --- entries (and the new block directory rows) to thank, which have already been --- persisted at end of insert, within the VACUUM. -2: INSERT INTO unique_index_vacuum_@amname@ VALUES (1); -SELECT gp_inject_fault('vacuum_ao_after_index_delete', 'reset', dbid) +SELECT gp_inject_fault_infinite('vacuum_ao_after_index_delete', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; -1<: DROP TABLE unique_index_vacuum_@amname@; diff --git a/src/test/isolation2/input/uao/test_pg_appendonly_version.source b/src/test/isolation2/input/uao/test_pg_appendonly_version.source index e4830ba208a..9c613e66a16 100644 --- a/src/test/isolation2/input/uao/test_pg_appendonly_version.source +++ b/src/test/isolation2/input/uao/test_pg_appendonly_version.source @@ -1,9 +1,9 @@ -- Validate pg_appendonly.version functioning by unique index creation --- as it requires pg_appendonly.version >= AORelationVersion_GP7. +-- as it requires pg_appendonly.version >= AORelationVersion_CB2. 
create table @amname@_version_tbl (a int) using @amname@; --- unique index on AO is supported starting from version 2 (AORelationVersion_GP7) +-- unique index on AO is supported starting from version 2 (AORelationVersion_CB2) select version from pg_appendonly where relid = '@amname@_version_tbl'::regclass; create unique index on @amname@_version_tbl(a); insert into @amname@_version_tbl select generate_series(1, 10); @@ -15,7 +15,7 @@ select * from @amname@_version_tbl where a = 2; set allow_system_table_mods = on; update pg_appendonly set version = 1 where relid = '@amname@_version_tbl'::regclass; --- unique index on AO isn't supported on version < AORelationVersion_GP7 +-- unique index on AO isn't supported on version < AORelationVersion_CB2 select version from pg_appendonly where relid = '@amname@_version_tbl'::regclass; create unique index on @amname@_version_tbl(a); diff --git a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule index 66f6fd7f2d0..9ff22c39f82 100644 --- a/src/test/isolation2/isolation2_schedule +++ b/src/test/isolation2/isolation2_schedule @@ -154,7 +154,6 @@ test: uao/vacuum_cleanup_row test: uao/vacuum_index_stats_row test: uao/bitmapindex_rescan_row test: uao/limit_indexscan_inits_row -test: uao/create_index_allows_readonly_row test: uao/test_pg_appendonly_version_row # Refer to the case comment for why it is commented out. # test: uao/bad_buffer_on_temp_ao_row @@ -211,7 +210,6 @@ test: uao/vacuum_cleanup_column test: uao/vacuum_index_stats_column test: uao/bitmapindex_rescan_column test: uao/limit_indexscan_inits_column -test: uao/create_index_allows_readonly_column test: uao/test_pg_appendonly_version_column # Refer to the case comment for why it is commented out. 
# test: uao/bad_buffer_on_temp_ao_column diff --git a/src/test/isolation2/output/uao/ao_unique_index_vacuum.source b/src/test/isolation2/output/uao/ao_unique_index_vacuum.source index 9e7f70b1ced..74db357c6ab 100644 --- a/src/test/isolation2/output/uao/ao_unique_index_vacuum.source +++ b/src/test/isolation2/output/uao/ao_unique_index_vacuum.source @@ -74,7 +74,7 @@ VACUUM DROP TABLE unique_index_vacuum_@amname@; DROP --- Case 3: Concurrent case with a conflicting insert where the vacuum is hung +-- Case 3: Validate the occurrence of vacuum index. -- just after it has bulk deleted the old index entries. CREATE TABLE unique_index_vacuum_@amname@(i int UNIQUE) USING @amname@ DISTRIBUTED REPLICATED; CREATE @@ -83,15 +83,16 @@ INSERT 5 DELETE FROM unique_index_vacuum_@amname@ WHERE i = 5; DELETE 1 -SELECT gp_inject_fault('vacuum_ao_after_index_delete', 'suspend', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; - gp_inject_fault ------------------ - Success: - Success: - Success: +select gp_inject_fault_infinite('vacuum_ao_after_index_delete', 'skip', dbid) from gp_segment_configuration where role = 'p' AND content <> -1; + gp_inject_fault_infinite +-------------------------- + Success: + Success: + Success: (3 rows) -1&: VACUUM unique_index_vacuum_@amname@; +VACUUM unique_index_vacuum_@amname@; +VACUUM -- Wait until all old index entries have been deleted by the VACUUM. SELECT gp_wait_until_triggered_fault('vacuum_ao_after_index_delete', 1, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; @@ -102,23 +103,14 @@ SELECT gp_wait_until_triggered_fault('vacuum_ao_after_index_delete', 1, dbid) FR Success: (3 rows) --- Now trying to insert key = 1 will also be detected as a conflict, even --- though the old index entries are no longer present. We have the new index --- entries (and the new block directory rows) to thank, which have already been --- persisted at end of insert, within the VACUUM. 
-2: INSERT INTO unique_index_vacuum_@amname@ VALUES (1); -ERROR: duplicate key value violates unique constraint "unique_index_vacuum_@amname@_i_key" (seg1 192.168.0.148:7003 pid=3197808) -DETAIL: Key (i)=(1) already exists. -SELECT gp_inject_fault('vacuum_ao_after_index_delete', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; - gp_inject_fault ------------------ - Success: - Success: - Success: +SELECT gp_inject_fault_infinite('vacuum_ao_after_index_delete', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; + gp_inject_fault_infinite +-------------------------- + Success: + Success: + Success: (3 rows) -1<: <... completed> -VACUUM DROP TABLE unique_index_vacuum_@amname@; DROP diff --git a/src/test/isolation2/output/uao/test_pg_appendonly_version.source b/src/test/isolation2/output/uao/test_pg_appendonly_version.source index bfdbb067cea..2bcf4f25353 100644 --- a/src/test/isolation2/output/uao/test_pg_appendonly_version.source +++ b/src/test/isolation2/output/uao/test_pg_appendonly_version.source @@ -1,10 +1,10 @@ -- Validate pg_appendonly.version functioning by unique index creation --- as it requires pg_appendonly.version >= AORelationVersion_GP7. +-- as it requires pg_appendonly.version >= AORelationVersion_CB2. 
create table @amname@_version_tbl (a int) using @amname@; CREATE --- unique index on AO is supported starting from version 2 (AORelationVersion_GP7) +-- unique index on AO is supported starting from version 2 (AORelationVersion_CB2) select version from pg_appendonly where relid = '@amname@_version_tbl'::regclass; version --------- @@ -30,7 +30,7 @@ SET update pg_appendonly set version = 1 where relid = '@amname@_version_tbl'::regclass; UPDATE 1 --- unique index on AO isn't supported on version < AORelationVersion_GP7 +-- unique index on AO isn't supported on version < AORelationVersion_CB2 select version from pg_appendonly where relid = '@amname@_version_tbl'::regclass; version --------- diff --git a/src/test/isolation2/sql/ao_unique_index.sql b/src/test/isolation2/sql/ao_unique_index.sql index 8035916f95e..68c2faad128 100644 --- a/src/test/isolation2/sql/ao_unique_index.sql +++ b/src/test/isolation2/sql/ao_unique_index.sql @@ -342,3 +342,37 @@ INSERT INTO unique_index_ao_row VALUES(2); 2: ABORT; DROP TABLE unique_index_ao_row; + + +-------------------------------------------------------------------------------- +----------------------- Smoke tests for ADD CONSTRAINT ------------------------ +-------------------------------------------------------------------------------- +CREATE TABLE unique_index_ao_row (a INT) USING ao_row + DISTRIBUTED REPLICATED; +INSERT INTO unique_index_ao_row SELECT * FROM generate_series(1, 5); + +ALTER table unique_index_ao_row ADD CONSTRAINT a_unique UNIQUE(a); +-- should conflict +INSERT INTO unique_index_ao_row VALUES (1); +ALTER table unique_index_ao_row DROP CONSTRAINT a_unique; + +INSERT INTO unique_index_ao_row VALUES (1); +-- should failed +ALTER table unique_index_ao_row ADD CONSTRAINT a_unique UNIQUE(a); + +DROP TABLE unique_index_ao_row; + + +-------------------------------------------------------------------------------- +----------------------- Smoke tests for Multiple Key --------------------------- 
+-------------------------------------------------------------------------------- +CREATE TABLE unique_index_ao_row (a INT, b INT) USING ao_row + DISTRIBUTED REPLICATED; +INSERT INTO unique_index_ao_row SELECT i,i FROM generate_series(1, 5) i; + +CREATE UNIQUE INDEX a_b_unique ON unique_index_ao_row(a,b); +-- should not conflict +INSERT INTO unique_index_ao_row VALUES (1,2); +-- should conflict +INSERT INTO unique_index_ao_row VALUES (1,1); +DROP TABLE unique_index_ao_row; \ No newline at end of file diff --git a/src/test/isolation2/sql/aocs_unique_index.sql b/src/test/isolation2/sql/aocs_unique_index.sql index 4cd59262ce3..e2624f6da9f 100644 --- a/src/test/isolation2/sql/aocs_unique_index.sql +++ b/src/test/isolation2/sql/aocs_unique_index.sql @@ -342,3 +342,37 @@ INSERT INTO unique_index_ao_column VALUES(2); 2: ABORT; DROP TABLE unique_index_ao_column; + + +-------------------------------------------------------------------------------- +----------------------- Smoke tests for ADD CONSTRAINT ------------------------ +-------------------------------------------------------------------------------- +CREATE TABLE unique_index_ao_column (a INT) USING ao_column + DISTRIBUTED REPLICATED; +INSERT INTO unique_index_ao_column SELECT * FROM generate_series(1, 5); + +ALTER table unique_index_ao_column ADD CONSTRAINT a_unique UNIQUE(a); +-- should conflict +INSERT INTO unique_index_ao_column VALUES (1); +ALTER table unique_index_ao_column DROP CONSTRAINT a_unique; + +INSERT INTO unique_index_ao_column VALUES (1); +-- should failed +ALTER table unique_index_ao_column ADD CONSTRAINT a_unique UNIQUE(a); + +DROP TABLE unique_index_ao_column; + + +-------------------------------------------------------------------------------- +----------------------- Smoke tests for Multiple Key --------------------------- +-------------------------------------------------------------------------------- +CREATE TABLE unique_index_ao_column (a INT, b INT) USING ao_column + DISTRIBUTED REPLICATED; 
+INSERT INTO unique_index_ao_column SELECT i,i FROM generate_series(1, 5) i; + +CREATE UNIQUE INDEX a_b_unique ON unique_index_ao_column(a,b); +-- should not conflict +INSERT INTO unique_index_ao_column VALUES (1,2); +-- should conflict +INSERT INTO unique_index_ao_column VALUES (1,1); +DROP TABLE unique_index_ao_column; \ No newline at end of file