diff --git a/src/backend/access/aocs/aocsam.c b/src/backend/access/aocs/aocsam.c index 267dd837697..9f688537ac3 100644 --- a/src/backend/access/aocs/aocsam.c +++ b/src/backend/access/aocs/aocsam.c @@ -409,29 +409,6 @@ open_next_scan_seg(AOCSScanDesc scan) */ if (scan->blockDirectory) { - /* - * if building the block directory, we need to make sure - * the sequence starts higher than our highest tuple's - * rownum. In the case of upgraded blocks, the highest - * tuple will have tupCount as its row num for non-upgrade - * cases, which use the sequence, it will be enough to - * start off the end of the sequence; note that this is - * not ideal -- if we are at least curSegInfo->tupcount + - * 1 then we don't even need to update the sequence value - */ - int64 firstSequence; - Oid segrelid; - GetAppendOnlyEntryAuxOids(RelationGetRelid(scan->rs_base.rs_rd), - scan->appendOnlyMetaDataSnapshot, - &segrelid, NULL, NULL, - NULL, NULL); - - firstSequence = - GetFastSequences(segrelid, - curSegInfo->segno, - curSegInfo->total_tupcount + 1, - NUM_FAST_SEQUENCES); - AppendOnlyBlockDirectory_Init_forInsert(scan->blockDirectory, scan->appendOnlyMetaDataSnapshot, (FileSegInfo *) curSegInfo, @@ -440,10 +417,6 @@ open_next_scan_seg(AOCSScanDesc scan) curSegInfo->segno, scan->columnScanInfo.relationTupleDesc->natts, true); - - InsertFastSequenceEntry(segrelid, - curSegInfo->segno, - firstSequence); } open_all_datumstreamread_segfiles(scan->rs_base.rs_rd, diff --git a/src/backend/access/aocs/aocssegfiles.c b/src/backend/access/aocs/aocssegfiles.c index 9f389f94cd6..32a391c7421 100644 --- a/src/backend/access/aocs/aocssegfiles.c +++ b/src/backend/access/aocs/aocssegfiles.c @@ -88,10 +88,6 @@ InsertInitialAOCSFileSegInfo(Relation prel, int32 segno, int32 nvp, Oid segrelid segrel = heap_open(segrelid, RowExclusiveLock); - InsertFastSequenceEntry(segrelid, - (int64) segno, - 0); - values[Anum_pg_aocs_segno - 1] = Int32GetDatum(segno); values[Anum_pg_aocs_vpinfo - 1] = 
PointerGetDatum(vpinfo); values[Anum_pg_aocs_tupcount - 1] = Int64GetDatum(0); diff --git a/src/backend/access/appendonly/aosegfiles.c b/src/backend/access/appendonly/aosegfiles.c index 2e651099cb8..1887ff46dee 100644 --- a/src/backend/access/appendonly/aosegfiles.c +++ b/src/backend/access/appendonly/aosegfiles.c @@ -110,10 +110,6 @@ InsertInitialSegnoEntry(Relation parentrel, int segno) GetAppendOnlyEntryAuxOids(parentrel->rd_id, NULL, &segrelid, NULL, NULL, NULL, NULL); - InsertFastSequenceEntry(segrelid, - (int64) segno, - 0); - pg_aoseg_rel = heap_open(segrelid, RowExclusiveLock); pg_aoseg_dsc = RelationGetDescr(pg_aoseg_rel); diff --git a/src/backend/access/appendonly/appendonlyam.c b/src/backend/access/appendonly/appendonlyam.c index 4d84e957026..01f405a0ad2 100755 --- a/src/backend/access/appendonly/appendonlyam.c +++ b/src/backend/access/appendonly/appendonlyam.c @@ -305,27 +305,6 @@ SetNextFileSegForRead(AppendOnlyScanDesc scan) /* Initialize the block directory for inserts if needed. */ if (scan->blockDirectory) { - Oid segrelid; - - GetAppendOnlyEntryAuxOids(reln->rd_id, NULL, - &segrelid, NULL, NULL, NULL, NULL); - - /* - * if building the block directory, we need to make sure the - * sequence starts higher than our highest tuple's rownum. In - * the case of upgraded blocks, the highest tuple will have - * tupCount as its row num for non-upgrade cases, which use - * the sequence, it will be enough to start off the end of the - * sequence; note that this is not ideal -- if we are at least - * curSegInfo->tupcount + 1 then we don't even need to update - * the sequence value. 
- */ - int64 firstSequence = - GetFastSequences(segrelid, - segno, - fsinfo->total_tupcount + 1, - NUM_FAST_SEQUENCES); - AppendOnlyBlockDirectory_Init_forInsert(scan->blockDirectory, scan->appendOnlyMetaDataSnapshot, fsinfo, @@ -334,10 +313,6 @@ SetNextFileSegForRead(AppendOnlyScanDesc scan) segno, /* segno */ 1, /* columnGroupNo */ false); - - InsertFastSequenceEntry(segrelid, - segno, - firstSequence); } finished_all_files = false; diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 47fe3ba86af..3a229b3027f 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -6876,6 +6876,64 @@ heap_freeze_tuple(HeapTupleHeader tuple, return do_freeze; } +/* + * GPDB: heap_freeze_tuple_wal_logged + * Similar to heap_freeze_tuple, but with WAL logging AND do not check + * cutoff xid (i.e. we blindly freeze a tuple and write WAL for it). + * + * Useful when we want to freeze a tuple immediately after inserting it. + */ +void +heap_freeze_tuple_wal_logged(Relation rel, HeapTuple tup) +{ + xl_heap_freeze_tuple frozen = {0}; + Buffer buffer; + Page page; + HeapTupleHeader htup; + + /* Set the passed-in tuple to be frozen */ + HeapTupleHeaderSetXminFrozen(tup->t_data); + + /* + * Prepare the xl_heap_freeze_tuple manually (instead of heap_prepare_freeze_tuple) + * as we do not need the checks in heap_prepare_freeze_tuple. Note that this would + * need updating if more fields are added to xl_heap_freeze_tuple in the future. + * But that would be caught by a test case in isolation2/frozen_insert_crash. + * Also, we don't set frozen.frzflags as those are to be set only during vacuum. 
+ */ + frozen.xmax = HeapTupleHeaderGetRawXmax(tup->t_data); + frozen.offset = ItemPointerGetOffsetNumber(&(tup->t_self)); + frozen.t_infomask = tup->t_data->t_infomask; + frozen.t_infomask2 = tup->t_data->t_infomask2; + + buffer = ReadBuffer(rel, ItemPointerGetBlockNumber(&(tup->t_self))); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + + page = (Page) BufferGetPage(buffer); + + START_CRIT_SECTION(); + + MarkBufferDirty(buffer); + + /* freeze the tuple in buffer */ + htup = (HeapTupleHeader) PageGetItem(page, PageGetItemId(page, frozen.offset)); + heap_execute_freeze_tuple(htup, &frozen); + + /* WAL logging */ + if (RelationNeedsWAL(rel)) + { + XLogRecPtr recptr; + + recptr = log_heap_freeze(rel, buffer, InvalidTransactionId /* cutoff_xid */, + &frozen, 1 /*ntuples*/); + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); + + UnlockReleaseBuffer(buffer); +} + /* * For a given MultiXactId, return the hint bits that should be set in the * tuple's infomask. @@ -8927,8 +8985,11 @@ heap_xlog_freeze_page(XLogReaderState *record) /* * In Hot Standby mode, ensure that there's no queries running which still * consider the frozen xids as running. + * GPDB: but do nothing if there is no valid cutoff xid, which means the + * record is not generated by vacuum but by specifically freezing a tuple + * (see heap_freeze_tuple_wal_logged). 
*/ - if (InHotStandby) + if (InHotStandby && TransactionIdIsValid(cutoff_xid)) { RelFileNode rnode; TransactionId latestRemovedXid = cutoff_xid; diff --git a/src/backend/catalog/gp_fastsequence.c b/src/backend/catalog/gp_fastsequence.c index eb3dcdbb0ee..fa3eaa742f8 100644 --- a/src/backend/catalog/gp_fastsequence.c +++ b/src/backend/catalog/gp_fastsequence.c @@ -23,6 +23,8 @@ #include "access/genam.h" #include "access/htup.h" #include "access/heapam.h" +#include "access/xact.h" +#include "utils/faultinjector.h" #include "utils/syscache.h" #include "catalog/gp_indexing.h" @@ -85,63 +87,6 @@ InsertInitialFastSequenceEntries(Oid objid) table_close(gp_fastsequence_rel, RowExclusiveLock); } -/* - * InsertFastSequenceEntry - * - * Insert a new fast sequence entry for a given object. If the given - * object already exists in the table, this function replaces the old - * entry with a fresh initial value. - */ -void -InsertFastSequenceEntry(Oid objid, int64 objmod, int64 lastSequence) -{ - Relation gp_fastsequence_rel; - ScanKeyData scankey[2]; - SysScanDesc scan; - TupleDesc tupleDesc; - HeapTuple tuple = NULL; - - /* - * Open and lock the gp_fastsequence catalog table. 
- */ - gp_fastsequence_rel = table_open(FastSequenceRelationId, RowExclusiveLock); - tupleDesc = RelationGetDescr(gp_fastsequence_rel); - - /* SELECT * FROM gp_fastsequence WHERE objid = :1 AND objmod = :2 FOR UPDATE */ - ScanKeyInit(&scankey[0], - Anum_gp_fastsequence_objid, - BTEqualStrategyNumber, F_OIDEQ, - ObjectIdGetDatum(objid)); - ScanKeyInit(&scankey[1], - Anum_gp_fastsequence_objmod, - BTEqualStrategyNumber, F_INT8EQ, - Int64GetDatum(objmod)); - scan = systable_beginscan(gp_fastsequence_rel, FastSequenceObjidObjmodIndexId, true, - NULL, 2, scankey); - - tuple = systable_getnext(scan); - insert_or_update_fastsequence(gp_fastsequence_rel, - tuple, - tupleDesc, - objid, - objmod, - lastSequence); - systable_endscan(scan); - - /* - * gp_fastsequence table locking for AO inserts uses bottom up approach - * meaning the locks are first acquired on the segments and later on the - * master. - * Hence, it is essential that we release the lock here to avoid - * any form of master-segment resource deadlock. E.g. A transaction - * trying to reindex gp_fastsequence has acquired a lock on it on the - * master but is blocked on the segment as another transaction which - * is an insert operation has acquired a lock first on segment and is - * trying to acquire a lock on the Master. Deadlock! - */ - table_close(gp_fastsequence_rel, RowExclusiveLock); -} - /* * insert or update the existing fast sequence number for (objid, objmod). 
* @@ -175,7 +120,27 @@ insert_or_update_fastsequence(Relation gp_fastsequence_rel, newTuple = heaptuple_form_to(tupleDesc, values, nulls, NULL, NULL); - CatalogTupleInsertFrozen(gp_fastsequence_rel, newTuple); + /* insert the tuple */ + CatalogTupleInsert(gp_fastsequence_rel, newTuple); + +#ifdef FAULT_INJECTOR + FaultInjector_InjectFaultIfSet( + "insert_fastsequence_before_freeze", + DDLNotSpecified, + "", //databaseName + RelationGetRelationName(gp_fastsequence_rel)); +#endif + + /* freeze the tuple */ + heap_freeze_tuple_wal_logged(gp_fastsequence_rel, newTuple); + +#ifdef FAULT_INJECTOR + FaultInjector_InjectFaultIfSet( + "insert_fastsequence_after_freeze", + DDLNotSpecified, + "", //databaseName + RelationGetRelationName(gp_fastsequence_rel)); +#endif heap_freetuple(newTuple); } @@ -280,7 +245,17 @@ int64 GetFastSequences(Oid objid, int64 objmod, systable_endscan(scan); - /* Refer to the comment at the end of InsertFastSequenceEntry. */ + /* + * gp_fastsequence table locking for AO inserts uses bottom up approach + * meaning the locks are first acquired on the segments and later on the + * master. + * Hence, it is essential that we release the lock here to avoid + * any form of master-segment resource deadlock. E.g. A transaction + * trying to reindex gp_fastsequence has acquired a lock on it on the + * master but is blocked on the segment as another transaction which + * is an insert operation has acquired a lock first on segment and is + * trying to acquire a lock on the Master. Deadlock! + */ table_close(gp_fastsequence_rel, RowExclusiveLock); return firstSequence; @@ -341,7 +316,17 @@ int64 ReadLastSequence(Oid objid, int64 objmod) systable_endscan(scan); - /* Refer to the comment at the end of InsertFastSequenceEntry. */ + /* + * gp_fastsequence table locking for AO inserts uses bottom up approach + * meaning the locks are first acquired on the segments and later on the + * master. 
+ * Hence, it is essential that we release the lock here to avoid + * any form of master-segment resource deadlock. E.g. A transaction + * trying to reindex gp_fastsequence has acquired a lock on it on the + * master but is blocked on the segment as another transaction which + * is an insert operation has acquired a lock first on segment and is + * trying to acquire a lock on the Master. Deadlock! + */ heap_close(gp_fastsequence_rel, AccessShareLock); return lastSequence; diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 5686f2f678a..1a5d8d279e7 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -173,6 +173,7 @@ extern void heap_inplace_update(Relation relation, HeapTuple tuple); extern bool heap_freeze_tuple(HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, TransactionId cutoff_multi); +extern void heap_freeze_tuple_wal_logged(Relation rel, HeapTuple tuple); extern bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, MultiXactId cutoff_multi, Buffer buf); extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple); diff --git a/src/include/catalog/gp_fastsequence.h b/src/include/catalog/gp_fastsequence.h index e8ff66e90bc..201ed058843 100644 --- a/src/include/catalog/gp_fastsequence.h +++ b/src/include/catalog/gp_fastsequence.h @@ -41,14 +41,6 @@ FOREIGN_KEY(objid REFERENCES pg_class(oid)); typedef FormData_gp_fastsequence *Form_gp_fastsequence; #define NUM_FAST_SEQUENCES 100 - -/* - * Insert a new light-weight fast sequence entry for a given object. 
- */ -extern void InsertFastSequenceEntry(Oid objid, int64 objmod, - int64 lastSequence); - - extern void InsertInitialFastSequenceEntries(Oid objid); /* diff --git a/src/test/isolation2/expected/frozen_insert_crash.out b/src/test/isolation2/expected/frozen_insert_crash.out new file mode 100644 index 00000000000..61c2f1d44cd --- /dev/null +++ b/src/test/isolation2/expected/frozen_insert_crash.out @@ -0,0 +1,212 @@ +-- Test server crash in case of frozen insert. Make sure that after crash +-- recovery, the frozen insert and index are consistent: +-- +-- 1. If crash happened before the row is frozen, the row will be invisible; +-- 2. If crash happened after the row is frozen, the row will be visible. +-- +-- And the above behavior should remain consistent using seqscan or indexscan. +-- +-- We test gp_fastsequence here since it does frozen insert and has an index. + +-- Case 1. crash after the regular MVCC insert has made to disk, but not +-- the WAL record responsible for updating it to frozen. +-- After crash recovery, the insert will follow regular MVCC and not be seen. +1: create table tab_fi(a int) with (appendoptimized=true) distributed replicated; +CREATE + +-- switch WAL on seg0 to reduce flakiness +1: select gp_segment_id, pg_switch_wal() is not null from gp_dist_random('gp_id') where gp_segment_id = 0; + gp_segment_id | ?column? 
+---------------+---------- + 0 | t +(1 row) + +-- suspend right after the insert into gp_fastsequence during an AO table insert, +-- but before freezing the tuple +1: select gp_inject_fault('insert_fastsequence_before_freeze', 'suspend', ''/*DDL*/, ''/*DB*/, 'gp_fastsequence'/*table*/, 1/*start occur*/, 1/*end occur*/, 0/*extra_arg*/, dbid) from gp_segment_configuration where role = 'p' and content = 0; + gp_inject_fault +----------------- + Success: +(1 row) + +2>: insert into tab_fi values(1); + +1: select gp_wait_until_triggered_fault('insert_fastsequence_before_freeze', 1, dbid) from gp_segment_configuration where role = 'p' and content = 0; + gp_wait_until_triggered_fault +------------------------------- + Success: +(1 row) + +-- switch WAL on seg0, so the new row gets flushed (including its index) +1: select gp_segment_id, pg_switch_wal() is not null from gp_dist_random('gp_id') where gp_segment_id = 0; + gp_segment_id | ?column? +---------------+---------- + 0 | t +(1 row) + +-- inject a panic, and resume the insert. The WAL for the freeze operation is not +-- going to be made to disk (we just flushed WALs), so we won't replay it during restart later. 
+-- skip FTS probe to prevent unexpected mirror promotion +1: select gp_inject_fault_infinite('fts_probe', 'skip', dbid) from gp_segment_configuration where role='p' and content=-1; + gp_inject_fault_infinite +-------------------------- + Success: +(1 row) +1: select gp_inject_fault('appendonly_insert', 'panic', ''/*DDL*/, ''/*DB*/, 'tab_fi'/*table*/, 1/*start occur*/, -1/*end occur*/, 0/*extra_arg*/, 2/*db_id*/); + gp_inject_fault +----------------- + Success: +(1 row) +1: select gp_inject_fault('insert_fastsequence_before_freeze', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = 0; + gp_inject_fault +----------------- + Success: +(1 row) +1: select gp_inject_fault('fts_probe', 'reset', dbid) from gp_segment_configuration where role='p' and content=-1; + gp_inject_fault +----------------- + Success: +(1 row) + +2<: <... completed> +ERROR: fault triggered, fault name:'appendonly_insert' fault type:'panic' (seg0 127.0.1.1:7002 pid=14710) + +1q: ... + +-- check the gp_fastsequence content w/ table vs index scan, neither should see the +-- new inserted row (objmod=1) following MVCC +1: set enable_indexscan = off; +SET +1: set enable_seqscan = on; +SET +1: select distinct f.gp_segment_id, f.objmod, f.last_sequence from gp_dist_random('gp_fastsequence') f left join gp_dist_random('pg_appendonly') a on segrelid = objid and a.gp_segment_id = f.gp_segment_id where a.gp_segment_id = 0 and relid = (select oid from pg_class where relname = 'tab_fi'); + gp_segment_id | objmod | last_sequence +---------------+--------+--------------- + 0 | 0 | 0 +(1 row) +1: set enable_indexscan = on; +SET +1: set enable_seqscan = off; +SET +1: select distinct f.gp_segment_id, f.objmod, f.last_sequence from gp_dist_random('gp_fastsequence') f left join gp_dist_random('pg_appendonly') a on segrelid = objid and a.gp_segment_id = f.gp_segment_id where a.gp_segment_id = 0 and relid = (select oid from pg_class where relname = 'tab_fi'); + gp_segment_id | objmod | 
last_sequence +---------------+--------+--------------- + 0 | 0 | 0 +(1 row) +1: reset enable_indexscan; +RESET +1: reset enable_seqscan; +RESET + +1: drop table tab_fi; +DROP + +-- Case 2. crash after we have flushed the WAL that updates the row to be frozen. +-- After crash recovery, the insert should be seen. +1: create table tab_fi(a int) with (appendoptimized=true) distributed replicated; +CREATE + +-- switch WAL on seg0 to reduce flakiness +1: select gp_segment_id, pg_switch_wal() is not null from gp_dist_random('gp_id') where gp_segment_id = 0; + gp_segment_id | ?column? +---------------+---------- + 0 | t +(1 row) + +-- suspend right after freezing the tuple +1: select gp_inject_fault('insert_fastsequence_after_freeze', 'suspend', ''/*DDL*/, ''/*DB*/, 'gp_fastsequence'/*table*/, 1/*start occur*/, 1/*end occur*/, 0/*extra_arg*/, dbid) from gp_segment_configuration where role = 'p' and content = 0; + gp_inject_fault +----------------- + Success: +(1 row) + +2>: insert into tab_fi values(1); + +1: select gp_wait_until_triggered_fault('insert_fastsequence_after_freeze', 1, dbid) from gp_segment_configuration where role = 'p' and content = 0; + gp_wait_until_triggered_fault +------------------------------- + Success: +(1 row) + +-- switch WAL on seg0, so the freeze record gets flushed +1: select gp_segment_id, pg_switch_wal() is not null from gp_dist_random('gp_id') where gp_segment_id = 0; + gp_segment_id | ?column? +---------------+---------- + 0 | t +(1 row) + +-- While we are on it, check the wal record for the freeze operation. +-- One of the purposes is to guard against unexpected addition to the xl_heap_freeze_tuple struct in future. +-- If this test failed due to WAL size, please check to see if the xl_heap_freeze_tuple struct +-- has changed, and if we should initialize any new field in heap_freeze_tuple_no_cutoff(). +! 
seg0_datadir=$(psql -At -c "select datadir from gp_segment_configuration where content = 0 and role = 'p'" postgres) && seg0_last_wal_file=$(psql -At -c "SELECT pg_walfile_name(pg_current_wal_lsn()) from gp_dist_random('gp_id') where gp_segment_id = 0" postgres) && pg_waldump ${seg0_last_wal_file} -p ${seg0_datadir}/pg_wal | grep FREEZE_PAGE; +rmgr: Heap2 len (rec/tot): 68/ 68, tx: ##, lsn: #/########, prev #/########, desc: FREEZE_PAGE cutoff xid 0 ntuples 1, blkref #0: rel ####/######/###### blk 0 + + +-- inject a panic and resume in same way as Case 1. But this time we will be able to replay the frozen insert. +-- skip FTS probe to prevent unexpected mirror promotion +1: select gp_inject_fault_infinite('fts_probe', 'skip', dbid) from gp_segment_configuration where role='p' and content=-1; + gp_inject_fault_infinite +-------------------------- + Success: +(1 row) +1: select gp_inject_fault('appendonly_insert', 'panic', ''/*DDL*/, ''/*DB*/, 'tab_fi'/*table*/, 1/*start occur*/, -1/*end occur*/, 0/*extra_arg*/, 2/*db_id*/); + gp_inject_fault +----------------- + Success: +(1 row) +1: select gp_inject_fault('insert_fastsequence_after_freeze', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = 0; + gp_inject_fault +----------------- + Success: +(1 row) +1: select gp_inject_fault('fts_probe', 'reset', dbid) from gp_segment_configuration where role='p' and content=-1; + gp_inject_fault +----------------- + Success: +(1 row) + +2<: <... completed> +ERROR: fault triggered, fault name:'appendonly_insert' fault type:'panic' (seg0 127.0.1.1:7002 pid=14775) + +1q: ... 
+ +-- check the gp_fastsequence content w/ table vs index scan, both should see the new inserted row (objmod=1) +1: set enable_indexscan = off; +SET +1: set enable_seqscan = on; +SET +1: select distinct f.gp_segment_id, f.objmod, f.last_sequence from gp_dist_random('gp_fastsequence') f left join gp_dist_random('pg_appendonly') a on segrelid = objid and a.gp_segment_id = f.gp_segment_id where a.gp_segment_id = 0 and relid = (select oid from pg_class where relname = 'tab_fi'); + gp_segment_id | objmod | last_sequence +---------------+--------+--------------- + 0 | 0 | 0 + 0 | 1 | 100 +(2 rows) +1: set enable_indexscan = on; +SET +1: set enable_seqscan = off; +SET +1: select distinct f.gp_segment_id, f.objmod, f.last_sequence from gp_dist_random('gp_fastsequence') f left join gp_dist_random('pg_appendonly') a on segrelid = objid and a.gp_segment_id = f.gp_segment_id where a.gp_segment_id = 0 and relid = (select oid from pg_class where relname = 'tab_fi'); + gp_segment_id | objmod | last_sequence +---------------+--------+--------------- + 0 | 0 | 0 + 0 | 1 | 100 +(2 rows) +1: reset enable_indexscan; +RESET +1: reset enable_seqscan; +RESET + +1: drop table tab_fi; +DROP + +-- validate that we've actually tested desired scan method +-- for some reason this disrupts the output of subsequent queries so +-- validating at the end here +! psql postgres -At -c "set enable_indexscan = off; set enable_seqscan = on; explain (costs off) select distinct f.gp_segment_id, f.objmod, f.last_sequence from gp_dist_random('gp_fastsequence') f left join gp_dist_random('pg_appendonly') a on segrelid = objid and a.gp_segment_id = f.gp_segment_id where a.gp_segment_id = 0 and relid = (select oid from pg_class where relname = 'tab_fi');" | grep "Seq Scan on gp_fastsequence"; + -> Seq Scan on gp_fastsequence f + +! 
psql postgres -At -c "set enable_indexscan = on; set enable_seqscan = off; explain (costs off) select distinct f.gp_segment_id, f.objmod, f.last_sequence from gp_dist_random('gp_fastsequence') f left join gp_dist_random('pg_appendonly') a on segrelid = objid and a.gp_segment_id = f.gp_segment_id where a.gp_segment_id = 0 and relid = (select oid from pg_class where relname = 'tab_fi');" | grep "Index Scan using gp_fastsequence"; + -> Index Scan using gp_fastsequence_objid_objmod_index on gp_fastsequence f + + diff --git a/src/test/isolation2/expected/prevent_ao_wal.out b/src/test/isolation2/expected/prevent_ao_wal.out index 00388edeb7d..fd76c39696e 100644 --- a/src/test/isolation2/expected/prevent_ao_wal.out +++ b/src/test/isolation2/expected/prevent_ao_wal.out @@ -21,18 +21,6 @@ GP_IGNORE: formatted by atmsort.pm -- end_matchignore GP_IGNORE: defined new match expression --- start_matchsubs --- m/tx:\s+\d+/ --- s/tx:\s+\d+/tx: ##/ - --- m/lsn: \d\/[0-9a-fA-F]+, prev \d\/[0-9a-fA-F]+/ --- s/lsn: \d\/[0-9a-fA-F]+, prev \d\/[0-9a-fA-F]+/lsn: #\/########, prev #\/########/ - --- m/rel \d+\/\d+\/\d+/ --- s/rel \d+\/\d+\/\d+/rel ####\/######\/######/ --- end_matchsubs -GP_IGNORE: defined new match expression - -- Create tables (AO, AOCO) -1U: CREATE TABLE ao_foo (n int) WITH (appendonly=true); CREATE diff --git a/src/test/isolation2/init_file_isolation2 b/src/test/isolation2/init_file_isolation2 index 4b0ad2a8db7..1c01246e203 100644 --- a/src/test/isolation2/init_file_isolation2 +++ b/src/test/isolation2/init_file_isolation2 @@ -59,4 +59,14 @@ s/available \d+ MB// m/\(cdbdisp_async\.c\:\d+\)/ s/\(cdbdisp_async\.c:\d+\)/\(cdbdisp_async\.c:LINE_NUM\)/ +# remove WAL details from pg_waldump output +m/tx:\s+\d+/ +s/tx:\s+\d+/tx: ##/ + +m/lsn: \d\/[0-9a-fA-F]+, prev \d\/[0-9a-fA-F]+/ +s/lsn: \d\/[0-9a-fA-F]+, prev \d\/[0-9a-fA-F]+/lsn: #\/########, prev #\/########/ + +m/rel \d+\/\d+\/\d+/ +s/rel \d+\/\d+\/\d+/rel ####\/######\/######/ + -- end_matchsubs diff --git 
a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule index e89566e9d67..708610618e6 100644 --- a/src/test/isolation2/isolation2_schedule +++ b/src/test/isolation2/isolation2_schedule @@ -24,6 +24,7 @@ test: unlogged_heap_tables test: unlogged_appendonly_tables test: udf_exception_blocks_panic_scenarios test: ao_same_trans_truncate_crash +test: frozen_insert_crash test: prevent_ao_wal diff --git a/src/test/isolation2/sql/frozen_insert_crash.sql b/src/test/isolation2/sql/frozen_insert_crash.sql new file mode 100644 index 00000000000..d45ad3b12ae --- /dev/null +++ b/src/test/isolation2/sql/frozen_insert_crash.sql @@ -0,0 +1,106 @@ +-- Test server crash in case of frozen insert. Make sure that after crash +-- recovery, the frozen insert and index are consistent: +-- +-- 1. If crash happened before the row is frozen, the row will be invisible; +-- 2. If crash happened after the row is frozen, the row will be visible. +-- +-- And the above behavior should remain consistent using seqscan or indexscan. +-- +-- We test gp_fastsequence here since it does frozen insert and has an index. + +-- Case 1. crash after the regular MVCC insert has made to disk, but not +-- the WAL record responsible for updating it to frozen. +-- After crash recovery, the insert will follow regular MVCC and not be seen. 
+1: create table tab_fi(a int) with (appendoptimized=true) distributed replicated; + +-- switch WAL on seg0 to reduce flakiness +1: select gp_segment_id, pg_switch_wal() is not null from gp_dist_random('gp_id') where gp_segment_id = 0; + +-- suspend right after the insert into gp_fastsequence during an AO table insert, +-- but before freezing the tuple +1: select gp_inject_fault('insert_fastsequence_before_freeze', 'suspend', ''/*DDL*/, ''/*DB*/, 'gp_fastsequence'/*table*/, 1/*start occur*/, 1/*end occur*/, 0/*extra_arg*/, dbid) from gp_segment_configuration where role = 'p' and content = 0; + +2>: insert into tab_fi values(1); + +1: select gp_wait_until_triggered_fault('insert_fastsequence_before_freeze', 1, dbid) from gp_segment_configuration where role = 'p' and content = 0; + +-- switch WAL on seg0, so the new row gets flushed (including its index) +1: select gp_segment_id, pg_switch_wal() is not null from gp_dist_random('gp_id') where gp_segment_id = 0; + +-- inject a panic, and resume the insert. The WAL for the freeze operation is not +-- going to be made to disk (we just flushed WALs), so we won't replay it during restart later. 
+-- skip FTS probe to prevent unexpected mirror promotion +1: select gp_inject_fault_infinite('fts_probe', 'skip', dbid) from gp_segment_configuration where role='p' and content=-1; +1: select gp_inject_fault('appendonly_insert', 'panic', ''/*DDL*/, ''/*DB*/, 'tab_fi'/*table*/, 1/*start occur*/, -1/*end occur*/, 0/*extra_arg*/, 2/*db_id*/); +1: select gp_inject_fault('insert_fastsequence_before_freeze', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = 0; +1: select gp_inject_fault('fts_probe', 'reset', dbid) from gp_segment_configuration where role='p' and content=-1; + +2<: + +1q: + +-- check the gp_fastsequence content w/ table vs index scan, neither should see the +-- new inserted row (objmod=1) following MVCC +1: set enable_indexscan = off; +1: set enable_seqscan = on; +1: select distinct f.gp_segment_id, f.objmod, f.last_sequence from gp_dist_random('gp_fastsequence') f left join gp_dist_random('pg_appendonly') a on segrelid = objid and a.gp_segment_id = f.gp_segment_id where a.gp_segment_id = 0 and relid = (select oid from pg_class where relname = 'tab_fi'); +1: set enable_indexscan = on; +1: set enable_seqscan = off; +1: select distinct f.gp_segment_id, f.objmod, f.last_sequence from gp_dist_random('gp_fastsequence') f left join gp_dist_random('pg_appendonly') a on segrelid = objid and a.gp_segment_id = f.gp_segment_id where a.gp_segment_id = 0 and relid = (select oid from pg_class where relname = 'tab_fi'); +1: reset enable_indexscan; +1: reset enable_seqscan; + +1: drop table tab_fi; + +-- Case 2. crash after we have flushed the WAL that updates the row to be frozen. +-- After crash recovery, the insert should be seen. 
+1: create table tab_fi(a int) with (appendoptimized=true) distributed replicated; + +-- switch WAL on seg0 to reduce flakiness +1: select gp_segment_id, pg_switch_wal() is not null from gp_dist_random('gp_id') where gp_segment_id = 0; + +-- suspend right after freezing the tuple +1: select gp_inject_fault('insert_fastsequence_after_freeze', 'suspend', ''/*DDL*/, ''/*DB*/, 'gp_fastsequence'/*table*/, 1/*start occur*/, 1/*end occur*/, 0/*extra_arg*/, dbid) from gp_segment_configuration where role = 'p' and content = 0; + +2>: insert into tab_fi values(1); + +1: select gp_wait_until_triggered_fault('insert_fastsequence_after_freeze', 1, dbid) from gp_segment_configuration where role = 'p' and content = 0; + +-- switch WAL on seg0, so the freeze record gets flushed +1: select gp_segment_id, pg_switch_wal() is not null from gp_dist_random('gp_id') where gp_segment_id = 0; + +-- While we are on it, check the wal record for the freeze operation. +-- One of the purposes is to guard against unexpected addition to the xl_heap_freeze_tuple struct in future. +-- If this test failed due to WAL size, please check to see if the xl_heap_freeze_tuple struct +-- has changed, and if we should initialize any new field in heap_freeze_tuple_no_cutoff(). +! seg0_datadir=$(psql -At -c "select datadir from gp_segment_configuration where content = 0 and role = 'p'" postgres) && seg0_last_wal_file=$(psql -At -c "SELECT pg_walfile_name(pg_current_wal_lsn()) from gp_dist_random('gp_id') where gp_segment_id = 0" postgres) && pg_waldump ${seg0_last_wal_file} -p ${seg0_datadir}/pg_wal | grep FREEZE_PAGE; + +-- inject a panic and resume in same way as Case 1. But this time we will be able to replay the frozen insert. 
+-- skip FTS probe to prevent unexpected mirror promotion +1: select gp_inject_fault_infinite('fts_probe', 'skip', dbid) from gp_segment_configuration where role='p' and content=-1; +1: select gp_inject_fault('appendonly_insert', 'panic', ''/*DDL*/, ''/*DB*/, 'tab_fi'/*table*/, 1/*start occur*/, -1/*end occur*/, 0/*extra_arg*/, 2/*db_id*/); +1: select gp_inject_fault('insert_fastsequence_after_freeze', 'reset', dbid) from gp_segment_configuration where role = 'p' and content = 0; +1: select gp_inject_fault('fts_probe', 'reset', dbid) from gp_segment_configuration where role='p' and content=-1; + +2<: + +1q: + +-- check the gp_fastsequence content w/ table vs index scan, both should see the new inserted row (objmod=1) +1: set enable_indexscan = off; +1: set enable_seqscan = on; +1: select distinct f.gp_segment_id, f.objmod, f.last_sequence from gp_dist_random('gp_fastsequence') f left join gp_dist_random('pg_appendonly') a on segrelid = objid and a.gp_segment_id = f.gp_segment_id where a.gp_segment_id = 0 and relid = (select oid from pg_class where relname = 'tab_fi'); +1: set enable_indexscan = on; +1: set enable_seqscan = off; +1: select distinct f.gp_segment_id, f.objmod, f.last_sequence from gp_dist_random('gp_fastsequence') f left join gp_dist_random('pg_appendonly') a on segrelid = objid and a.gp_segment_id = f.gp_segment_id where a.gp_segment_id = 0 and relid = (select oid from pg_class where relname = 'tab_fi'); +1: reset enable_indexscan; +1: reset enable_seqscan; + +1: drop table tab_fi; + +-- validate that we've actually tested desired scan method +-- for some reason this disrupts the output of subsequent queries so +-- validating at the end here +! 
psql postgres -At -c "set enable_indexscan = off; set enable_seqscan = on; explain (costs off) select distinct f.gp_segment_id, f.objmod, f.last_sequence from gp_dist_random('gp_fastsequence') f left join gp_dist_random('pg_appendonly') a on segrelid = objid and a.gp_segment_id = f.gp_segment_id where a.gp_segment_id = 0 and relid = (select oid from pg_class where relname = 'tab_fi');" | grep "Seq Scan on gp_fastsequence"; +! psql postgres -At -c "set enable_indexscan = on; set enable_seqscan = off; explain (costs off) select distinct f.gp_segment_id, f.objmod, f.last_sequence from gp_dist_random('gp_fastsequence') f left join gp_dist_random('pg_appendonly') a on segrelid = objid and a.gp_segment_id = f.gp_segment_id where a.gp_segment_id = 0 and relid = (select oid from pg_class where relname = 'tab_fi');" | grep "Index Scan using gp_fastsequence"; + diff --git a/src/test/isolation2/sql/prevent_ao_wal.sql b/src/test/isolation2/sql/prevent_ao_wal.sql index cfee9b0dc43..d78138f3e58 100644 --- a/src/test/isolation2/sql/prevent_ao_wal.sql +++ b/src/test/isolation2/sql/prevent_ao_wal.sql @@ -19,17 +19,6 @@ -- m/.*Table doesn't have 'DISTRIBUTED BY' clause*/ -- end_matchignore --- start_matchsubs --- m/tx:\s+\d+/ --- s/tx:\s+\d+/tx: ##/ - --- m/lsn: \d\/[0-9a-fA-F]+, prev \d\/[0-9a-fA-F]+/ --- s/lsn: \d\/[0-9a-fA-F]+, prev \d\/[0-9a-fA-F]+/lsn: #\/########, prev #\/########/ - --- m/rel \d+\/\d+\/\d+/ --- s/rel \d+\/\d+\/\d+/rel ####\/######\/######/ --- end_matchsubs - -- Create tables (AO, AOCO) -1U: CREATE TABLE ao_foo (n int) WITH (appendonly=true); -1U: CREATE TABLE aoco_foo (n int, m int) WITH (appendonly=true, orientation=column); diff --git a/src/test/regress/expected/alter_table_aocs.out b/src/test/regress/expected/alter_table_aocs.out index cfcccc2da95..19b3f3cbdc3 100644 --- a/src/test/regress/expected/alter_table_aocs.out +++ b/src/test/regress/expected/alter_table_aocs.out @@ -265,34 +265,32 @@ begin; insert into addcol6 select i,i from 
generate_series(1,10)i; -- abort the first insert, still should advance gp_fastsequence for this -- relation. -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, -last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid +SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE relname='addcol6')); - case | objmod | last_sequence | gp_segment_id ------------+--------+---------------+--------------- - NormalXid | 0 | 0 | 1 - FrozenXid | 1 | 100 | 1 - NormalXid | 0 | 0 | 2 - FrozenXid | 1 | 100 | 2 - NormalXid | 0 | 0 | 0 - FrozenXid | 1 | 100 | 0 + objmod | last_sequence | gp_segment_id +--------+---------------+--------------- + 0 | 0 | 1 + 1 | 100 | 1 + 0 | 0 | 2 + 1 | 100 | 2 + 0 | 0 | 0 + 1 | 100 | 0 (6 rows) abort; -- check gp_fastsequence remains advanced. -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, -last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid +SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE relname='addcol6')); - case | objmod | last_sequence | gp_segment_id ------------+--------+---------------+--------------- - NormalXid | 0 | 0 | 1 - FrozenXid | 1 | 100 | 1 - NormalXid | 0 | 0 | 2 - FrozenXid | 1 | 100 | 2 - NormalXid | 0 | 0 | 0 - FrozenXid | 1 | 100 | 0 + objmod | last_sequence | gp_segment_id +--------+---------------+--------------- + 0 | 0 | 1 + 1 | 100 | 1 + 0 | 0 | 2 + 1 | 100 | 2 + 0 | 0 | 0 + 1 | 100 | 0 (6 rows) insert into addcol6 select i,i/2 from generate_series(1,20)i; @@ -312,18 +310,17 @@ select a,c from addcol6 where b > 5 order by a; (9 rows) -- Lets validate after alter gp_fastsequence reflects correctly. 
-SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, -last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid +SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE relname='addcol6')); - case | objmod | last_sequence | gp_segment_id ------------+--------+---------------+--------------- - NormalXid | 0 | 0 | 0 - FrozenXid | 1 | 200 | 0 - NormalXid | 0 | 0 | 1 - FrozenXid | 1 | 200 | 1 - NormalXid | 0 | 0 | 2 - FrozenXid | 1 | 200 | 2 + objmod | last_sequence | gp_segment_id +--------+---------------+--------------- + 0 | 0 | 0 + 1 | 200 | 0 + 0 | 0 | 1 + 1 | 200 | 1 + 0 | 0 | 2 + 1 | 200 | 2 (6 rows) -- add column with default value as sequence diff --git a/src/test/regress/input/appendonly.source b/src/test/regress/input/appendonly.source index 03f4921092a..caec78fe346 100644 --- a/src/test/regress/input/appendonly.source +++ b/src/test/regress/input/appendonly.source @@ -24,9 +24,9 @@ CREATE TABLE tenk_heap ( -- valid CREATE TABLE tenk_ao1 (like tenk_heap) with (appendonly=true, checksum=true) distributed by(unique1); --- creating AO table should create entry in gp_fastsequence with normal xid -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, -last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid +-- We used to check xmin here and other places in this test, but now the new gp_fastsequence +-- rows are frozen via hintbits (HEAP_XMIN_FROZEN) so not checking that anymore. 
+SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE relname='tenk_ao1')); @@ -209,8 +209,7 @@ SELECT count(*) FROM tenk_ao1; -- should show previous count SELECT aototal('tenk_ao1'); -- gp_fastsequence should reflect bump in lastsequence, even if above -- transaction aborted as its tuples is in place updated. -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, -last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid +SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE relname='tenk_ao1')); @@ -284,8 +283,7 @@ CREATE TABLE tenk_ao1 with(appendonly=true, checksum=true) AS SELECT * FROM tenk -- With and without ORCA last_sequence fluctuates bit and hence using >= 3300 as -- inserting 10k tuples to 3 node system must atleast have last_sequence >= 3300 -- on each node. -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, - objmod, +SELECT objmod, CASE WHEN last_sequence = 0 THEN 'zero' WHEN last_sequence >= 3300 THEN '>= 3300' ELSE '1-2900' END AS last_sequence, @@ -315,13 +313,11 @@ BEGIN; CREATE TABLE appendonly_sametxn_create_insert(a int, b int) with (appendonly=true); INSERT INTO appendonly_sametxn_create_insert select * from generate_series(1, 10); -- Make sure insert is using segfile 0 for the insert, as part of create table itself. 
-SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, -last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid +SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE relname='appendonly_sametxn_create_insert')); INSERT INTO appendonly_sametxn_create_insert select * from generate_series(1, 10); -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, -last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid +SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE relname='appendonly_sametxn_create_insert')); ABORT; @@ -365,8 +361,7 @@ SELECT unique1 FROM tenk_ao1 EXCEPT SELECT unique1 FROM tenk_ao1; SELECT unique1 FROM tenk_heap EXCEPT SELECT unique1 FROM tenk_ao3; -- Get gp_fastsequence details before truncate -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, - objmod, +SELECT objmod, CASE WHEN last_sequence = 0 THEN 'zero' WHEN last_sequence >= 3300 THEN '>= 3300' ELSE '1-2900' END AS last_sequence, @@ -380,8 +375,7 @@ TRUNCATE tenk_ao2; -- Truncate changes relfilnode, as a result old pg_aoseg table is truncated and -- gp_fastsequence entries are also reinitialized. 
-SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, - objmod, +SELECT objmod, CASE WHEN last_sequence = 0 THEN 'zero' WHEN last_sequence >= 3300 THEN '>= 3300' ELSE '1-2900' END AS last_sequence, @@ -536,7 +530,7 @@ select count(*) from tenk_ao1 where unique2 < 0; ALTER TABLE tenk_ao1 RENAME TO tenk_renamed; ALTER TABLE tenk_renamed ADD COLUMN newcol int default 10; -- Validate post alter gp_fastsequence reflects correctly -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, CASE +SELECT objmod, CASE WHEN objmod = 0 THEN last_sequence >= 3300 WHEN objmod = 1 THEN last_sequence = 0 END, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE @@ -591,14 +585,12 @@ DROP TABLE IF EXISTS ao_selection; CREATE TABLE ao_selection (a INT, b INT) WITH (appendonly=true); INSERT INTO ao_selection VALUES (generate_series(1,100000), generate_series(1,10000)); -- Validates insert is using single segfile to perform the insert to gp_fastsequence. 
-SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, -last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid +SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE relname='ao_selection')); -- Following insert without concurrency is also using same segfile as above INSERT INTO ao_selection values (generate_series(1,100000), generate_series(1,10000)); -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, -last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid +SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE relname='ao_selection')); @@ -658,15 +650,17 @@ BEGIN; SAVEPOINT sp1; CREATE TABLE appendonly_subxans_test(a int, b int) WITH (appendonly=true); INSERT INTO appendonly_subxans_test SELECT * FROM generate_series(1, 10); -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, -last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid +SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE relname='appendonly_subxans_test')); ROLLBACK TO SAVEPOINT sp1; COMMIT; --- create table and insert in nested subtransaction, to validate insert to --- gp_fastsequence is using NormalXid and not FrozenXid. +-- create table and insert in nested subtransaction. +-- The original purpose of this test is to validate that insert to gp_fastsequence +-- is using NormalXid and not FrozenXid. But since now we do not set the xmin to +-- FrozenTransactionXid anymore, it makes less sense in that regard, but this +-- nested transaction case might still be valuable, so keep running it. 
BEGIN; SAVEPOINT sp1; CREATE TABLE appendonly_subxans_test(a int, b int) WITH (appendonly=true); diff --git a/src/test/regress/output/appendonly.source b/src/test/regress/output/appendonly.source index 41c12f5bf86..8b954766e33 100644 --- a/src/test/regress/output/appendonly.source +++ b/src/test/regress/output/appendonly.source @@ -22,16 +22,16 @@ CREATE TABLE tenk_heap ( -- -- valid CREATE TABLE tenk_ao1 (like tenk_heap) with (appendonly=true, checksum=true) distributed by(unique1); --- creating AO table should create entry in gp_fastsequence with normal xid -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, -last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid +-- We used to check xmin here and other places in this test, but now the new gp_fastsequence +-- rows are frozen via hintbits (HEAP_XMIN_FROZEN) so not checking that anymore. +SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE relname='tenk_ao1')); - case | objmod | last_sequence | gp_segment_id ------------+--------+---------------+--------------- - NormalXid | 0 | 0 | 1 - NormalXid | 0 | 0 | 2 - NormalXid | 0 | 0 | 0 + objmod | last_sequence | gp_segment_id +--------+---------------+--------------- + 0 | 0 | 1 + 0 | 0 | 2 + 0 | 0 | 0 (3 rows) CREATE TABLE tenk_ao2 (like tenk_heap) with (appendonly=true, compresslevel=0, blocksize=262144) distributed by(unique1); @@ -346,18 +346,17 @@ SELECT aototal('tenk_ao1'); -- gp_fastsequence should reflect bump in lastsequence, even if above -- transaction aborted as its tuples is in place updated. 
-SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, -last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid +SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE relname='tenk_ao1')); - case | objmod | last_sequence | gp_segment_id ------------+--------+---------------+--------------- - NormalXid | 0 | 0 | 1 - FrozenXid | 1 | 17000 | 1 - NormalXid | 0 | 0 | 0 - FrozenXid | 1 | 17000 | 0 - NormalXid | 0 | 0 | 2 - FrozenXid | 1 | 16400 | 2 + objmod | last_sequence | gp_segment_id +--------+---------------+--------------- + 0 | 0 | 0 + 0 | 0 | 1 + 0 | 0 | 2 + 1 | 16400 | 2 + 1 | 17000 | 0 + 1 | 17000 | 1 (6 rows) -- commit @@ -495,8 +494,7 @@ CREATE TABLE tenk_ao1 with(appendonly=true, checksum=true) AS SELECT * FROM tenk -- With and without ORCA last_sequence fluctuates bit and hence using >= 3300 as -- inserting 10k tuples to 3 node system must atleast have last_sequence >= 3300 -- on each node. -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, - objmod, +SELECT objmod, CASE WHEN last_sequence = 0 THEN 'zero' WHEN last_sequence >= 3300 THEN '>= 3300' ELSE '1-2900' END AS last_sequence, @@ -504,11 +502,11 @@ SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, FROM gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN ( SELECT oid FROM pg_class WHERE relname='tenk_ao1')); - case | objmod | last_sequence | gp_segment_id ------------+--------+---------------+--------------- - NormalXid | 0 | >= 3300 | 0 - NormalXid | 0 | >= 3300 | 1 - NormalXid | 0 | >= 3300 | 2 + objmod | last_sequence | gp_segment_id +--------+---------------+--------------- + 0 | >= 3300 | 0 + 0 | >= 3300 | 1 + 0 | >= 3300 | 2 (3 rows) -- Since we check last_sequence for the tenk_ao2 table later, the data distribution should be same for orca and planner. 
@@ -572,27 +570,25 @@ NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO appendonly_sametxn_create_insert select * from generate_series(1, 10); -- Make sure insert is using segfile 0 for the insert, as part of create table itself. -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, -last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid +SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE relname='appendonly_sametxn_create_insert')); - case | objmod | last_sequence | gp_segment_id ------------+--------+---------------+--------------- - NormalXid | 0 | 100 | 1 - NormalXid | 0 | 100 | 0 - NormalXid | 0 | 100 | 2 + objmod | last_sequence | gp_segment_id +--------+---------------+--------------- + 0 | 100 | 1 + 0 | 100 | 0 + 0 | 100 | 2 (3 rows) INSERT INTO appendonly_sametxn_create_insert select * from generate_series(1, 10); -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, -last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid +SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE relname='appendonly_sametxn_create_insert')); - case | objmod | last_sequence | gp_segment_id ------------+--------+---------------+--------------- - NormalXid | 0 | 200 | 1 - NormalXid | 0 | 200 | 2 - NormalXid | 0 | 200 | 0 + objmod | last_sequence | gp_segment_id +--------+---------------+--------------- + 0 | 200 | 1 + 0 | 200 | 2 + 0 | 200 | 0 (3 rows) ABORT; @@ -722,8 +718,7 @@ SELECT unique1 FROM tenk_heap EXCEPT SELECT unique1 FROM tenk_ao3; (0 
rows) -- Get gp_fastsequence details before truncate -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, - objmod, +SELECT objmod, CASE WHEN last_sequence = 0 THEN 'zero' WHEN last_sequence >= 3300 THEN '>= 3300' ELSE '1-2900' END AS last_sequence, @@ -731,19 +726,18 @@ SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, FROM gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN ( SELECT oid FROM pg_class WHERE relname='tenk_ao2')); - case | objmod | last_sequence | gp_segment_id ------------+--------+---------------+--------------- - NormalXid | 0 | >= 3300 | 1 - NormalXid | 0 | >= 3300 | 2 - NormalXid | 0 | >= 3300 | 0 + objmod | last_sequence | gp_segment_id +--------+---------------+--------------- + 0 | >= 3300 | 1 + 0 | >= 3300 | 2 + 0 | >= 3300 | 0 (3 rows) -- TRUNCATE TRUNCATE tenk_ao2; -- Truncate changes relfilnode, as a result old pg_aoseg table is truncated and -- gp_fastsequence entries are also reinitialized. 
-SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, - objmod, +SELECT objmod, CASE WHEN last_sequence = 0 THEN 'zero' WHEN last_sequence >= 3300 THEN '>= 3300' ELSE '1-2900' END AS last_sequence, @@ -751,11 +745,11 @@ SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, FROM gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN ( SELECT oid FROM pg_class WHERE relname='tenk_ao2')); - case | objmod | last_sequence | gp_segment_id ------------+--------+---------------+--------------- - NormalXid | 0 | zero | 0 - NormalXid | 0 | zero | 2 - NormalXid | 0 | zero | 1 + objmod | last_sequence | gp_segment_id +--------+---------------+--------------- + 0 | zero | 0 + 0 | zero | 2 + 0 | zero | 1 (3 rows) -- WITH OIDS is no longer supported @@ -1125,16 +1119,16 @@ select count(*) from tenk_ao1 where unique2 < 0; ALTER TABLE tenk_ao1 RENAME TO tenk_renamed; ALTER TABLE tenk_renamed ADD COLUMN newcol int default 10; -- Validate post alter gp_fastsequence reflects correctly -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, CASE +SELECT objmod, CASE WHEN objmod = 0 THEN last_sequence >= 3300 WHEN objmod = 1 THEN last_sequence = 0 END, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE relname='tenk_renamed')); - case | objmod | case | gp_segment_id ------------+--------+------+--------------- - NormalXid | 0 | t | 0 - NormalXid | 0 | t | 1 - NormalXid | 0 | t | 2 + objmod | case | gp_segment_id +--------+------+--------------- + 0 | t | 0 + 0 | t | 1 + 0 | t | 2 (3 rows) ALTER TABLE tenk_renamed ALTER COLUMN twothousand SET NOT NULL; @@ -1212,34 +1206,32 @@ DROP TABLE IF EXISTS ao_selection; CREATE TABLE ao_selection (a INT, b INT) WITH (appendonly=true); INSERT INTO ao_selection VALUES (generate_series(1,100000), generate_series(1,10000)); -- Validates insert is using single segfile to 
perform the insert to gp_fastsequence. -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, -last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid +SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE relname='ao_selection')); - case | objmod | last_sequence | gp_segment_id ------------+--------+---------------+--------------- - NormalXid | 0 | 0 | 2 - FrozenXid | 1 | 33300 | 2 - NormalXid | 0 | 0 | 0 - FrozenXid | 1 | 33500 | 0 - NormalXid | 0 | 0 | 1 - FrozenXid | 1 | 33400 | 1 + objmod | last_sequence | gp_segment_id +--------+---------------+--------------- + 0 | 0 | 2 + 1 | 33300 | 2 + 0 | 0 | 0 + 1 | 33500 | 0 + 0 | 0 | 1 + 1 | 33400 | 1 (6 rows) -- Following insert without concurrency is also using same segfile as above INSERT INTO ao_selection values (generate_series(1,100000), generate_series(1,10000)); -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, -last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid +SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE relname='ao_selection')); - case | objmod | last_sequence | gp_segment_id ------------+--------+---------------+--------------- - NormalXid | 0 | 0 | 0 - FrozenXid | 1 | 67000 | 0 - NormalXid | 0 | 0 | 1 - FrozenXid | 1 | 66800 | 1 - NormalXid | 0 | 0 | 2 - FrozenXid | 1 | 66600 | 2 + objmod | last_sequence | gp_segment_id +--------+---------------+--------------- + 0 | 0 | 0 + 1 | 67000 | 0 + 0 | 0 | 1 + 1 | 66800 | 1 + 0 | 0 | 2 + 1 | 66600 | 2 (6 rows) -- Check compression and distribution @@ -1373,21 +1365,23 @@ CREATE TABLE appendonly_subxans_test(a int, b int) WITH (appendonly=true); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column 
named 'a' as the Cloudberry Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO appendonly_subxans_test SELECT * FROM generate_series(1, 10); -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, -last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid +SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE relname='appendonly_subxans_test')); - case | objmod | last_sequence | gp_segment_id ------------+--------+---------------+--------------- - NormalXid | 0 | 100 | 0 - NormalXid | 0 | 100 | 1 - NormalXid | 0 | 100 | 2 + objmod | last_sequence | gp_segment_id +--------+---------------+--------------- + 0 | 100 | 0 + 0 | 100 | 1 + 0 | 100 | 2 (3 rows) ROLLBACK TO SAVEPOINT sp1; COMMIT; --- create table and insert in nested subtransaction, to validate insert to --- gp_fastsequence is using NormalXid and not FrozenXid. +-- create table and insert in nested subtransaction. +-- The original purpose of this test is to validate that insert to gp_fastsequence +-- is using NormalXid and not FrozenXid. But since now we do not set the xmin to +-- FrozenTransactionXid anymore, it makes less sense in that regard, but this +-- nested transaction case might still be valuable, so keep running it. 
BEGIN; SAVEPOINT sp1; CREATE TABLE appendonly_subxans_test(a int, b int) WITH (appendonly=true); diff --git a/src/test/regress/sql/alter_table_aocs.sql b/src/test/regress/sql/alter_table_aocs.sql index 3a16ca3fa0d..2db6d5139e6 100644 --- a/src/test/regress/sql/alter_table_aocs.sql +++ b/src/test/regress/sql/alter_table_aocs.sql @@ -180,15 +180,13 @@ begin; insert into addcol6 select i,i from generate_series(1,10)i; -- abort the first insert, still should advance gp_fastsequence for this -- relation. -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, -last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid +SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE relname='addcol6')); abort; -- check gp_fastsequence remains advanced. -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, -last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid +SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE relname='addcol6')); @@ -197,8 +195,7 @@ alter table addcol6 add column c float default 1.2; select a,c from addcol6 where b > 5 order by a; -- Lets validate after alter gp_fastsequence reflects correctly. -SELECT CASE WHEN xmin = 2 THEN 'FrozenXid' ELSE 'NormalXid' END, objmod, -last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid +SELECT objmod, last_sequence, gp_segment_id from gp_dist_random('gp_fastsequence') WHERE objid IN (SELECT segrelid FROM pg_appendonly WHERE relid IN (SELECT oid FROM pg_class WHERE relname='addcol6'));