diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fc7e78213c2..c0ca57a81f9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -36,7 +36,7 @@ jobs: needs: build runs-on: [self-hosted, example] env: - MAKE_TEST_COMMAND: "-k PGOPTIONS='-c optimizer=off -c gp_appendonly_insert_files=0' installcheck-world" + MAKE_TEST_COMMAND: "-k PGOPTIONS='-c optimizer=off' installcheck-world" TEST_OS: "centos" DUMP_DB: "true" steps: @@ -83,7 +83,7 @@ jobs: needs: build runs-on: [self-hosted, example] env: - MAKE_TEST_COMMAND: "-k PGOPTIONS='-c optimizer=on -c gp_appendonly_insert_files=0' installcheck-world" + MAKE_TEST_COMMAND: "-k PGOPTIONS='-c optimizer=on' installcheck-world" TEST_OS: "centos" DUMP_DB: "true" steps: @@ -130,7 +130,7 @@ jobs: needs: build runs-on: [self-hosted, example] env: - MAKE_TEST_COMMAND: "-C src/test/regress -k PGOPTIONS='-c gp_appendonly_insert_files=0' installcheck-cbdb-parallel" + MAKE_TEST_COMMAND: "-C src/test/regress installcheck-cbdb-parallel" TEST_OS: "centos" DUMP_DB: "true" steps: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2e2a056bd27..123205dbee2 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -39,7 +39,7 @@ jobs: needs: build runs-on: [self-hosted, example] env: - MAKE_TEST_COMMAND: "-k PGOPTIONS='-c optimizer=off -c gp_appendonly_insert_files=0' installcheck-world" + MAKE_TEST_COMMAND: "-k PGOPTIONS='-c optimizer=off' installcheck-world" TEST_OS: "centos" DUMP_DB: "true" BUILD_TYPE: release @@ -95,7 +95,7 @@ jobs: needs: build runs-on: [self-hosted, example] env: - MAKE_TEST_COMMAND: "-k PGOPTIONS='-c optimizer=on -c gp_appendonly_insert_files=0' installcheck-world" + MAKE_TEST_COMMAND: "-k PGOPTIONS='-c optimizer=on' installcheck-world" TEST_OS: "centos" DUMP_DB: "true" BUILD_TYPE: release @@ -151,7 +151,7 @@ jobs: needs: build runs-on: [self-hosted, example] env: - MAKE_TEST_COMMAND: "-C src/test/regress -k PGOPTIONS='-c gp_appendonly_insert_files=0' installcheck-cbdb-parallel" + MAKE_TEST_COMMAND: "-C src/test/regress installcheck-cbdb-parallel" TEST_OS: "centos" DUMP_DB: "true" steps: diff --git a/src/backend/access/aocs/aocsam.c b/src/backend/access/aocs/aocsam.c index 7ff06caa629..0ae994659b1 100644 --- a/src/backend/access/aocs/aocsam.c +++ b/src/backend/access/aocs/aocsam.c @@ -986,7 +986,6 @@ aocs_insert_init(Relation rel, int segno) desc = (AOCSInsertDesc) palloc0(sizeof(AOCSInsertDescData)); desc->aoi_rel = rel; desc->appendOnlyMetaDataSnapshot = RegisterSnapshot(GetCatalogSnapshot(InvalidOid)); - desc->insertMultiFiles = false; /* * Writers uses this since they have exclusive access to the lock acquired @@ -1047,9 +1046,6 @@ aocs_insert_init(Relation rel, int segno) (FileSegInfo *) desc->fsInfo, desc->lastSequence, rel, segno, tupleDesc->natts, true); - /* Should not enable insertMultiFiles if the table is created by own transaction or in utility mode */ - if (Gp_role != GP_ROLE_UTILITY) - desc->insertMultiFiles = gp_appendonly_insert_files > 1 && !ShouldUseReservedSegno(rel, CHOOSE_MODE_WRITE); return desc; } diff --git a/src/backend/access/aocs/aocsam_handler.c b/src/backend/access/aocs/aocsam_handler.c index 4c6bd38e778..950fc0f8547 100644 --- a/src/backend/access/aocs/aocsam_handler.c +++ b/src/backend/access/aocs/aocsam_handler.c @@ -88,9 +88,18 @@ typedef struct AOCODMLState { Oid relationOid; AOCSInsertDesc insertDesc; - dlist_head head; // Head of multiple segment files insertion list. AOCSDeleteDesc deleteDesc; AOCSUniqueCheckDesc uniqueCheckDesc; + /* + * CBDB_PARALLEL + * head: the Head of multiple segment files insertion list. + * insertMultiFiles: number of seg files to be inserted into. + * used_segment_files: used to avoid used files when asking + * for a new segment file. + */ + dlist_head head; + int insertMultiFiles; + List* used_segment_files; } AOCODMLState; static void reset_state_cb(void *arg); @@ -191,6 +200,8 @@ enter_dml_state(const Oid relationOid) state->insertDesc = NULL; state->deleteDesc = NULL; state->uniqueCheckDesc = NULL; + state->insertMultiFiles = 0; + state->used_segment_files = NIL; dlist_init(&state->head); Assert(!found); @@ -341,48 +352,41 @@ get_insert_descriptor(const Relation relation) if (state->insertDesc == NULL) { - List *segments = NIL; MemoryContext oldcxt; + /* + * CBDB_PARALLEL: + * Should not enable insertMultiFiles if the table is created by own transaction + * or in utility mode. + */ + if (Gp_role != GP_ROLE_UTILITY && + gp_appendonly_insert_files > 1 && + !ShouldUseReservedSegno(relation, CHOOSE_MODE_WRITE)) + state->insertMultiFiles = gp_appendonly_insert_files; + oldcxt = MemoryContextSwitchTo(aocoLocal.stateCxt); state->insertDesc = aocs_insert_init(relation, ChooseSegnoForWrite(relation)); - - dlist_init(&state->head); - dlist_head *head = &state->head; - dlist_push_tail(head, &state->insertDesc->node); - - if (state->insertDesc->insertMultiFiles) - { - segments = lappend_int(segments, state->insertDesc->cur_segno); - for (int i = 0; i < gp_appendonly_insert_files - 1; i++) - { - next = aocs_insert_init(relation, - ChooseSegnoForWriteMultiFile(relation, segments)); - dlist_push_tail(head, &next->node); - segments = lappend_int(segments, next->cur_segno); - } - list_free(segments); - } - //* mark all insertDesc placeholderInserted with false */ - if (relationHasUniqueIndex(relation)) - { - dlist_iter iter; - dlist_foreach(iter, head) - { - AOCSInsertDesc insertDesc = (AOCSInsertDesc)dlist_container(AOCSInsertDescData, node, iter.cur); - insertDesc->placeholderInserted = false; - } - } + state->used_segment_files = list_make1_int(state->insertDesc->cur_segno); + dlist_init(&state->head); + dlist_push_tail(&state->head, &state->insertDesc->node); MemoryContextSwitchTo(oldcxt); } /* switch insertDesc */ - if (state->insertDesc->insertMultiFiles && state->insertDesc->range == gp_appendonly_insert_files_tuples_range) + if (state->insertMultiFiles && state->insertDesc->range == gp_appendonly_insert_files_tuples_range) { state->insertDesc->range = 0; + + if (list_length(state->used_segment_files) < state->insertMultiFiles) + { + next = aocs_insert_init(relation, ChooseSegnoForWriteMultiFile(relation, state->used_segment_files)); + dlist_push_tail(&state->head, &next->node); + state->used_segment_files = lappend_int(state->used_segment_files, next->cur_segno); + } + if (!dlist_has_next(&state->head, &state->insertDesc->node)) next = (AOCSInsertDesc)dlist_container(AOCSInsertDescData, node, dlist_head_node(&state->head)); else diff --git a/src/backend/access/appendonly/appendonlyam.c b/src/backend/access/appendonly/appendonlyam.c index 1498c2f7419..e90bd2093cb 100755 --- a/src/backend/access/appendonly/appendonlyam.c +++ b/src/backend/access/appendonly/appendonlyam.c @@ -2634,7 +2634,6 @@ appendonly_insert_init(Relation rel, int segno) aoInsertDesc->aoi_rel = rel; aoInsertDesc->range = 0; - aoInsertDesc->insertMultiFiles = false; /* * We want to see an up-to-date view of the metadata. The target segment's @@ -2822,9 +2821,6 @@ aoInsertDesc->appendOnlyMetaDataSnapshot, //CONCERN:Safe to assume all block dir aoInsertDesc->fsInfo, aoInsertDesc->lastSequence, rel, segno, 1, false); - /* Should not enable insertMultiFiles if the table is created by own transaction or in utility mode */ - if (Gp_role != GP_ROLE_UTILITY) - aoInsertDesc->insertMultiFiles = gp_appendonly_insert_files > 1 && !ShouldUseReservedSegno(rel, CHOOSE_MODE_WRITE); return aoInsertDesc; } diff --git a/src/backend/access/appendonly/appendonlyam_handler.c b/src/backend/access/appendonly/appendonlyam_handler.c index 22a47d7ab2c..ba06c62bfe0 100644 --- a/src/backend/access/appendonly/appendonlyam_handler.c +++ b/src/backend/access/appendonly/appendonlyam_handler.c @@ -59,9 +59,19 @@ typedef struct AppendOnlyDMLState { Oid relationOid; AppendOnlyInsertDesc insertDesc; - dlist_head head; // Head of multiple segment files insertion list. AppendOnlyDeleteDesc deleteDesc; AppendOnlyUniqueCheckDesc uniqueCheckDesc; + + /* + * CBDB_PARALLEL + * head: the Head of multiple segment files insertion list. + * insertMultiFiles: number of seg files to be inserted into. + * used_segment_files: used to avoid used files when asking + * for a new segment file. + */ + dlist_head head; + int insertMultiFiles; + List* used_segment_files; } AppendOnlyDMLState; @@ -162,6 +172,8 @@ enter_dml_state(const Oid relationOid) state->insertDesc = NULL; state->deleteDesc = NULL; state->uniqueCheckDesc = NULL; + state->insertMultiFiles = 0; + state->used_segment_files = NIL; dlist_init(&state->head); Assert(!found); @@ -326,48 +338,41 @@ get_insert_descriptor(const Relation relation) if (state->insertDesc == NULL) { - List *segments = NIL; MemoryContext oldcxt; + /* + * CBDB_PARALLEL: + * Should not enable insertMultiFiles if the table is created by own transaction + * or in utility mode. + */ + if (Gp_role != GP_ROLE_UTILITY && + gp_appendonly_insert_files > 1 && + !ShouldUseReservedSegno(relation, CHOOSE_MODE_WRITE)) + state->insertMultiFiles = gp_appendonly_insert_files; + oldcxt = MemoryContextSwitchTo(appendOnlyLocal.stateCxt); state->insertDesc= appendonly_insert_init(relation, ChooseSegnoForWrite(relation)); + state->used_segment_files = list_make1_int(state->insertDesc->cur_segno); dlist_init(&state->head); - dlist_head *head = &state->head; - dlist_push_tail(head, &state->insertDesc->node); - - if (state->insertDesc->insertMultiFiles) - { - segments = lappend_int(segments, state->insertDesc->cur_segno); - for (int i = 0; i < gp_appendonly_insert_files - 1; i++) - { - next = appendonly_insert_init(relation, - ChooseSegnoForWriteMultiFile(relation, segments)); - dlist_push_tail(head, &next->node); - segments = lappend_int(segments, next->cur_segno); - } - list_free(segments); - } - - //* mark all insertDesc placeholderInserted with false */ - if (relationHasUniqueIndex(relation)) - { - dlist_iter iter; - dlist_foreach(iter, head) - { - AppendOnlyInsertDesc insertDesc = (AppendOnlyInsertDesc)dlist_container(AppendOnlyInsertDescData, node, iter.cur); - insertDesc->placeholderInserted = false; - } - } + dlist_push_tail(&state->head, &state->insertDesc->node); MemoryContextSwitchTo(oldcxt); } /* switch insertDesc */ - if (state->insertDesc->insertMultiFiles && state->insertDesc->range == gp_appendonly_insert_files_tuples_range) + if (state->insertMultiFiles && state->insertDesc->range == gp_appendonly_insert_files_tuples_range) { state->insertDesc->range = 0; + + if (list_length(state->used_segment_files) < state->insertMultiFiles) + { + next = appendonly_insert_init(relation, ChooseSegnoForWriteMultiFile(relation, state->used_segment_files)); + dlist_push_tail(&state->head, &next->node); + state->used_segment_files = lappend_int(state->used_segment_files, next->cur_segno); + } + if (!dlist_has_next(&state->head, &state->insertDesc->node)) next = (AppendOnlyInsertDesc)dlist_container(AppendOnlyInsertDescData, node, dlist_head_node(&state->head)); else diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index 78b3471ce4c..27dc2c7f7ac 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -3166,7 +3166,7 @@ struct config_int ConfigureNamesInt_gp[] = NULL }, &gp_appendonly_insert_files, - 4 /* CBDB_PARALLEL If default value is changed, set it in src/test/regress/GNUMakefile too, see details there */, 0, 127, + 4, 0, 127, NULL, NULL, NULL }, diff --git a/src/include/cdb/cdbaocsam.h b/src/include/cdb/cdbaocsam.h index ed06d6cb522..49c0aaa977b 100644 --- a/src/include/cdb/cdbaocsam.h +++ b/src/include/cdb/cdbaocsam.h @@ -74,7 +74,6 @@ typedef struct AOCSInsertDescData /* * For multiple segment files insertion. */ - bool insertMultiFiles; /* insert into multi files */ dlist_node node; /* node of segfiles list */ int range; /* inserted tuples of each range */ /* flag for insert placeholder in unique index */ diff --git a/src/include/cdb/cdbappendonlyam.h b/src/include/cdb/cdbappendonlyam.h index 1242b3b8ce7..10369ec7555 100644 --- a/src/include/cdb/cdbappendonlyam.h +++ b/src/include/cdb/cdbappendonlyam.h @@ -112,10 +112,6 @@ typedef struct AppendOnlyInsertDescData /* The block directory for the appendonly relation. */ AppendOnlyBlockDirectory blockDirectory; - /* - * For multiple segment files insertion. - */ - bool insertMultiFiles; /* insert into multi files */ dlist_node node; /* node of segfiles list */ int range; /* inserted tuples of each range */ /* flag for insert placeholder in unique index */ diff --git a/src/test/isolation2/Makefile b/src/test/isolation2/Makefile index aec939457e7..9ac85e8d756 100644 --- a/src/test/isolation2/Makefile +++ b/src/test/isolation2/Makefile @@ -63,11 +63,7 @@ clean distclean: install: all gpdiff.pl gpstringsubs.pl installcheck: install installcheck-parallel-retrieve-cursor - (\ - gpconfig -c gp_appendonly_insert_files -v 0; \ - gpstop -u; \ - $(pg_isolation2_regress_installcheck) --init-file=$(top_builddir)/src/test/regress/init_file --init-file=./init_file_isolation2 --schedule=$(srcdir)/isolation2_schedule \ - ) + $(pg_isolation2_regress_installcheck) --init-file=$(top_builddir)/src/test/regress/init_file --init-file=./init_file_isolation2 --schedule=$(srcdir)/isolation2_schedule installcheck-resgroup: install $(pg_isolation2_regress_installcheck) --init-file=$(top_builddir)/src/test/regress/init_file --init-file=./init_file_resgroup --dbname=isolation2resgrouptest --schedule=$(srcdir)/isolation2_resgroup_schedule @@ -78,8 +74,6 @@ installcheck-parallel-retrieve-cursor: install # Add a new rule for running installcheck with parallel mode enabled. installcheck-cbdb-parallel: install (\ - gpconfig -c gp_appendonly_insert_files -v 0; \ - gpstop -u; \ export PGOPTIONS='-c optimizer=off -c enable_parallel=true'; \ $(pg_isolation2_regress_installcheck) --init-file=$(top_builddir)/src/test/regress/init_file --init-file=./init_file_isolation2 --schedule=$(srcdir)/isolation2_schedule \ ) \ No newline at end of file diff --git a/src/test/isolation2/expected/ao_blkdir.out b/src/test/isolation2/expected/ao_blkdir.out index c798a23c8f6..f202d8ec4da 100644 --- a/src/test/isolation2/expected/ao_blkdir.out +++ b/src/test/isolation2/expected/ao_blkdir.out @@ -58,9 +58,13 @@ SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id TRUNCATE ao_blkdir_test; TRUNCATE +set gp_appendonly_insert_files = 0; +SET -- Insert enough rows to overflow the first block directory minipage by 2. INSERT INTO ao_blkdir_test SELECT i, 2 FROM generate_series(1, 292700) i; INSERT 292700 +reset gp_appendonly_insert_files; +RESET -- There should be 2 block directory rows, one with 161 entries covering 292698 -- rows and the other with 1 entry covering the 2 overflow rows. SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; @@ -380,8 +384,12 @@ SELECT (gp_toolkit.__gp_aoblkdir('aoco_blkdir_test')).* FROM gp_dist_random('gp_ TRUNCATE aoco_blkdir_test; TRUNCATE -- Insert enough rows to overflow the first block directory minipage by 2. +set gp_appendonly_insert_files = 0; +SET INSERT INTO aoco_blkdir_test SELECT i, 2 FROM generate_series(1, 1317143) i; INSERT 1317143 +reset gp_appendonly_insert_files; +RESET -- There should be 2 block directory rows, 2 for each column, one with 161 -- entries covering 1317141 rows and the other with 1 entry covering the 2 -- overflow rows. diff --git a/src/test/isolation2/expected/ao_unique_index.out b/src/test/isolation2/expected/ao_unique_index.out index 8748b85bd69..ea89389e841 100644 --- a/src/test/isolation2/expected/ao_unique_index.out +++ b/src/test/isolation2/expected/ao_unique_index.out @@ -302,6 +302,8 @@ SELECT gp_inject_fault('insert_new_entry_curr_minipage_full', 'suspend', '', '', Success: Success: (3 rows) +1: set gp_appendonly_insert_files = 0; +SET 1&: INSERT INTO unique_index_ao_row SELECT generate_series(1, 661510); -- Wait until we have inserted (2048 * (161 * 2 + 1) + 3) = 661507 rows and we diff --git a/src/test/isolation2/expected/aocs_unique_index.out b/src/test/isolation2/expected/aocs_unique_index.out index 839dc5b159d..1c3efd3794c 100644 --- a/src/test/isolation2/expected/aocs_unique_index.out +++ b/src/test/isolation2/expected/aocs_unique_index.out @@ -302,6 +302,8 @@ SELECT gp_inject_fault('insert_new_entry_curr_minipage_full', 'suspend', '', '', Success: Success: (3 rows) +1: set gp_appendonly_insert_files = 0; +SET 1&: INSERT INTO unique_index_ao_column SELECT generate_series(1, 1321075); -- Wait until we have inserted (4090 * (161 * 2 + 1) + 3) = 1321073 rows and we diff --git a/src/test/isolation2/sql/ao_blkdir.sql b/src/test/isolation2/sql/ao_blkdir.sql index f7e3074dba3..cd8f1b662d0 100644 --- a/src/test/isolation2/sql/ao_blkdir.sql +++ b/src/test/isolation2/sql/ao_blkdir.sql @@ -34,8 +34,10 @@ SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; TRUNCATE ao_blkdir_test; +set gp_appendonly_insert_files = 0; -- Insert enough rows to overflow the first block directory minipage by 2. INSERT INTO ao_blkdir_test SELECT i, 2 FROM generate_series(1, 292700) i; +reset gp_appendonly_insert_files; -- There should be 2 block directory rows, one with 161 entries covering 292698 -- rows and the other with 1 entry covering the 2 overflow rows. SELECT (gp_toolkit.__gp_aoblkdir('ao_blkdir_test')).* FROM gp_dist_random('gp_id') @@ -128,7 +130,9 @@ WHERE gp_segment_id = 0 ORDER BY 1,2,3,4,5; TRUNCATE aoco_blkdir_test; -- Insert enough rows to overflow the first block directory minipage by 2. +set gp_appendonly_insert_files = 0; INSERT INTO aoco_blkdir_test SELECT i, 2 FROM generate_series(1, 1317143) i; +reset gp_appendonly_insert_files; -- There should be 2 block directory rows, 2 for each column, one with 161 -- entries covering 1317141 rows and the other with 1 entry covering the 2 -- overflow rows. diff --git a/src/test/isolation2/sql/ao_unique_index.sql b/src/test/isolation2/sql/ao_unique_index.sql index 68c2faad128..55e1da5aae8 100644 --- a/src/test/isolation2/sql/ao_unique_index.sql +++ b/src/test/isolation2/sql/ao_unique_index.sql @@ -211,6 +211,7 @@ CREATE TABLE unique_index_ao_row (a INT unique) USING ao_row SELECT gp_inject_fault('insert_new_entry_curr_minipage_full', 'suspend', '', '', '', 2, 2, 0, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; +1: set gp_appendonly_insert_files = 0; 1&: INSERT INTO unique_index_ao_row SELECT generate_series(1, 661510); -- Wait until we have inserted (2048 * (161 * 2 + 1) + 3) = 661507 rows and we diff --git a/src/test/isolation2/sql/aocs_unique_index.sql b/src/test/isolation2/sql/aocs_unique_index.sql index e2624f6da9f..abe702de93e 100644 --- a/src/test/isolation2/sql/aocs_unique_index.sql +++ b/src/test/isolation2/sql/aocs_unique_index.sql @@ -211,6 +211,7 @@ CREATE TABLE unique_index_ao_column (a bigint unique) USING ao_column SELECT gp_inject_fault('insert_new_entry_curr_minipage_full', 'suspend', '', '', '', 2, 2, 0, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content <> -1; +1: set gp_appendonly_insert_files = 0; 1&: INSERT INTO unique_index_ao_column SELECT generate_series(1, 1321075); -- Wait until we have inserted (4090 * (161 * 2 + 1) + 3) = 1321073 rows and we diff --git a/src/test/regress/GNUmakefile b/src/test/regress/GNUmakefile index d16d70385a5..dcc38682a9a 100644 --- a/src/test/regress/GNUmakefile +++ b/src/test/regress/GNUmakefile @@ -204,18 +204,8 @@ installcheck: installcheck-good installcheck-small: all $(pg_regress_installcheck) $(REGRESS_OPTS) --schedule=$(srcdir)/parallel_schedule $(EXTRA_TESTS) -# We have set default gp_appendonly_insert_files = 4 for user's underlying parallelism. -# But that will cause lots of diffs between non-parallel and parallel mode that we can't -# afford to modify them one by one, or create lots of answer files(and we didn't support that yet). -# Any tests related to ao parallel should set GUCs in their own sql files, like gp_parallel. -# Using gpconfig is a workaround for regression test for non-parallel and parallel mode. -# Same for src/test/isolation2/Makefile installcheck-good: all twophase_pqexecparams hooktest query_info_hook_test - (\ - gpconfig -c gp_appendonly_insert_files -v 0; \ - gpstop -u; \ - $(pg_regress_installcheck) $(REGRESS_OPTS) --schedule=$(srcdir)/parallel_schedule --schedule=$(srcdir)/greenplum_schedule $(EXTRA_TESTS) \ - ) + $(pg_regress_installcheck) $(REGRESS_OPTS) --schedule=$(srcdir)/parallel_schedule --schedule=$(srcdir)/greenplum_schedule $(EXTRA_TESTS) installcheck-parallel: all $(pg_regress_installcheck) $(REGRESS_OPTS) --schedule=$(srcdir)/parallel_schedule $(MAXCONNOPT) $(EXTRA_TESTS) @@ -223,7 +213,7 @@ installcheck-parallel: all # cbdb parallel plan tests, ignore the incompatible cases, should run with 5 GUCs like below: installcheck-cbdb-parallel: all twophase_pqexecparams ( \ - export PGOPTIONS='-c optimizer=off -c enable_parallel=true -c min_parallel_table_scan_size=0 -c min_parallel_index_scan_size=0 -c force_parallel_mode=1 -c gp_appendonly_insert_files=0'; \ + export PGOPTIONS='-c optimizer=off -c enable_parallel=true -c min_parallel_table_scan_size=0 -c min_parallel_index_scan_size=0 -c force_parallel_mode=1'; \ $(pg_regress_installcheck) $(REGRESS_OPTS) --schedule=$(srcdir)/parallel_schedule --schedule=$(srcdir)/greenplum_schedule $(MAXCONNOPT) $(EXTRA_TESTS) --exclude-tests explain \ ) diff --git a/src/test/regress/expected/gp_parallel.out b/src/test/regress/expected/gp_parallel.out index b9c6db2e4a4..a26000c1bfc 100644 --- a/src/test/regress/expected/gp_parallel.out +++ b/src/test/regress/expected/gp_parallel.out @@ -41,29 +41,13 @@ set gp_appendonly_insert_files = 4; begin; set local enable_parallel = on; create table test_131_ao1(x int, y int) using ao_row with(parallel_workers=2); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table test_131_ao2(x int, y int) using ao_row with(parallel_workers=2); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table test_131_ao3(x int, y int) using ao_row with(parallel_workers=0); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table test_131_ao4(x int, y int) using ao_row with(parallel_workers=0); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table test_131_aoco1(x int, y int) using ao_column with(parallel_workers=2); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table test_131_aoco2(x int, y int) using ao_column with(parallel_workers=2); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table test_131_aoco3(x int, y int) using ao_column with(parallel_workers=0); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table test_131_aoco4(x int, y int) using ao_column with(parallel_workers=0); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Cloudberry Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. select relname, reloptions from pg_catalog.pg_class where relname like 'test_131_ao%'; relname | reloptions ----------------+---------------------- @@ -171,14 +155,8 @@ explain(locus, costs off) select count(*) from test_131_aoco3, test_131_aoco4 wh abort; create table ao1(x int, y int) with(appendonly=true); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table ao2(x int, y int) with(appendonly=true); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table aocs1(x int, y int) with(appendonly=true, orientation=column); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. begin; -- encourage use of parallel plans set local min_parallel_table_scan_size = 0; @@ -187,6 +165,7 @@ set local max_parallel_workers_per_gather = 4; set local enable_parallel = off; -- insert multiple segfiles for parallel set local gp_appendonly_insert_files = 4; +set local gp_appendonly_insert_files_tuples_range = 50000; -- test appendonly table parallel insert into ao1 select i, i from generate_series(1, 1200000) g(i); analyze ao1; @@ -410,17 +389,94 @@ select count(c2) from t1; (1 row) abort; +-- test segfilecount according to data volume. +create table ao_segfilecount(x int, y int) with(appendonly=true); +create table aocs_segfilecount(x int, y int) with(appendonly=true, orientation=column); +begin; +set local gp_appendonly_insert_files = 5; +set local gp_appendonly_insert_files_tuples_range = 10; +-- no enough data, open only one segment file. +insert into ao_segfilecount select i, i from generate_series(1, 29) g(i); +analyze ao_segfilecount; +select segfilecount from pg_appendonly where relid='ao_segfilecount'::regclass; + segfilecount +-------------- + 1 +(1 row) + +-- no enough data, open two segment files. +insert into ao_segfilecount select i, i from generate_series(1, 60) g(i); +analyze ao_segfilecount; +select segfilecount from pg_appendonly where relid='ao_segfilecount'::regclass; + segfilecount +-------------- + 2 +(1 row) + +-- proper data, open segment file according to GUC. +insert into ao_segfilecount select i, i from generate_series(1, 150) g(i); +analyze ao_segfilecount; +select segfilecount from pg_appendonly where relid='ao_segfilecount'::regclass; + segfilecount +-------------- + 5 +(1 row) + +-- excess data, open segment file according to GUC. +insert into ao_segfilecount select i, i from generate_series(1, 200) g(i); +analyze ao_segfilecount; +select segfilecount from pg_appendonly where relid='ao_segfilecount'::regclass; + segfilecount +-------------- + 5 +(1 row) + +-- no enough data, open only one segment file. +insert into aocs_segfilecount select i, i from generate_series(1, 29) g(i); +analyze aocs_segfilecount; +select segfilecount from pg_appendonly where relid='aocs_segfilecount'::regclass; + segfilecount +-------------- + 1 +(1 row) + +-- no enough data, open two segment files. +insert into aocs_segfilecount select i, i from generate_series(1, 60) g(i); +analyze aocs_segfilecount; +select segfilecount from pg_appendonly where relid='aocs_segfilecount'::regclass; + segfilecount +-------------- + 2 +(1 row) + +-- proper data, open segment file according to GUC. +insert into aocs_segfilecount select i, i from generate_series(1, 150) g(i); +analyze aocs_segfilecount; +select segfilecount from pg_appendonly where relid='aocs_segfilecount'::regclass; + segfilecount +-------------- + 5 +(1 row) + +-- excess data, open segment file according to GUC. +insert into aocs_segfilecount select i, i from generate_series(1, 200) g(i); +analyze aocs_segfilecount; +select segfilecount from pg_appendonly where relid='aocs_segfilecount'::regclass; + segfilecount +-------------- + 5 +(1 row) + +abort; +drop table ao_segfilecount; +drop table aocs_segfilecount; -- test gp_appendonly_insert_files doesn't take effect begin; create table t (x int); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t select i from generate_series(1, 1000) i; set local gp_appendonly_insert_files=4; set local gp_appendonly_insert_files_tuples_range = 10; create table ao1 using ao_row as select * from t; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'x' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. analyze ao1; select segfilecount from pg_appendonly where relid='ao1'::regclass; segfilecount @@ -429,8 +485,6 @@ select segfilecount from pg_appendonly where relid='ao1'::regclass; (1 row) create table ao2 with(appendonly=true) as select * from t; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'x' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. analyze ao2; select segfilecount from pg_appendonly where relid='ao2'::regclass; segfilecount @@ -439,8 +493,6 @@ select segfilecount from pg_appendonly where relid='ao2'::regclass; (1 row) create table aocs1 using ao_column as select * from t; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'x' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. analyze aocs1; select segfilecount from pg_appendonly where relid='aocs1'::regclass; segfilecount @@ -449,8 +501,6 @@ select segfilecount from pg_appendonly where relid='aocs1'::regclass; (1 row) create table aocs2 with(appendonly=true, orientation=column) as select * from t; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'x' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. analyze aocs2; select segfilecount from pg_appendonly where relid='aocs2'::regclass; segfilecount @@ -463,8 +513,6 @@ abort; begin; set local max_parallel_workers_per_gather = 2; create table t1(a int, b int) with(parallel_workers=2); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table rt1(a int, b int) with(parallel_workers=2) distributed replicated; create table rt2(a int, b int) distributed replicated; create table rt3(a int, b int) distributed replicated; @@ -877,8 +925,6 @@ select * from rt1 join t1 on rt1.a = t1.b join rt3 on rt3.a = t1.b; (19 rows) create table t2(a int, b int) with(parallel_workers=0); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table rt4(a int, b int) with(parallel_workers=2) distributed replicated; insert into t2 select i, i+1 from generate_series(1, 10) i; insert into rt4 select i, i+1 from generate_series(1, 10000) i; @@ -941,8 +987,6 @@ select * from rt4 join t2 using(b); (10 rows) create table t3(a int, b int) with(parallel_workers=2); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t3 select i, i+1 from generate_series(1, 9000) i; analyze t3; set local enable_parallel = off; @@ -1161,11 +1205,7 @@ abort; -- begin; create table t1(a int, b int) with(parallel_workers=3); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table t2(b int, a int) with(parallel_workers=2); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'b' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t1 select i, i+1 from generate_series(1, 10) i; insert into t2 select i, i+1 from generate_series(1, 5) i; analyze t1; @@ -1197,11 +1237,7 @@ abort; -- begin; create table t1(a int, b int) with(parallel_workers=2); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table t2(a int, b int) with(parallel_workers=2); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t1 select i%10, i from generate_series(1, 5) i; insert into t1 values (100000); insert into t2 select i%10, i from generate_series(1, 100000) i; @@ -1433,18 +1469,12 @@ begin; create table rt1(a int, b int) distributed replicated; create table rt2(a int, b int) with (parallel_workers = 0) distributed replicated; create table t1(a int, b int); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table t2(a int, b int) with (parallel_workers = 0); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t1 select i, i+1 from generate_series(1, 10000) i; insert into t2 select i, i+1 from generate_series(1, 10000) i; insert into rt1 select i, i+1 from generate_series(1, 10000) i; insert into rt2 select i, i+1 from generate_series(1, 10000) i; CREATE TABLE sq1 AS SELECT a, b FROM t1 WHERE gp_segment_id = 0; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. set local optimizer=off; set local enable_parallel=on; set local min_parallel_table_scan_size to 0; @@ -1884,8 +1914,6 @@ set local optimizer = off; set local enable_parallel = on; -- ao table create table ao (a INT, b INT) using ao_row; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into ao select i as a, i as b from generate_series(1, 100) AS i; alter table ao set (parallel_workers = 2); explain(costs off) select count(*) from ao; @@ -1907,8 +1935,6 @@ select count(*) from ao; alter table ao reset (parallel_workers); -- aocs table create table aocs (a INT, b INT) using ao_column; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into aocs select i as a, i as b from generate_series(1, 100) AS i; alter table aocs set (parallel_workers = 2); explain(costs off) select count(*) from aocs; diff --git a/src/test/regress/sql/gp_parallel.sql b/src/test/regress/sql/gp_parallel.sql index f7d319058a4..1a175d62888 100644 --- a/src/test/regress/sql/gp_parallel.sql +++ b/src/test/regress/sql/gp_parallel.sql @@ -72,6 +72,7 @@ set local enable_parallel = off; -- insert multiple segfiles for parallel set local gp_appendonly_insert_files = 4; +set local gp_appendonly_insert_files_tuples_range = 50000; -- test appendonly table parallel insert into ao1 select i, i from generate_series(1, 1200000) g(i); @@ -144,6 +145,55 @@ explain(locus, costs off) select count(c2) from t1; select count(c2) from t1; abort; +-- test segfilecount according to data volume. +create table ao_segfilecount(x int, y int) with(appendonly=true); +create table aocs_segfilecount(x int, y int) with(appendonly=true, orientation=column); +begin; +set local gp_appendonly_insert_files = 5; +set local gp_appendonly_insert_files_tuples_range = 10; + +-- no enough data, open only one segment file. +insert into ao_segfilecount select i, i from generate_series(1, 29) g(i); +analyze ao_segfilecount; +select segfilecount from pg_appendonly where relid='ao_segfilecount'::regclass; + +-- no enough data, open two segment files. +insert into ao_segfilecount select i, i from generate_series(1, 60) g(i); +analyze ao_segfilecount; +select segfilecount from pg_appendonly where relid='ao_segfilecount'::regclass; + +-- proper data, open segment file according to GUC. +insert into ao_segfilecount select i, i from generate_series(1, 150) g(i); +analyze ao_segfilecount; +select segfilecount from pg_appendonly where relid='ao_segfilecount'::regclass; + +-- excess data, open segment file according to GUC. +insert into ao_segfilecount select i, i from generate_series(1, 200) g(i); +analyze ao_segfilecount; +select segfilecount from pg_appendonly where relid='ao_segfilecount'::regclass; + +-- no enough data, open only one segment file. +insert into aocs_segfilecount select i, i from generate_series(1, 29) g(i); +analyze aocs_segfilecount; +select segfilecount from pg_appendonly where relid='aocs_segfilecount'::regclass; + +-- no enough data, open two segment files. +insert into aocs_segfilecount select i, i from generate_series(1, 60) g(i); +analyze aocs_segfilecount; +select segfilecount from pg_appendonly where relid='aocs_segfilecount'::regclass; + +-- proper data, open segment file according to GUC. +insert into aocs_segfilecount select i, i from generate_series(1, 150) g(i); +analyze aocs_segfilecount; +select segfilecount from pg_appendonly where relid='aocs_segfilecount'::regclass; + +-- excess data, open segment file according to GUC. +insert into aocs_segfilecount select i, i from generate_series(1, 200) g(i); +analyze aocs_segfilecount; +select segfilecount from pg_appendonly where relid='aocs_segfilecount'::regclass; +abort; +drop table ao_segfilecount; +drop table aocs_segfilecount; -- test gp_appendonly_insert_files doesn't take effect begin;