diff --git a/.github/workflows/build_external_fts.yml b/.github/workflows/build_external_fts.yml index edeef44b1c7..0b9c459f285 100644 --- a/.github/workflows/build_external_fts.yml +++ b/.github/workflows/build_external_fts.yml @@ -39,7 +39,7 @@ jobs: needs: build runs-on: [self-hosted, example] env: - MAKE_TEST_COMMAND: "-k PGOPTIONS='-c optimizer=off -c gp_appendonly_insert_files=0' installcheck-world" + MAKE_TEST_COMMAND: "-k PGOPTIONS='-c optimizer=off' installcheck-world" TEST_OS: "centos" DUMP_DB: "true" steps: diff --git a/src/backend/access/aocs/aocsam_handler.c b/src/backend/access/aocs/aocsam_handler.c index 6ff7ff67e92..dc17b81f697 100644 --- a/src/backend/access/aocs/aocsam_handler.c +++ b/src/backend/access/aocs/aocsam_handler.c @@ -316,6 +316,9 @@ aoco_dml_finish(Relation relation, CmdType operation) Assert(state->insertDesc->aoi_rel == relation); aocs_insert_finish(state->insertDesc, &state->head); state->insertDesc = NULL; + state->insertMultiFiles = 0; + pfree(state->used_segment_files); + state->used_segment_files = NIL; } if (state->uniqueCheckDesc) @@ -1071,9 +1074,21 @@ aoco_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, AOCSInsertDesc insertDesc; insertDesc = get_insert_descriptor(relation); + AOCODMLState *state; + state = find_dml_state(RelationGetRelid(relation)); + for (int i = 0; i < ntuples; i++) { slot_getallattrs(slots[i]); + /* + * For bulk insert, we may switch insertDesc + * on the fly. + */ + if (state->insertMultiFiles && state->insertDesc->range == gp_appendonly_insert_files_tuples_range) + { + insertDesc = get_insert_descriptor(relation); + } + aocs_insert_values(insertDesc, slots[i]->tts_values, slots[i]->tts_isnull, (AOTupleId *) &slots[i]->tts_tid); } diff --git a/src/backend/access/appendonly/appendonlyam_handler.c b/src/backend/access/appendonly/appendonlyam_handler.c index ce4550c62be..cf09e97a2f6 100644 --- a/src/backend/access/appendonly/appendonlyam_handler.c +++ b/src/backend/access/appendonly/appendonlyam_handler.c @@ -286,6 +286,9 @@ appendonly_dml_finish(Relation relation, CmdType operation) Assert(state->insertDesc->aoi_rel == relation); appendonly_insert_finish(state->insertDesc, &state->head); state->insertDesc = NULL; + state->insertMultiFiles = 0; + pfree(state->used_segment_files); + state->used_segment_files = NIL; } if (state->uniqueCheckDesc) @@ -930,10 +933,12 @@ appendonly_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate) { AppendOnlyInsertDesc insertDesc; + AppendOnlyDMLState *state; MemTuple *mtuple; int ndone = 0; int nthisBlock = 0; insertDesc = get_insert_descriptor(relation); + state = find_dml_state(RelationGetRelid(relation)); Oid tableOid = RelationGetRelid(relation); mtuple = palloc(ntuples * sizeof(MemTuple)); for (int i = 0; i < ntuples; i++) @@ -943,11 +948,29 @@ appendonly_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, } while (ndone < ntuples) { + /* + * For bulk insert, we may switch insertDesc + * on the fly. + */ + if (state->insertMultiFiles && state->insertDesc->range == gp_appendonly_insert_files_tuples_range) + { + insertDesc = get_insert_descriptor(relation); + } + appendonly_insert(insertDesc, mtuple[ndone], (AOTupleId *) &slots[ndone]->tts_tid); for (nthisBlock = 1; ndone + nthisBlock < ntuples; nthisBlock++) { if (insertDesc->useNoToast) { + /* + * This is a hack way to insert into AO of CBDB. + * Check switch insertDesc again. + */ + if (state->insertMultiFiles && state->insertDesc->range == gp_appendonly_insert_files_tuples_range) + { + insertDesc = get_insert_descriptor(relation); + } + MemTuple tup = mtuple[ndone + nthisBlock] ; uint8 *itemPtr = NULL; VarBlockByteLen itemLen; @@ -968,6 +991,7 @@ appendonly_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, { memcpy(itemPtr, tup, itemLen); insertDesc->insertCount++; + insertDesc->range++; insertDesc->lastSequence++; if (insertDesc->numSequences > 0) (insertDesc->numSequences)--; diff --git a/src/test/regress/expected/ao_segfile.out b/src/test/regress/expected/ao_segfile.out new file mode 100644 index 00000000000..b1f0c92d60b --- /dev/null +++ b/src/test/regress/expected/ao_segfile.out @@ -0,0 +1,64 @@ +create schema ao_segfile; +set search_path to ao_segfile; +set gp_appendonly_insert_files = 4; +-- ao table +create table ao_copy (a int) using ao_row; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +select segfilecount from pg_appendonly where relid = 'ao_copy'::regclass; + segfilecount +-------------- + 0 +(1 row) + +set gp_appendonly_insert_files_tuples_range = 1; +-- ensure 4 files on 3 segments +COPY ao_copy from stdin; +analyze ao_copy; +select count(*) from ao_copy; + count +------- + 20 +(1 row) + +select segfilecount from pg_appendonly where relid = 'ao_copy'::regclass; + segfilecount +-------------- + 4 +(1 row) + +reset gp_appendonly_insert_files_tuples_range; +-- aocs table +create table aocs_copy (a int) using ao_column; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +select segfilecount from pg_appendonly where relid = 'aocs_copy'::regclass; + segfilecount +-------------- + 0 +(1 row) + +set gp_appendonly_insert_files_tuples_range = 1; +-- ensure 4 files on 3 segments +COPY aocs_copy from stdin; +analyze aocs_copy; +select count(*) from aocs_copy; + count +------- + 20 +(1 row) + +select segfilecount from pg_appendonly where relid = 'aocs_copy'::regclass; + segfilecount +-------------- + 4 +(1 row) + +reset gp_appendonly_insert_files_tuples_range; +reset gp_appendonly_insert_files; +-- start_ignore +drop schema ao_segfile cascade; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to table ao_copy +drop cascades to table aocs_copy +-- end_ignore diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule index a9892846caf..89d3d499e36 100755 --- a/src/test/regress/greenplum_schedule +++ b/src/test/regress/greenplum_schedule @@ -332,4 +332,7 @@ test: subtrx_overflow test: bfv_meta_track +# tests of ao/aoco seg file count for parallel plan +test: ao_segfile + # end of tests diff --git a/src/test/regress/sql/ao_segfile.sql b/src/test/regress/sql/ao_segfile.sql new file mode 100644 index 00000000000..ccd428f9f52 --- /dev/null +++ b/src/test/regress/sql/ao_segfile.sql @@ -0,0 +1,74 @@ +create schema ao_segfile; +set search_path to ao_segfile; +set gp_appendonly_insert_files = 4; + +-- ao table +create table ao_copy (a int) using ao_row; +select segfilecount from pg_appendonly where relid = 'ao_copy'::regclass; +set gp_appendonly_insert_files_tuples_range = 1; +-- ensure 4 files on 3 segments +COPY ao_copy from stdin; +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +\. + +analyze ao_copy; +select count(*) from ao_copy; +select segfilecount from pg_appendonly where relid = 'ao_copy'::regclass; +reset gp_appendonly_insert_files_tuples_range; + +-- aocs table +create table aocs_copy (a int) using ao_column; +select segfilecount from pg_appendonly where relid = 'aocs_copy'::regclass; +set gp_appendonly_insert_files_tuples_range = 1; +-- ensure 4 files on 3 segments +COPY aocs_copy from stdin; +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +\. + +analyze aocs_copy; +select count(*) from aocs_copy; +select segfilecount from pg_appendonly where relid = 'aocs_copy'::regclass; +reset gp_appendonly_insert_files_tuples_range; +reset gp_appendonly_insert_files; + +-- start_ignore +drop schema ao_segfile cascade; +-- end_ignore