From f5767b3e86708171197e2cd6bbc0d03ec741391f Mon Sep 17 00:00:00 2001 From: jinzx10 Date: Wed, 22 May 2024 15:56:27 +0800 Subject: [PATCH 1/3] clean up the BLACS initialzation code --- source/module_base/blacs_connector.h | 5 + source/module_basis/module_ao/parallel_2d.cpp | 208 +++--------------- 2 files changed, 37 insertions(+), 176 deletions(-) diff --git a/source/module_base/blacs_connector.h b/source/module_base/blacs_connector.h index 98fd06fcde..da4756df78 100644 --- a/source/module_base/blacs_connector.h +++ b/source/module_base/blacs_connector.h @@ -26,6 +26,9 @@ //====================================8<---------------------------------------- // blacs // Initialization + +#include + extern "C" { void Cblacs_pinfo(int *myid, int *nprocs); @@ -37,4 +40,6 @@ extern "C" int Cblacs_pnum(int icontxt, int prow, int pcol); void Cblacs_pcoord(int icontxt, int pnum, int *prow, int *pcol); void Cblacs_exit(int icontxt); + + int Csys2blacs_handle(MPI_Comm SysCtxt); } diff --git a/source/module_basis/module_ao/parallel_2d.cpp b/source/module_basis/module_ao/parallel_2d.cpp index 754e94184a..79fe1d96d3 100644 --- a/source/module_basis/module_ao/parallel_2d.cpp +++ b/source/module_basis/module_ao/parallel_2d.cpp @@ -11,35 +11,23 @@ Parallel_2D::~Parallel_2D() void Parallel_2D::set_proc_dim(const int& dsize, bool mode /*= 0*/) { - if (mode) //dim0 >= dim1 + // default mode = 0: dim0 <= dim1 + this->dim0 = (int)sqrt((double)dsize); + while (dsize % this->dim0 != 0) { - this->dim1 = (int)sqrt((double)dsize); - while (dsize % this->dim1 != 0) - { - this->dim1 = this->dim1 - 1; - } - assert(this->dim1 > 0); - this->dim0 = dsize / this->dim1; + this->dim0 = this->dim0 - 1; } - else //dim0 <= dim1 - { - this->dim0 = (int)sqrt((double)dsize); - while (dsize % this->dim0 != 0) - { - this->dim0 = this->dim0 - 1; - } - assert(this->dim0 > 0); - this->dim1 = dsize / this->dim0; + assert(this->dim0 > 0); + this->dim1 = dsize / this->dim0; + + if (mode) { // mode = 1: dim0 >= dim1 + std::swap(this->dim0, this->dim1); } } bool Parallel_2D::in_this_processor(const int& iw1_all, const int& iw2_all) const { - if (global2local_row(iw1_all) == -1) - return false; - else if (global2local_col(iw2_all) == -1) - return false; - return true; + return global2local_row(iw1_all) != -1 && global2local_col(iw2_all) != -1; } void Parallel_2D::set_global2local(const int& M_A, const int& N_A, @@ -106,6 +94,11 @@ extern "C" #include "module_base/scalapack_connector.h" } +// FIXME In theory BLACS would split the given communicator to get some new +// ones for its own purpose when initializing the process grid, so there might +// be unnecessary to create a Cartesian MPI communicator in advance. +// ***This needs to be verified*** + void Parallel_2D::mpi_create_cart(const MPI_Comm& diag_world) { ModuleBase::TITLE("Parallel_2D", "mpi_create_cart"); @@ -132,20 +125,11 @@ void Parallel_2D::set_desc(const int& gr, const int& gc, const int& lld, bool fi if (first_time) { int myprow=0, mypcol=0; - int* usermap = new int[this->dim0 * this->dim1]; - for (int i = 0; i < this->dim0; ++i) - { - for (int j = 0; j < this->dim1; ++j) - { - int pcoord[2] = { i, j }; - MPI_Cart_rank(comm_2D, pcoord, &usermap[i + j * this->dim0]); - } - } - MPI_Fint comm_2D_f = MPI_Comm_c2f(comm_2D); - Cblacs_get(comm_2D_f, 0, &this->blacs_ctxt); - Cblacs_gridmap(&this->blacs_ctxt, usermap, this->dim0, this->dim0, this->dim1); - Cblacs_gridinfo(this->blacs_ctxt, &this->dim0, &this->dim1, &myprow, &mypcol); - delete[] usermap; + char order = 'R'; // row major process grid + + blacs_ctxt = Csys2blacs_handle(comm_2D); + Cblacs_gridinit(&blacs_ctxt, &order, dim0, dim1); + Cblacs_gridinfo(blacs_ctxt, &dim0, &dim1, &myprow, &mypcol); } int ISRC = 0; int info = 0; @@ -166,8 +150,6 @@ int Parallel_2D::set_local2global( int dim[2]; int period[2]; - int j, end_id, block; - int row_b, col_b; // (0) every processor get it's id on the 2D comm // : ( coord[0], coord[1] ) @@ -175,150 +157,24 @@ int Parallel_2D::set_local2global( assert(dim[0] == this->dim0); assert(dim[1] == this->dim1); - // (1.1) how many blocks at least - // eg. M_A = 6400, nb = 64; - // so block = 10; - block = M_A / nb; + // local number of row and columns + const int zero = 0; + nrow = numroc_(&M_A, &nb, &coord[0], &zero, &dim0); + ncol = numroc_(&N_A, &nb, &coord[1], &zero, &dim1); + nloc = nrow * ncol; - // (1.2) If data remain, add 1. - if (block * nb < M_A) + local2global_row_.resize(nrow); + for (int i = 0; i < nrow; ++i) { - block++; + local2global_row_[i] = (i / nb) * dim0 * nb + coord[0] * nb + i % nb; } - if (this->testpb)ModuleBase::GlobalFunc::OUT(ofs_running, "Total Row Blocks Number", block); - - // mohan add 2010-09-12 - if (dim[0] > block) - { - ofs_warning << " cpu 2D distribution : " << dim[0] << "*" << dim[1] << std::endl; - ofs_warning << " but, the number of row blocks is " << block << std::endl; - if (nb > 1) - { - return 1; - } - else - { - ModuleBase::WARNING_QUIT("Parallel_2D::set_local2global", "some processor has no row blocks, try a smaller 'nb2d' parameter."); - } - } - - // (2.1) row_b : how many blocks for this processor. (at least) - row_b = block / dim[0]; - - // (2.2) row_b : how many blocks in this processor. - // if there are blocks remain, some processors add 1. - if (coord[0] < block % dim[0]) + local2global_col_.resize(ncol); + for (int j = 0; j < ncol; ++j) { - row_b++; + local2global_col_[j] = (j / nb) * dim1 * nb + coord[1] * nb + j % nb; } - if (this->testpb)ModuleBase::GlobalFunc::OUT(ofs_running, "Local Row Block Number", row_b); - - // (3) end_id indicates the last block belong to - // which processor. - if (block % dim[0] == 0) - { - end_id = dim[0] - 1; - } - else - { - end_id = block % dim[0] - 1; - } - - if (this->testpb)ModuleBase::GlobalFunc::OUT(ofs_running, "Ending Row Block in processor", end_id); - - // (4) this->nrow : how many rows in this processors : - // the one owns the last block is different. - if (coord[0] == end_id) - { - this->nrow = (row_b - 1) * nb + (M_A - (block - 1) * nb); - } - else - { - this->nrow = row_b * nb; - } - - if (this->testpb)ModuleBase::GlobalFunc::OUT(ofs_running, "Local rows (including nb)", this->nrow); - - // (5) local2global_row, it's a global index : - // save explicitly : every row in this processor - // belongs to which row in the global matrix. - this->local2global_row_.resize(this->nrow); - j = 0; - for (int i = 0; i < row_b; i++) - { - for (int k = 0; k < nb && (coord[0] * nb + i * nb * dim[0] + k < M_A); k++, j++) - { - this->local2global_row_[j] = coord[0] * nb + i * nb * dim[0] + k; - } - } - - // the same procedures for columns. - block = N_A / nb; - if (block * nb < N_A) - { - block++; - } - if (this->testpb)ModuleBase::GlobalFunc::OUT(ofs_running, "Total Col Blocks Number", block); - - if (dim[1] > block) - { - ofs_warning << " cpu 2D distribution : " << dim[0] << "*" << dim[1] << std::endl; - ofs_warning << " but, the number of column blocks is " << block << std::endl; - if (nb > 1) - { - return 1; - } - else - { - ModuleBase::WARNING_QUIT("Parallel_2D::set_local2global", "some processor has no column blocks."); - } - } - - col_b = block / dim[1]; - if (coord[1] < block % dim[1]) - { - col_b++; - } - - if (this->testpb)ModuleBase::GlobalFunc::OUT(ofs_running, "Local Col Block Number", col_b); - - if (block % dim[1] == 0) - { - end_id = dim[1] - 1; - } - else - { - end_id = block % dim[1] - 1; - } - - if (this->testpb)ModuleBase::GlobalFunc::OUT(ofs_running, "Ending Col Block in processor", end_id); - - if (coord[1] == end_id) - { - this->ncol = (col_b - 1) * nb + (N_A - (block - 1) * nb); - } - else - { - this->ncol = col_b * nb; - } - - if (this->testpb)ModuleBase::GlobalFunc::OUT(ofs_running, "Local columns (including nb)", this->ncol); - - //set nloc - this->nloc = this->nrow * this->ncol; - - this->local2global_col_.resize(this->ncol); - - j = 0; - for (int i = 0; i < col_b; i++) - { - for (int k = 0; k < nb && (coord[1] * nb + i * nb * dim[1] + k < N_A); k++, j++) - { - this->local2global_col_[j] = coord[1] * nb + i * nb * dim[1] + k; - } - } return 0; } #else @@ -333,4 +189,4 @@ void Parallel_2D::set_serial(const int& M_A, const int& N_A) for (int i = 0; i < this->nrow; i++) this->local2global_row_[i] = i; for (int i = 0; i < this->ncol; i++) this->local2global_col_[i] = i; } -#endif \ No newline at end of file +#endif From ce010cec06d0aa4b3c214379cec86d037cf234b3 Mon Sep 17 00:00:00 2001 From: jinzx10 Date: Wed, 22 May 2024 16:33:58 +0800 Subject: [PATCH 2/3] fix pyabacus compiliation error --- source/module_base/blacs_connector.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/source/module_base/blacs_connector.h b/source/module_base/blacs_connector.h index da4756df78..cfb627a9e2 100644 --- a/source/module_base/blacs_connector.h +++ b/source/module_base/blacs_connector.h @@ -26,9 +26,6 @@ //====================================8<---------------------------------------- // blacs // Initialization - -#include - extern "C" { void Cblacs_pinfo(int *myid, int *nprocs); @@ -40,6 +37,12 @@ extern "C" int Cblacs_pnum(int icontxt, int prow, int pcol); void Cblacs_pcoord(int icontxt, int pnum, int *prow, int *pcol); void Cblacs_exit(int icontxt); +} +#ifdef __MPI +#include +extern "C" +{ int Csys2blacs_handle(MPI_Comm SysCtxt); } +#endif From 6ea45f046857b1b5cc9687d4882362331a39ec5a Mon Sep 17 00:00:00 2001 From: jinzx10 Date: Wed, 22 May 2024 16:58:40 +0800 Subject: [PATCH 3/3] recover error message --- source/module_basis/module_ao/parallel_2d.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/source/module_basis/module_ao/parallel_2d.cpp b/source/module_basis/module_ao/parallel_2d.cpp index 79fe1d96d3..a75d56b2d5 100644 --- a/source/module_basis/module_ao/parallel_2d.cpp +++ b/source/module_basis/module_ao/parallel_2d.cpp @@ -163,6 +163,23 @@ int Parallel_2D::set_local2global( ncol = numroc_(&N_A, &nb, &coord[1], &zero, &dim1); nloc = nrow * ncol; + // mohan add 2010-09-12 + if (nrow == 0 || ncol == 0) + { + ofs_warning << " cpu 2D distribution : " << dim[0] << "*" << dim[1] << std::endl; + ofs_warning << " but, the number of row and column blocks are " + << M_A / nb + (M_A % nb != 0) << "*" << N_A / nb + (N_A % nb != 0) + << std::endl; + if (nb > 1) + { + return 1; + } + else + { + ModuleBase::WARNING_QUIT("Parallel_2D::set_local2global", "some processor has no blocks, try a smaller 'nb2d' parameter or reduce the number of mpi processes."); + } + } + local2global_row_.resize(nrow); for (int i = 0; i < nrow; ++i) {