Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions source/module_base/blacs_connector.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,11 @@ extern "C"
void Cblacs_pcoord(int icontxt, int pnum, int *prow, int *pcol);
void Cblacs_exit(int icontxt);
}

#ifdef __MPI
#include <mpi.h>
extern "C"
{
int Csys2blacs_handle(MPI_Comm SysCtxt);
}
#endif
201 changes: 37 additions & 164 deletions source/module_basis/module_ao/parallel_2d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,35 +11,23 @@ Parallel_2D::~Parallel_2D()

void Parallel_2D::set_proc_dim(const int& dsize, bool mode /*= 0*/)
{
if (mode) //dim0 >= dim1
// default mode = 0: dim0 <= dim1
this->dim0 = (int)sqrt((double)dsize);
while (dsize % this->dim0 != 0)
{
this->dim1 = (int)sqrt((double)dsize);
while (dsize % this->dim1 != 0)
{
this->dim1 = this->dim1 - 1;
}
assert(this->dim1 > 0);
this->dim0 = dsize / this->dim1;
this->dim0 = this->dim0 - 1;
}
else //dim0 <= dim1
{
this->dim0 = (int)sqrt((double)dsize);
while (dsize % this->dim0 != 0)
{
this->dim0 = this->dim0 - 1;
}
assert(this->dim0 > 0);
this->dim1 = dsize / this->dim0;
assert(this->dim0 > 0);
this->dim1 = dsize / this->dim0;

if (mode) { // mode = 1: dim0 >= dim1
std::swap(this->dim0, this->dim1);
}
}

bool Parallel_2D::in_this_processor(const int& iw1_all, const int& iw2_all) const
{
if (global2local_row(iw1_all) == -1)
return false;
else if (global2local_col(iw2_all) == -1)
return false;
return true;
return global2local_row(iw1_all) != -1 && global2local_col(iw2_all) != -1;
}

void Parallel_2D::set_global2local(const int& M_A, const int& N_A,
Expand Down Expand Up @@ -106,6 +94,11 @@ extern "C"
#include "module_base/scalapack_connector.h"
}

// FIXME: In theory, BLACS splits the given communicator into new ones for
// its own purposes when initializing the process grid, so it may be
// unnecessary to create a Cartesian MPI communicator in advance.
// ***This needs to be verified***

void Parallel_2D::mpi_create_cart(const MPI_Comm& diag_world)
{
ModuleBase::TITLE("Parallel_2D", "mpi_create_cart");
Expand All @@ -132,20 +125,11 @@ void Parallel_2D::set_desc(const int& gr, const int& gc, const int& lld, bool fi
if (first_time)
{
int myprow=0, mypcol=0;
int* usermap = new int[this->dim0 * this->dim1];
for (int i = 0; i < this->dim0; ++i)
{
for (int j = 0; j < this->dim1; ++j)
{
int pcoord[2] = { i, j };
MPI_Cart_rank(comm_2D, pcoord, &usermap[i + j * this->dim0]);
}
}
MPI_Fint comm_2D_f = MPI_Comm_c2f(comm_2D);
Cblacs_get(comm_2D_f, 0, &this->blacs_ctxt);
Cblacs_gridmap(&this->blacs_ctxt, usermap, this->dim0, this->dim0, this->dim1);
Cblacs_gridinfo(this->blacs_ctxt, &this->dim0, &this->dim1, &myprow, &mypcol);
delete[] usermap;
char order = 'R'; // row major process grid

blacs_ctxt = Csys2blacs_handle(comm_2D);
Cblacs_gridinit(&blacs_ctxt, &order, dim0, dim1);
Cblacs_gridinfo(blacs_ctxt, &dim0, &dim1, &myprow, &mypcol);
}
int ISRC = 0;
int info = 0;
Expand All @@ -166,159 +150,48 @@ int Parallel_2D::set_local2global(

int dim[2];
int period[2];
int j, end_id, block;
int row_b, col_b;

// (0) every processor get it's id on the 2D comm
// : ( coord[0], coord[1] )
MPI_Cart_get(this->comm_2D, 2, dim, period, coord);
assert(dim[0] == this->dim0);
assert(dim[1] == this->dim1);

// (1.1) how many blocks at least
// eg. M_A = 6400, nb = 64;
// so block = 10;
block = M_A / nb;

// (1.2) If data remain, add 1.
if (block * nb < M_A)
{
block++;
}

if (this->testpb)ModuleBase::GlobalFunc::OUT(ofs_running, "Total Row Blocks Number", block);
// local number of row and columns
const int zero = 0;
nrow = numroc_(&M_A, &nb, &coord[0], &zero, &dim0);
ncol = numroc_(&N_A, &nb, &coord[1], &zero, &dim1);
nloc = nrow * ncol;

// mohan add 2010-09-12
if (dim[0] > block)
{
ofs_warning << " cpu 2D distribution : " << dim[0] << "*" << dim[1] << std::endl;
ofs_warning << " but, the number of row blocks is " << block << std::endl;
if (nb > 1)
{
return 1;
}
else
{
ModuleBase::WARNING_QUIT("Parallel_2D::set_local2global", "some processor has no row blocks, try a smaller 'nb2d' parameter.");
}
}

// (2.1) row_b : how many blocks for this processor. (at least)
row_b = block / dim[0];

// (2.2) row_b : how many blocks in this processor.
// if there are blocks remain, some processors add 1.
if (coord[0] < block % dim[0])
{
row_b++;
}

if (this->testpb)ModuleBase::GlobalFunc::OUT(ofs_running, "Local Row Block Number", row_b);

// (3) end_id indicates the last block belong to
// which processor.
if (block % dim[0] == 0)
{
end_id = dim[0] - 1;
}
else
{
end_id = block % dim[0] - 1;
}

if (this->testpb)ModuleBase::GlobalFunc::OUT(ofs_running, "Ending Row Block in processor", end_id);

// (4) this->nrow : how many rows in this processors :
// the one owns the last block is different.
if (coord[0] == end_id)
{
this->nrow = (row_b - 1) * nb + (M_A - (block - 1) * nb);
}
else
{
this->nrow = row_b * nb;
}

if (this->testpb)ModuleBase::GlobalFunc::OUT(ofs_running, "Local rows (including nb)", this->nrow);

// (5) local2global_row, it's a global index :
// save explicitly : every row in this processor
// belongs to which row in the global matrix.
this->local2global_row_.resize(this->nrow);
j = 0;
for (int i = 0; i < row_b; i++)
{
for (int k = 0; k < nb && (coord[0] * nb + i * nb * dim[0] + k < M_A); k++, j++)
{
this->local2global_row_[j] = coord[0] * nb + i * nb * dim[0] + k;
}
}

// the same procedures for columns.
block = N_A / nb;
if (block * nb < N_A)
{
block++;
}
if (this->testpb)ModuleBase::GlobalFunc::OUT(ofs_running, "Total Col Blocks Number", block);

if (dim[1] > block)
if (nrow == 0 || ncol == 0)
{
ofs_warning << " cpu 2D distribution : " << dim[0] << "*" << dim[1] << std::endl;
ofs_warning << " but, the number of column blocks is " << block << std::endl;
ofs_warning << " but, the number of row and column blocks are "
<< M_A / nb + (M_A % nb != 0) << "*" << N_A / nb + (N_A % nb != 0)
<< std::endl;
if (nb > 1)
{
return 1;
}
else
{
ModuleBase::WARNING_QUIT("Parallel_2D::set_local2global", "some processor has no column blocks.");
ModuleBase::WARNING_QUIT("Parallel_2D::set_local2global", "some processor has no blocks, try a smaller 'nb2d' parameter or reduce the number of mpi processes.");
}
}

col_b = block / dim[1];
if (coord[1] < block % dim[1])
local2global_row_.resize(nrow);
for (int i = 0; i < nrow; ++i)
{
col_b++;
local2global_row_[i] = (i / nb) * dim0 * nb + coord[0] * nb + i % nb;
}

if (this->testpb)ModuleBase::GlobalFunc::OUT(ofs_running, "Local Col Block Number", col_b);

if (block % dim[1] == 0)
local2global_col_.resize(ncol);
for (int j = 0; j < ncol; ++j)
{
end_id = dim[1] - 1;
local2global_col_[j] = (j / nb) * dim1 * nb + coord[1] * nb + j % nb;
}
else
{
end_id = block % dim[1] - 1;
}

if (this->testpb)ModuleBase::GlobalFunc::OUT(ofs_running, "Ending Col Block in processor", end_id);

if (coord[1] == end_id)
{
this->ncol = (col_b - 1) * nb + (N_A - (block - 1) * nb);
}
else
{
this->ncol = col_b * nb;
}

if (this->testpb)ModuleBase::GlobalFunc::OUT(ofs_running, "Local columns (including nb)", this->ncol);

//set nloc
this->nloc = this->nrow * this->ncol;

this->local2global_col_.resize(this->ncol);

j = 0;
for (int i = 0; i < col_b; i++)
{
for (int k = 0; k < nb && (coord[1] * nb + i * nb * dim[1] + k < N_A); k++, j++)
{
this->local2global_col_[j] = coord[1] * nb + i * nb * dim[1] + k;
}
}
return 0;
}
#else
Expand All @@ -333,4 +206,4 @@ void Parallel_2D::set_serial(const int& M_A, const int& N_A)
for (int i = 0; i < this->nrow; i++) this->local2global_row_[i] = i;
for (int i = 0; i < this->ncol; i++) this->local2global_col_[i] = i;
}
#endif
#endif