Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 33 additions & 53 deletions source/source_base/parallel_reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,85 +233,65 @@ void Parallel_Reduce::gather_int_all(int& v, int* all)
return;
}

void Parallel_Reduce::gather_min_int_all(const int& nproc, int& v)
template <>
void Parallel_Reduce::reduce_min<int>(int& v)
{
#ifdef __MPI
std::vector<int> all(nproc, 0);
MPI_Allgather(&v, 1, MPI_INT, all.data(), 1, MPI_INT, MPI_COMM_WORLD);
for (int i = 0; i < nproc; i++)
{
if (v > all[i])
{
v = all[i];
}
}
MPI_Allreduce(MPI_IN_PLACE, &v, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
#endif
}

void Parallel_Reduce::gather_max_double_all(const int& nproc, double& v)
template <>
void Parallel_Reduce::reduce_min<float>(float& v)
{
#ifdef __MPI
std::vector<double> value(nproc, 0.0);
MPI_Allgather(&v, 1, MPI_DOUBLE, value.data(), 1, MPI_DOUBLE, MPI_COMM_WORLD);
for (int i = 0; i < nproc; i++)
{
if (v < value[i])
{
v = value[i];
}
}
MPI_Allreduce(MPI_IN_PLACE, &v, 1, MPI_FLOAT, MPI_MIN, MPI_COMM_WORLD);
#endif
}

void Parallel_Reduce::gather_max_double_pool(const int& nproc_in_pool, double& v)
template <>
void Parallel_Reduce::reduce_min<double>(double& v)
{
#ifdef __MPI
if (nproc_in_pool == 1)
{
return;
}
std::vector<double> value(nproc_in_pool, 0.0);
MPI_Allgather(&v, 1, MPI_DOUBLE, value.data(), 1, MPI_DOUBLE, POOL_WORLD);
for (int i = 0; i < nproc_in_pool; i++)
{
if (v < value[i])
{
v = value[i];
}
}
MPI_Allreduce(MPI_IN_PLACE, &v, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
#endif
}

// Global maximum of a single float: after the call, v holds the largest
// value of v contributed by any process in MPI_COMM_WORLD.
// The body is compiled only when __MPI is defined; otherwise v is untouched.
template <>
void Parallel_Reduce::reduce_max<float>(float& v)
{
#if defined(__MPI)
    // One scalar, reduced in place so no separate send buffer is needed.
    const int count = 1;
    MPI_Allreduce(MPI_IN_PLACE, &v, count, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);
#endif
}

// Global maximum of a single double: after the call, v holds the largest
// value of v contributed by any process in MPI_COMM_WORLD.
// The body is compiled only when __MPI is defined; otherwise v is untouched.
template <>
void Parallel_Reduce::reduce_max<double>(double& v)
{
#if defined(__MPI)
    // One scalar, reduced in place so no separate send buffer is needed.
    const int count = 1;
    MPI_Allreduce(MPI_IN_PLACE, &v, count, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
#endif
}

void Parallel_Reduce::gather_min_double_pool(const int& nproc_in_pool, double& v)
template <>
void Parallel_Reduce::reduce_max_pool<double>(const int& nproc_in_pool, double& v)
{
#ifdef __MPI
if (nproc_in_pool == 1)
{
return;
}
std::vector<double> value(nproc_in_pool, 0.0);
MPI_Allgather(&v, 1, MPI_DOUBLE, value.data(), 1, MPI_DOUBLE, POOL_WORLD);
for (int i = 0; i < nproc_in_pool; i++)
{
if (v > value[i])
{
v = value[i];
}
}
MPI_Allreduce(MPI_IN_PLACE, &v, 1, MPI_DOUBLE, MPI_MAX, POOL_WORLD);
#endif
}

void Parallel_Reduce::gather_min_double_all(const int& nproc, double& v)
template <>
void Parallel_Reduce::reduce_min_pool<double>(const int& nproc_in_pool, double& v)
{
#ifdef __MPI
std::vector<double> value(nproc, 0.0);
MPI_Allgather(&v, 1, MPI_DOUBLE, value.data(), 1, MPI_DOUBLE, MPI_COMM_WORLD);
for (int i = 0; i < nproc; i++)
if (nproc_in_pool == 1)
{
if (v > value[i])
{
v = value[i];
}
return;
}
MPI_Allreduce(MPI_IN_PLACE, &v, 1, MPI_DOUBLE, MPI_MIN, POOL_WORLD);
#endif
}
15 changes: 8 additions & 7 deletions source/source_base/parallel_reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@ template <typename T>
void reduce_pool(T& object);
template <typename T>
void reduce_pool(T* object, const int n);
template <typename T>
void reduce_min(T& v);
template <typename T>
void reduce_max(T& v);
template <typename T>
void reduce_min_pool(const int& nproc_in_pool, T& v);
template <typename T>
void reduce_max_pool(const int& nproc_in_pool, T& v);

void reduce_int_diag(int& object); // mohan add 2012-01-12

Expand All @@ -34,13 +42,6 @@ void reduce_double_diag(double* object, const int n);
void reduce_double_allpool(const int& npool, const int& nproc_in_pool, double& object);
void reduce_double_allpool(const int& npool, const int& nproc_in_pool, double* object, const int n);

void gather_min_int_all(const int& nproc, int& v);
void gather_max_double_all(const int& nproc, double& v);
void gather_min_double_all(const int& nproc, double& v);
void gather_max_double_pool(const int& nproc_in_pool, double& v);
void gather_min_double_pool(const int& nproc_in_pool, double& v);

// mohan add 2011-04-21
void gather_int_all(int& v, int* all);

bool check_if_equal(double& v); // mohan add 2009-11-11
Expand Down
16 changes: 8 additions & 8 deletions source/source_base/test_parallel/parallel_reduce_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@
* 3. ReduceComplexAll:
* Tests two variations of reduce_complex_all()
* 4. GatherIntAll:
* Tests gather_int_all() and gather_min_int_all()
* Tests gather_int_all() and reduce_min()
* 5. GatherDoubleAll:
* Tests gather_min_double_all() and gather_max_double_all()
* Tests reduce_min() and reduce_max()
* 6. ReduceIntDiag:
* Tests reduce_int_diag()
* 7. ReduceDoubleDiag:
Expand All @@ -47,7 +47,7 @@
* 11.ReduceComplexPool:
* Tests two variations of reduce_pool()
* 12.GatherDoublePool:
* Tests gather_min_double_pool() and gather_max_double_pool()
* Tests reduce_min_pool() and reduce_max_pool()
*
*
*/
Expand Down Expand Up @@ -233,7 +233,7 @@ TEST_F(ParaReduce, GatherIntAll)
EXPECT_EQ(local_number, array[my_rank]);
// get minimum integer among all processes
int min_number = local_number;
Parallel_Reduce::gather_min_int_all(nproc, min_number);
Parallel_Reduce::reduce_min(min_number);
for (int i = 0; i < nproc; i++)
{
EXPECT_LE(min_number, array[i]);
Expand All @@ -256,10 +256,10 @@ TEST_F(ParaReduce, GatherDoubleAll)
EXPECT_EQ(local_number, array[my_rank]);
// get minimum value among all processes
double min_number = local_number;
Parallel_Reduce::gather_min_double_all(nproc, min_number);
Parallel_Reduce::reduce_min(min_number);
// get maximum value among all processes
double max_number = local_number;
Parallel_Reduce::gather_max_double_all(nproc, max_number);
Parallel_Reduce::reduce_max(max_number);
for (int i = 0; i < nproc; i++)
{
EXPECT_LE(min_number, array[i]);
Expand Down Expand Up @@ -587,10 +587,10 @@ TEST_F(ParaReduce, GatherDoublePool)
EXPECT_EQ(local_number, array[mpiContext.rank_in_pool]);
// get minimum value among all processes in the pool
double min_number = local_number;
Parallel_Reduce::gather_min_double_pool(mpiContext.nproc_in_pool, min_number);
Parallel_Reduce::reduce_min_pool(mpiContext.nproc_in_pool, min_number);
// get maximum value among all processes in the pool
double max_number = local_number;
Parallel_Reduce::gather_max_double_pool(mpiContext.nproc_in_pool, max_number);
Parallel_Reduce::reduce_max_pool(mpiContext.nproc_in_pool, max_number);
for (int i = 0; i < mpiContext.nproc_in_pool; i++)
{
EXPECT_LE(min_number, array[i]);
Expand Down
1 change: 1 addition & 0 deletions source/source_basis/module_pw/pw_basis_big.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define PW_BASIS_BIG_H
#include "source_base/constants.h"
#include "source_base/global_function.h"

#ifdef __MPI
#include "mpi.h"
#endif
Expand Down
2 changes: 1 addition & 1 deletion source/source_cell/k_vector_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ void kvec_mpi_k(K_Vectors& kv)
ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "Number of k-points in this process", kv.nks);
int nks_minimum = kv.nks;

Parallel_Reduce::gather_min_int_all(GlobalV::NPROC, nks_minimum);
Parallel_Reduce::reduce_min(nks_minimum);

if (nks_minimum == 0)
{
Expand Down
23 changes: 10 additions & 13 deletions source/source_estate/elecstate_energy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,10 @@ void ElecState::cal_bandgap()
{
vbm =this->eferm.ef;
}
#ifdef __MPI
Parallel_Reduce::gather_max_double_all(GlobalV::NPROC, vbm);
Parallel_Reduce::gather_min_double_all(GlobalV::NPROC, cbm);
#endif

#ifdef __MPI
Parallel_Reduce::reduce_max(vbm);
Parallel_Reduce::reduce_min(cbm);
#endif
this->bandgap = cbm - vbm;
}

Expand Down Expand Up @@ -119,14 +118,12 @@ void ElecState::cal_bandgap_updw()
{
vbm_dw =this->eferm.ef_dw;
}

#ifdef __MPI
Parallel_Reduce::gather_max_double_all(GlobalV::NPROC, vbm_up);
Parallel_Reduce::gather_min_double_all(GlobalV::NPROC, cbm_up);
Parallel_Reduce::gather_max_double_all(GlobalV::NPROC, vbm_dw);
Parallel_Reduce::gather_min_double_all(GlobalV::NPROC, cbm_dw);
#endif

#ifdef __MPI
Parallel_Reduce::reduce_max(vbm_up);
Parallel_Reduce::reduce_min(cbm_up);
Parallel_Reduce::reduce_max(vbm_dw);
Parallel_Reduce::reduce_min(cbm_dw);
#endif
this->bandgap_up = cbm_up - vbm_up;
this->bandgap_dw = cbm_dw - vbm_dw;
}
Expand Down
17 changes: 7 additions & 10 deletions source/source_estate/occupy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,10 +179,9 @@ void Occupy::iweights(
}
}
}
#ifdef __MPI
Parallel_Reduce::gather_max_double_all(GlobalV::NPROC, ef);
#endif

#ifdef __MPI
Parallel_Reduce::reduce_max(ef);
#endif
return;
}

Expand Down Expand Up @@ -306,13 +305,11 @@ void Occupy::efermig(const ModuleBase::matrix& ekb,

eup += 2 * smearing_sigma;
elw -= 2 * smearing_sigma;

#ifdef __MPI
// find min and max across pools
Parallel_Reduce::gather_max_double_all(GlobalV::NPROC, eup);
Parallel_Reduce::gather_min_double_all(GlobalV::NPROC, elw);

#endif
#ifdef __MPI
Parallel_Reduce::reduce_max(eup);
Parallel_Reduce::reduce_min(elw);
#endif
//=================
// Bisection method
//=================
Expand Down
4 changes: 2 additions & 2 deletions source/source_io/cal_dos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ void ModuleIO::prepare_dos(std::ofstream& ofs_running,
}

#ifdef __MPI
Parallel_Reduce::gather_max_double_all(GlobalV::NPROC, emax);
Parallel_Reduce::gather_min_double_all(GlobalV::NPROC, emin);
Parallel_Reduce::reduce_max(emax);
Parallel_Reduce::reduce_min(emin);
#endif

emax *= ModuleBase::Ry_to_eV;
Expand Down
14 changes: 7 additions & 7 deletions source/source_lcao/module_operator_lcao/op_exx_lcao.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#ifdef __EXX

#include "op_exx_lcao.h"
#include "source_base/parallel_reduce.h"
#include "source_io/module_parameter/parameter.h"
#include "source_lcao/module_ri/RI_2D_Comm.h"
#include "source_hamilt/module_xc/xc_functional.h"
Expand Down Expand Up @@ -244,10 +245,9 @@ OperatorEXX<OperatorLCAO<TK, TR>>::OperatorEXX(HS_Matrix_K<TK>* hsk_in,
if (!ifs) { all_exist = 0; break; }
}
// Add MPI communication to synchronize all_exist across processes
#ifdef __MPI
// don't read in any files if one of the processes doesn't have it
MPI_Allreduce(MPI_IN_PLACE, &all_exist, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
#endif
#ifdef __MPI
Parallel_Reduce::reduce_min(all_exist);
#endif
if (all_exist)
{
// Read HexxR in CSR format
Expand All @@ -264,9 +264,9 @@ OperatorEXX<OperatorLCAO<TK, TR>>::OperatorEXX(HS_Matrix_K<TK>* hsk_in,
const std::string restart_HR_path_cereal = GlobalC::restart.folder + "HexxR_" + std::to_string(PARAM.globalv.myrank);
std::ifstream ifs(restart_HR_path_cereal, std::ios::binary);
int all_exist_cereal = ifs ? 1 : 0;
#ifdef __MPI
MPI_Allreduce(MPI_IN_PLACE, &all_exist_cereal, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
#endif
#ifdef __MPI
Parallel_Reduce::reduce_min(all_exist_cereal);
#endif
if (!all_exist_cereal)
{
//no HexxR file in CSR or binary format
Expand Down
7 changes: 3 additions & 4 deletions source/source_pw/module_pwdft/setup_pwwfc.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "source_pw/module_pwdft/setup_pwwfc.h" // pw_wfc
#include "source_base/parallel_comm.h" // POOL_WORLD
#include "source_base/parallel_reduce.h" // Parallel_Reduce
#include "source_io/print_info.h" // print information

void pw::teardown_pwwfc(ModulePW::PW_Basis_K* &pw_wfc)
Expand Down Expand Up @@ -52,14 +53,12 @@ void pw::setup_pwwfc(const Input_para& inp,
pw_rho.nz);

pw_wfc->initparameters(false, inp.ecutwfc, kv.get_nks(), kv.kvec_d.data());

#ifdef __MPI
if (inp.pw_seed > 0)
{
MPI_Allreduce(MPI_IN_PLACE, &pw_wfc->ggecut, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
Parallel_Reduce::reduce_max( pw_wfc->ggecut);
}
// qianrui add 2021-8-13 to make different kpar parameters can get the same
// results
// qianrui add 2021-8-13: ensure that runs with different kpar parameters produce the same result
#endif

pw_wfc->fft_bundle.initfftmode(inp.fft_mode);
Expand Down
4 changes: 2 additions & 2 deletions source/source_pw/module_stodft/sto_iter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,8 @@ void Stochastic_Iter<T, Device>::checkemm(const int& ik,
if (ik == nks - 1)
{
#ifdef __MPI
MPI_Allreduce(MPI_IN_PLACE, p_hamilt_sto->emax, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
MPI_Allreduce(MPI_IN_PLACE, p_hamilt_sto->emin, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
Parallel_Reduce::reduce_max(*p_hamilt_sto->emax);
Parallel_Reduce::reduce_min(*p_hamilt_sto->emin);
MPI_Allreduce(MPI_IN_PLACE, &change, 1, MPI_CHAR, MPI_LOR, MPI_COMM_WORLD);
#endif
if (change)
Expand Down
5 changes: 3 additions & 2 deletions source/source_pw/module_stodft/sto_tool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "source_base/math_chebyshev.h"
#include "source_base/parallel_device.h"
#include "source_base/parallel_reduce.h"
#include "source_base/timer.h"
#include "source_io/module_parameter/parameter.h"
#ifdef __MPI
Expand Down Expand Up @@ -103,8 +104,8 @@ void check_che_op<FPTYPE, Device>::operator()(const int& nche_in,
if (ik == nk - 1)
{
#ifdef __MPI
MPI_Allreduce(MPI_IN_PLACE, p_hamilt_sto->emax, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
MPI_Allreduce(MPI_IN_PLACE, p_hamilt_sto->emin, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
Parallel_Reduce::reduce_max(*p_hamilt_sto->emax);
Parallel_Reduce::reduce_min(*p_hamilt_sto->emin);
#endif
GlobalV::ofs_running << "New Emax " << *p_hamilt_sto->emax << " Ry; new Emin " << *p_hamilt_sto->emin
<< " Ry" << std::endl;
Expand Down
Loading