diff --git a/CMakeLists.txt b/CMakeLists.txt index 2ce46a2e5a..d049887b19 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,6 +37,7 @@ option(ENABLE_FFT_TWO_CENTER "Enable FFT-based two-center integral method." ON) option(ENABLE_GOOGLEBENCH "Enable GOOGLE-benchmark usage." OFF) option(ENABLE_RAPIDJSON "Enable rapid-json usage." OFF) option(ENABLE_CNPY "Enable cnpy usage." OFF) +option(ENABLE_PEXSI "Enable support for PEXSI." OFF) # enable json support if(ENABLE_RAPIDJSON) @@ -211,6 +212,14 @@ if(ENABLE_LCAO) if(ENABLE_FFT_TWO_CENTER) add_compile_definitions(USE_NEW_TWO_CENTER) endif() + + if(ENABLE_PEXSI) + find_package(PEXSI REQUIRED) + target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_DIST_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi) + include_directories(${PEXSI_INCLUDE_DIR} ${ParMETIS_INCLUDE_DIR}) + add_compile_definitions(__PEXSI) + set(CMAKE_CXX_STANDARD 14) + endif() else() set(ENABLE_DEEPKS OFF) set(ENABLE_LIBRI OFF) diff --git a/cmake/FindPEXSI.cmake b/cmake/FindPEXSI.cmake new file mode 100644 index 0000000000..5adc4c8a6d --- /dev/null +++ b/cmake/FindPEXSI.cmake @@ -0,0 +1,57 @@ +############################################################################### +# - Find PEXSI +# Find PEXSI and its dependencies. +# +# PEXSI_FOUND - True if pexsi is found. +# PEXSI_INCLUDE_DIR - Where to find pexsi headers. +# PEXSI_LIBRARY - pexsi library. +# ParMETIS_INCLUDE_DIR - Where to find pexsi headers. +# ParMETIS_LIBRARY - parmetis library. +# METIS_LIBRARY - metis library. +# SuperLU_DIST_LIBRARY - superlu_dist library. 
+ +find_path(PEXSI_INCLUDE_DIR + NAMES c_pexsi_interface.h + HINTS ${PEXSI_DIR} + PATH_SUFFIXES "include" +) + +find_library(PEXSI_LIBRARY + NAMES pexsi + HINTS ${PEXSI_DIR} + PATH_SUFFIXES "lib" +) + +find_path(ParMETIS_INCLUDE_DIR + NAMES metis.h parmetis.h + HINTS ${ParMETIS_DIR} + PATH_SUFFIXES "include" +) + +find_library(METIS_LIBRARY + NAMES metis + HINTS ${ParMETIS_DIR} + PATH_SUFFIXES "lib" +) + +find_library(ParMETIS_LIBRARY + NAMES parmetis + HINTS ${ParMETIS_DIR} + PATH_SUFFIXES "lib" +) + +find_library(SuperLU_DIST_LIBRARY + NAMES superlu_dist + HINTS ${SuperLU_DIST_DIR} + PATH_SUFFIXES "lib" +) + +# Handle the QUIET and REQUIRED arguments and +# set PEXSI_FOUND to TRUE if all variables are non-zero. +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY METIS_LIBRARY SuperLU_DIST_LIBRARY) + + +# Copy the results to the output variables and target. +mark_as_advanced(PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY SuperLU_DIST_LIBRARY) + diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md index a3055c70ec..d1c996127d 100644 --- a/docs/advanced/input_files/input-main.md +++ b/docs/advanced/input_files/input-main.md @@ -377,6 +377,30 @@ - [qo\_strategy](#qo_strategy) - [qo\_screening\_coeff](#qo_screening_coeff) - [qo\_thr](#qo_thr) + - [PEXSI](#pexsi) + - [pexsi\_npole](#pexsi_npole) + - [pexsi\_inertia](#pexsi_inertia) + - [pexsi\_nmax](#pexsi_nmax) + - [pexsi\_comm](#pexsi_comm) + - [pexsi\_storage](#pexsi_storage) + - [pexsi\_ordering](#pexsi_ordering) + - [pexsi\_row\_ordering](#pexsi_row_ordering) + - [pexsi\_nproc](#pexsi_nproc) + - [pexsi\_symm](#pexsi_symm) + - [pexsi\_trans](#pexsi_trans) + - [pexsi\_method](#pexsi_method) + - [pexsi\_nproc\_pole](#pexsi_nproc_pole) + - [pexsi\_temp](#pexsi_temp) + - [pexsi\_gap](#pexsi_gap) + - [pexsi\_delta\_e](#pexsi_delta_e) + - [pexsi\_mu\_lower](#pexsi_mu_lower) + - 
[pexsi\_mu\_upper](#pexsi_mu_upper) + - [pexsi\_mu](#pexsi_mu) + - [pexsi\_mu\_thr](#pexsi_mu_thr) + - [pexsi\_mu\_expand](#pexsi_mu_expand) + - [pexsi\_mu\_guard](#pexsi_mu_guard) + - [pexsi\_elec\_thr](#pexsi_elec_thr) + - [pexsi\_zero\_thr](#pexsi_zero_thr) [back to top](#full-list-of-input-keywords) @@ -3530,4 +3554,146 @@ These variables are used to control the usage of QO analysis. QO further compres - **Description**: the convergence threshold determining the cutoff of generated orbital. Lower threshold will yield orbital with larger cutoff radius. - **Default**: 1.0e-6 +## PEXSI + +These variables are used to control the usage of PEXSI (Pole Expansion and Selected Inversion) method in calculations. + +### pexsi_npole + +- **Type**: Integer +- **Description**: the number of poles used in the pole expansion method, should be an even number. +- **Default**: 40 + +### pexsi_inertia + +- **Type**: Boolean +- **Description**: whether inertia counting is used at the very beginning. +- **Default**: True + +### pexsi_nmax + +- **Type**: Integer +- **Description**: maximum number of PEXSI iterations after each inertia counting procedure. +- **Default**: 80 + +### pexsi_comm + +- **Type**: Boolean +- **Description**: whether to construct PSelInv communication pattern. +- **Default**: True + +### pexsi_storage + +- **Type**: Boolean +- **Description**: whether to use the symmetric storage space used by the Selected Inversion algorithm for symmetric matrices. +- **Default**: True + +### pexsi_ordering + +- **Type**: Integer +- **Description**: ordering strategy for factorization and selected inversion. 0: Parallel ordering using ParMETIS, 1: Sequential ordering using METIS, 2: Multiple minimum degree ordering +- **Default**: 0 + +### pexsi_row_ordering + +- **Type**: Integer +- **Description**: row permutation strategy for factorization and selected inversion, 0: No row permutation, 1: Make the diagonal entry of the matrix larger than the off-diagonal entries. 
+- **Default**: 1 + +### pexsi_nproc + +- **Type**: Integer +- **Description**: number of processors for PARMETIS. Only used if pexsi_ordering == 0. +- **Default**: 1 + +### pexsi_symm + +- **Type**: Boolean +- **Description**: whether the matrix is symmetric. +- **Default**: True + +### pexsi_trans + +- **Type**: Boolean +- **Description**: whether to factorize the transpose of the matrix. +- **Default**: False + +### pexsi_method + +- **Type**: Integer +- **Description**: the pole expansion method to be used. 1 for Cauchy Contour Integral method, 2 for Moussa optimized method. +- **Default**: 1 + +### pexsi_nproc_pole + +- **Type**: Integer +- **Description**: the point parallelization of PEXSI. Two-point parallelization is recommended. +- **Default**: 1 + +### pexsi_temp + +- **Type**: Real +- **Description**: temperature in Fermi-Dirac distribution, in Ry, should have the same effect as the smearing sigma when smearing method is set to Fermi-Dirac. +- **Default**: 0.015 + +### pexsi_gap + +- **Type**: Real +- **Description**: spectral gap; this can be set to 0 in most cases. +- **Default**: 0 + +### pexsi_delta_e + +- **Type**: Real +- **Description**: an upper bound for the spectral radius of $S^{-1} H$. +- **Default**: 20 + +### pexsi_mu_lower + +- **Type**: Real +- **Description**: initial guess of lower bound for mu. +- **Default**: -10 + +### pexsi_mu_upper + +- **Type**: Real +- **Description**: initial guess of upper bound for mu. +- **Default**: 10 + +### pexsi_mu + +- **Type**: Real +- **Description**: initial guess for mu (for the solver). +- **Default**: 0 + +### pexsi_mu_thr + +- **Type**: Real +- **Description**: stopping criterion in terms of the chemical potential for the inertia counting procedure. +- **Default**: 0.05 + +### pexsi_mu_expand + +- **Type**: Real +- **Description**: if the chemical potential is not in the initial interval, the interval is expanded by this value. 
+- **Default**: 0.3 + +### pexsi_mu_guard + +- **Type**: Real +- **Description**: safeguard criterion in terms of the chemical potential to reinvoke the inertia counting procedure. +- **Default**: 0.2 + +### pexsi_elec_thr + +- **Type**: Real +- **Description**: stopping criterion of the PEXSI iteration in terms of the number of electrons compared to numElectronExact. +- **Default**: 0.001 + +### pexsi_zero_thr + +- **Type**: Real +- **Description**: if the absolute value of a CCS matrix element is less than this value, it will be considered zero. +- **Default**: 1e-10 + [back to top](#full-list-of-input-keywords) diff --git a/docs/advanced/install.md index d6201a060f..a5deeb5888 100644 --- a/docs/advanced/install.md +++ b/docs/advanced/install.md @@ -104,6 +104,18 @@ Currently supported math functions: cmake -B build -DUSE_ABACUS_LIBM=1 ``` +## Build with PEXSI support + +ABACUS supports the PEXSI library for gamma-only LCAO calculations. PEXSI version 2.0.0 is tested to work with ABACUS; please always use the latest version of PEXSI. + +To build ABACUS with PEXSI support, you need to compile PEXSI (and its dependencies) first. Please refer to the [PEXSI Installation Guide](https://pexsi.readthedocs.io/en/latest/install.html) for more details. Note that PEXSI requires ParMETIS and SuperLU_DIST. + +After compiling PEXSI, you can set `ENABLE_PEXSI` to `ON`. If the libraries are not installed in standard paths, you can set `PEXSI_DIR`, `ParMETIS_DIR` and `SuperLU_DIST_DIR` to the corresponding directories. + +```bash +cmake -B build -DENABLE_PEXSI=ON -DPEXSI_DIR=${path to PEXSI installation directory} -DParMETIS_DIR=${path to ParMETIS installation directory} -DSuperLU_DIST_DIR=${path to SuperLU_DIST installation directory} +``` + ## Build ABACUS with make > Note: We suggest using CMake to configure and compile. 
diff --git a/examples/pexsi/md_Si8/INPUT b/examples/pexsi/md_Si8/INPUT new file mode 100644 index 0000000000..13231579bc --- /dev/null +++ b/examples/pexsi/md_Si8/INPUT @@ -0,0 +1,32 @@ +INPUT_PARAMETERS +#Parameters (1.General) +suffix Si_rescaling +calculation md +nbands 20 +symmetry 0 +pseudo_dir ../../../tests/PP_ORB +orbital_dir ../../../tests/PP_ORB + +#Parameters (2.Iteration) +ecutwfc 30 +scf_thr 1e-5 +scf_nmax 100 + +#Parameters (3.Basis) +basis_type lcao +ks_solver pexsi +gamma_only 1 + +#Parameters (5.Mixing) +mixing_type broyden +mixing_beta 0.3 +chg_extrap second-order + +#Parameters (6.MD) +md_type nvt +md_thermostat rescaling +md_tolerance 10 +md_nstep 10 +md_dt 1 +md_tfirst 300 +md_tfreq 0.025 diff --git a/examples/pexsi/md_Si8/KPT b/examples/pexsi/md_Si8/KPT new file mode 100644 index 0000000000..c289c0158a --- /dev/null +++ b/examples/pexsi/md_Si8/KPT @@ -0,0 +1,4 @@ +K_POINTS +0 +Gamma +1 1 1 0 0 0 diff --git a/examples/pexsi/md_Si8/STRU b/examples/pexsi/md_Si8/STRU new file mode 100644 index 0000000000..795530e8cf --- /dev/null +++ b/examples/pexsi/md_Si8/STRU @@ -0,0 +1,28 @@ +ATOMIC_SPECIES +Si 28.085 Si_ONCV_PBE-1.0.upf + +NUMERICAL_ORBITAL +Si_gga_8au_60Ry_2s2p1d.orb + +LATTICE_CONSTANT +1.8897270 # 1 Angstrom = 1.8897270 bohr + +LATTICE_VECTORS +5.43090 0.00000 0.00000 +0.00000 5.43090 0.00000 +0.00000 0.00000 5.43090 + +ATOMIC_POSITIONS +Direct + +Si +0.0 +8 +0.000 0.000 0.000 1 1 1 +0.000 0.500 0.500 1 1 1 +0.500 0.000 0.500 1 1 1 +0.500 0.500 0.000 1 1 1 +0.250 0.250 0.250 1 1 1 +0.250 0.750 0.750 1 1 1 +0.750 0.250 0.750 1 1 1 +0.750 0.750 0.250 1 1 1 \ No newline at end of file diff --git a/examples/pexsi/scf_Si64/INPUT b/examples/pexsi/scf_Si64/INPUT new file mode 100755 index 0000000000..ddb9813701 --- /dev/null +++ b/examples/pexsi/scf_Si64/INPUT @@ -0,0 +1,20 @@ +INPUT_PARAMETERS +suffix test +ntype 1 +nbands 200 +pseudo_dir ../../../tests/PP_ORB +orbital_dir ../../../tests/PP_ORB + +calculation scf +mixing_beta 0.4 +basis_type 
lcao +gamma_only 1 +symmetry 0 + +ecutwfc 60 +lcao_dr 1e-3 +scf_nmax 20 + +ks_solver pexsi + +pexsi_npole 40 \ No newline at end of file diff --git a/examples/pexsi/scf_Si64/KPT b/examples/pexsi/scf_Si64/KPT new file mode 100644 index 0000000000..c289c0158a --- /dev/null +++ b/examples/pexsi/scf_Si64/KPT @@ -0,0 +1,4 @@ +K_POINTS +0 +Gamma +1 1 1 0 0 0 diff --git a/examples/pexsi/scf_Si64/STRU b/examples/pexsi/scf_Si64/STRU new file mode 100755 index 0000000000..70e722c0ec --- /dev/null +++ b/examples/pexsi/scf_Si64/STRU @@ -0,0 +1,85 @@ +ATOMIC_SPECIES +Si 28 Si_ONCV_PBE-1.0.upf + +NUMERICAL_ORBITAL +Si_gga_8au_60Ry_2s2p1d.orb + +LATTICE_CONSTANT +10.2 // add lattice constant + +LATTICE_VECTORS +2 0 0 +0 2 0 +0 0 2 + +ATOMIC_POSITIONS +Cartesian //Cartesian or Direct coordinate. + +Si // Element type +0 // magnetism +64 // number of atoms +0.00 0.00 0.00 0 0 0 +0.25 0.25 0.25 0 0 0 +0.00 0.50 0.50 0 0 0 +0.25 0.75 0.75 0 0 0 +0.50 0.00 0.50 0 0 0 +0.75 0.25 0.75 0 0 0 +0.50 0.50 0.00 0 0 0 +0.75 0.75 0.25 0 0 0 +1.00 0.00 0.00 0 0 0 +1.25 0.25 0.25 0 0 0 +1.00 0.50 0.50 0 0 0 +1.25 0.75 0.75 0 0 0 +1.50 0.00 0.50 0 0 0 +1.75 0.25 0.75 0 0 0 +1.50 0.50 0.00 0 0 0 +1.75 0.75 0.25 0 0 0 +0.00 1.00 0.00 0 0 0 +0.25 1.25 0.25 0 0 0 +0.00 1.50 0.50 0 0 0 +0.25 1.75 0.75 0 0 0 +0.50 1.00 0.50 0 0 0 +0.75 1.25 0.75 0 0 0 +0.50 1.50 0.00 0 0 0 +0.75 1.75 0.25 0 0 0 +1.00 1.00 0.00 0 0 0 +1.25 1.25 0.25 0 0 0 +1.00 1.50 0.50 0 0 0 +1.25 1.75 0.75 0 0 0 +1.50 1.00 0.50 0 0 0 +1.75 1.25 0.75 0 0 0 +1.50 1.50 0.00 0 0 0 +1.75 1.75 0.25 0 0 0 +0.00 0.00 1.00 0 0 0 +0.25 0.25 1.25 0 0 0 +0.00 0.50 1.50 0 0 0 +0.25 0.75 1.75 0 0 0 +0.50 0.00 1.50 0 0 0 +0.75 0.25 1.75 0 0 0 +0.50 0.50 1.00 0 0 0 +0.75 0.75 1.25 0 0 0 +1.00 0.00 1.00 0 0 0 +1.25 0.25 1.25 0 0 0 +1.00 0.50 1.50 0 0 0 +1.25 0.75 1.75 0 0 0 +1.50 0.00 1.50 0 0 0 +1.75 0.25 1.75 0 0 0 +1.50 0.50 1.00 0 0 0 +1.75 0.75 1.25 0 0 0 +0.00 1.00 1.00 0 0 0 +0.25 1.25 1.25 0 0 0 +0.00 1.50 1.50 0 0 0 +0.25 1.75 1.75 0 0 0 
+0.50 1.00 1.50 0 0 0 +0.75 1.25 1.75 0 0 0 +0.50 1.50 1.00 0 0 0 +0.75 1.75 1.25 0 0 0 +1.00 1.00 1.00 0 0 0 +1.25 1.25 1.25 0 0 0 +1.00 1.50 1.50 0 0 0 +1.25 1.75 1.75 0 0 0 +1.50 1.00 1.50 0 0 0 +1.75 1.25 1.75 0 0 0 +1.50 1.50 1.00 0 0 0 +1.75 1.75 1.25 0 0 0 + diff --git a/examples/pexsi/scf_spin_Fe2/INPUT b/examples/pexsi/scf_spin_Fe2/INPUT new file mode 100644 index 0000000000..a6a5bcc971 --- /dev/null +++ b/examples/pexsi/scf_spin_Fe2/INPUT @@ -0,0 +1,22 @@ +INPUT_PARAMETERS +suffix autotest +#nbands 40 + +calculation scf +ecutwfc 20 +scf_thr 1.0e-8 +scf_nmax 50 +out_chg 0 + +mixing_type broyden + + +ks_solver pexsi +pexsi_temp 0.1 +pexsi_npole 80 +basis_type lcao +gamma_only 1 +symmetry 0 +nspin 2 +pseudo_dir ../../../tests/PP_ORB +orbital_dir ../../../tests/PP_ORB diff --git a/examples/pexsi/scf_spin_Fe2/KPT b/examples/pexsi/scf_spin_Fe2/KPT new file mode 100644 index 0000000000..c289c0158a --- /dev/null +++ b/examples/pexsi/scf_spin_Fe2/KPT @@ -0,0 +1,4 @@ +K_POINTS +0 +Gamma +1 1 1 0 0 0 diff --git a/examples/pexsi/scf_spin_Fe2/STRU b/examples/pexsi/scf_spin_Fe2/STRU new file mode 100644 index 0000000000..b7a2039467 --- /dev/null +++ b/examples/pexsi/scf_spin_Fe2/STRU @@ -0,0 +1,29 @@ +ATOMIC_SPECIES +Fe1 1.000 Fe_ONCV_PBE-1.0.upf +Fe2 1.000 Fe_ONCV_PBE-1.0.upf + +NUMERICAL_ORBITAL +Fe_gga_9au_100Ry_4s2p2d1f.orb +Fe_gga_9au_100Ry_4s2p2d1f.orb + +LATTICE_CONSTANT +15 + +LATTICE_VECTORS + 1.00 0.50 0.50 + 0.50 1.00 0.50 + 0.50 0.50 1.00 +ATOMIC_POSITIONS +Direct + +Fe1 +5.0 +1 +0.00 0.00 0.00 1 1 1 + +Fe2 +-5.0 +1 +0.50 0.50 0.50 1 1 1 + + diff --git a/source/Makefile b/source/Makefile index c9a6962bb8..e18e54ae9a 100644 --- a/source/Makefile +++ b/source/Makefile @@ -176,6 +176,13 @@ ifdef DeePMD_DIR INCLUDES += -I${TensorFlow_INCLUDE_DIR} endif +ifdef PEXSI_DIR + OBJS_ABACUS += ${OBJS_HSOLVER_PEXSI} + INCLUDES += -I${PEXSI_DIR}/include -I${PARMETIS_DIR}/include -I${DSUPERLU_DIR}/include + LIBS += -L${PEXSI_DIR}/lib -lpexsi -L${DSUPERLU_DIR}/lib 
-lsuperlu_dist -L${PARMETIS_DIR}/lib -lparmetis -lmetis + HONG += -D__PEXSI +endif + include Makefile.Objects #========================== diff --git a/source/Makefile.Objects b/source/Makefile.Objects index f61b7cced1..d56bae5d3c 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -30,6 +30,7 @@ VPATH=./src_global:\ ./module_hsolver:\ ./module_hsolver/kernels:\ ./module_hsolver/genelpa:\ +./module_hsolver/module_pexsi:\ ./module_elecstate:\ ./module_elecstate/kernels:\ ./module_elecstate/potentials:\ @@ -102,6 +103,7 @@ ${OBJS_VDW}\ ${OBJS_DFTU}\ ${OBJS_DELTASPIN}\ ${OBJS_TENSOR}\ +${OBJS_HSOLVER_PEXSI}\ OBJS_MAIN=main.o\ driver.o\ @@ -295,7 +297,7 @@ OBJS_HSOLVER=diago_cg.o\ diago_iter_assist.o\ math_kernel_op.o\ dngvd_op.o\ - + OBJS_HSOLVER_LCAO=hsolver_lcao.o\ diago_blas.o\ diago_elpa.o\ @@ -304,6 +306,13 @@ OBJS_HSOLVER_LCAO=hsolver_lcao.o\ elpa_new_complex.o\ utils.o\ +OBJS_HSOLVER_PEXSI=diago_pexsi.o\ + pexsi_solver.o\ + simple_pexsi.o\ + dist_bcd_matrix.o\ + dist_ccs_matrix.o\ + dist_matrix_transformer.o\ + OBJS_MD=fire.o\ langevin.o\ md_base.o\ diff --git a/source/Makefile.vars b/source/Makefile.vars index 99092a10ac..f00eca8ea3 100644 --- a/source/Makefile.vars +++ b/source/Makefile.vars @@ -59,6 +59,7 @@ CEREAL_DIR = /usr/local/include/cereal ## To use LIBXC: set LIBXC_DIR which contains include and lib/libxc.a (>5.1.7) ## To use DeePMD: set DeePMD_DIR and TensorFlow_DIR ## To use LibRI: set LIBRI_DIR and LIBCOMM_DIR +## To use PEXSI: set PEXSI_DIR DSUPERLU_DIR and PARMETIS_DIR ##--------------------------------------------------------------------- # LIBTORCH_DIR = /usr/local @@ -72,6 +73,10 @@ CEREAL_DIR = /usr/local/include/cereal # LIBRI_DIR = /public/software/LibRI # LIBCOMM_DIR = /public/software/LibComm +# PEXSI_DIR = /public/software/pexsi +# DSUPERLU_DIR = /public/software/superlu_dist +# PARMETIS_DIR = /public/software/parmetis + ##--------------------------------------------------------------------- # NP = 14 # It is not 
supported. use make -j14 or make -j to parallelly compile # DEBUG = OFF diff --git a/source/module_base/global_function.h b/source/module_base/global_function.h index 143ca35057..89e68c9d74 100644 --- a/source/module_base/global_function.h +++ b/source/module_base/global_function.h @@ -350,7 +350,7 @@ T ddot_real( //========================================================== static inline bool IS_COLUMN_MAJOR_KS_SOLVER() { - return GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="scalapack_gvx" || GlobalV::KS_SOLVER=="cusolver" || GlobalV::KS_SOLVER=="cg_in_lcao"; + return GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="scalapack_gvx" || GlobalV::KS_SOLVER=="cusolver" || GlobalV::KS_SOLVER=="cg_in_lcao" || GlobalV::KS_SOLVER=="pexsi"; } }//namespace GlobalFunc diff --git a/source/module_basis/module_ao/ORB_control.cpp b/source/module_basis/module_ao/ORB_control.cpp index 7e1f3636c4..c3a0bb41d2 100644 --- a/source/module_basis/module_ao/ORB_control.cpp +++ b/source/module_basis/module_ao/ORB_control.cpp @@ -184,7 +184,7 @@ void ORB_control::setup_2d_division(std::ofstream& ofs_running, ofs_running << "\n SETUP THE DIVISION OF H/S MATRIX" << std::endl; // (1) calculate nrow, ncol, nloc. - if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "cg_in_lcao") + if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "cg_in_lcao" || ks_solver == "pexsi") { ofs_running << " divide the H&S matrix using 2D block algorithms." 
<< std::endl; #ifdef __MPI @@ -205,7 +205,7 @@ void ORB_control::setup_2d_division(std::ofstream& ofs_running, bool div_2d; if (ks_solver == "lapack" || ks_solver == "cg" || ks_solver == "dav" || ks_solver == "dav_subspace") div_2d = false; #ifdef __MPI - else if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "cg_in_lcao") div_2d = true; + else if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "cg_in_lcao" || ks_solver == "pexsi") div_2d = true; #endif else { @@ -375,7 +375,7 @@ assert(nb2d > 0); } // init blacs context for genelpa - if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "cg_in_lcao") + if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "cg_in_lcao" || ks_solver == "pexsi") { pv->set_desc(nlocal, nlocal, pv->nrow); pv->set_desc_wfc_Eij(nlocal, nbands, pv->nrow); diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp index 4fdbe229fb..fad2b24dfd 100644 --- a/source/module_elecstate/elecstate_lcao.cpp +++ b/source/module_elecstate/elecstate_lcao.cpp @@ -1,4 +1,5 @@ #include "elecstate_lcao.h" +#include #include "cal_dm.h" #include "module_base/timer.h" @@ -128,7 +129,7 @@ if(!GlobalV::dm_to_rho) } if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER == "lapack" - || GlobalV::KS_SOLVER == "cusolver" || GlobalV::KS_SOLVER == "cg_in_lcao") + || GlobalV::KS_SOLVER == "cusolver" || GlobalV::KS_SOLVER == "cg_in_lcao" || GlobalV::KS_SOLVER == "pexsi") { for (int ik = 0; ik < psi.get_nk(); ik++) { @@ -183,6 +184,7 @@ void ElecStateLCAO::psiToRho(const psi::Psi& psi) || GlobalV::KS_SOLVER == "cusolver" || GlobalV::KS_SOLVER == "cg_in_lcao") { ModuleBase::timer::tick("ElecStateLCAO", "cal_dm_2d"); + // get DMK in 2d-block format //cal_dm(this->loc->ParaV, this->wg, psi, 
this->loc->dm_gamma); elecstate::cal_dm_psi(this->DM->get_paraV_pointer(), this->wg, psi, *(this->DM)); @@ -196,7 +198,6 @@ void ElecStateLCAO::psiToRho(const psi::Psi& psi) } } ModuleBase::timer::tick("ElecStateLCAO", "cal_dm_2d"); - for (int ik = 0; ik < psi.get_nk(); ++ik) { // for gamma_only case, no convertion occured, just for print. @@ -266,6 +267,69 @@ double ElecStateLCAO>::get_spin_constrain_energy() return sc.cal_escon(); } +#ifdef __PEXSI +template<> +void ElecStateLCAO::dmToRho(std::vector pexsi_DM, std::vector pexsi_EDM) +{ + ModuleBase::timer::tick("ElecStateLCAO", "dmToRho"); + + int nspin = GlobalV::NSPIN; + if (GlobalV::NSPIN == 4) + { + nspin = 1; + } + + // old 2D-to-Grid conversion has been replaced by new Gint Refactor 2023/09/25 + if (this->loc->out_dm) // keep interface for old Output_DM until new one is ready + { + for (int is = 0; is < nspin; ++is) + { + this->loc->set_dm_gamma(is, pexsi_DM[is]); + } + this->loc->cal_dk_gamma_from_2D_pub(); + } + + this->get_DM()->pexsi_EDM = pexsi_EDM; + + for (int is = 0; is < nspin; is++) + { + this->DM->set_DMK_pointer(is, pexsi_DM[is]); + } + DM->cal_DMR(); + + for (int is = 0; is < GlobalV::NSPIN; is++) + { + ModuleBase::GlobalFunc::ZEROS(this->charge->rho[is], this->charge->nrxx); // mohan 2009-11-10 + } + + ModuleBase::GlobalFunc::NOTE("Calculate the charge on real space grid!"); + this->gint_gamma->transfer_DM2DtoGrid(this->DM->get_DMR_vector()); // transfer DM2D to DM_grid in gint + Gint_inout inout(this->loc->DM, this->charge->rho, Gint_Tools::job_type::rho); + this->gint_gamma->cal_gint(&inout); + if (XC_Functional::get_func_type() == 3 || XC_Functional::get_func_type() == 5) + { + for (int is = 0; is < GlobalV::NSPIN; is++) + { + ModuleBase::GlobalFunc::ZEROS(this->charge->kin_r[0], this->charge->nrxx); + } + Gint_inout inout1(this->loc->DM, this->charge->kin_r, Gint_Tools::job_type::tau); + this->gint_gamma->cal_gint(&inout1); + } + + this->charge->renormalize_rho(); + + 
ModuleBase::timer::tick("ElecStateLCAO", "dmToRho"); + return; +} + +template<> +void ElecStateLCAO>::dmToRho(std::vector*> pexsi_DM, std::vector*> pexsi_EDM) +{ + ModuleBase::WARNING_QUIT("ElecStateLCAO", "pexsi is not completed for multi-k case"); +} + +#endif + template class ElecStateLCAO; // Gamma_only case template class ElecStateLCAO>; // multi-k case diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h index fdc4dd080f..c3e7ae3a2d 100644 --- a/source/module_elecstate/elecstate_lcao.h +++ b/source/module_elecstate/elecstate_lcao.h @@ -1,6 +1,7 @@ #ifndef ELECSTATELCAO_H #define ELECSTATELCAO_H +#include #include "elecstate.h" #include "module_hamilt_lcao/module_gint/gint_gamma.h" #include "module_hamilt_lcao/module_gint/gint_k.h" @@ -64,6 +65,17 @@ class ElecStateLCAO : public ElecState double get_spin_constrain_energy() override; +#ifdef __PEXSI + // use for pexsi + + /** + * @brief calculate electronic charge density from pointers of density matrix calculated by pexsi + * @param pexsi_DM: pointers of density matrix (DMK) calculated by pexsi + * @param pexsi_EDM: pointers of energy-weighed density matrix (EDMK) calculated by pexsi, needed by MD, will be stored in DensityMatrix::pexsi_EDM + */ + void dmToRho(std::vector pexsi_DM, std::vector pexsi_EDM); +#endif + protected: // calculate electronic charge density on grid points or density matrix in real space // the consequence charge density rho saved into rho_out, preparing for charge mixing. 
diff --git a/source/module_elecstate/elecstate_print.cpp b/source/module_elecstate/elecstate_print.cpp index 78be828c66..d762f17826 100644 --- a/source/module_elecstate/elecstate_print.cpp +++ b/source/module_elecstate/elecstate_print.cpp @@ -305,6 +305,10 @@ void ElecState::print_etot(const bool converged, { label = "BP"; } + else if (ks_solver_type == "pexsi") + { + label = "PE"; + } else { ModuleBase::WARNING_QUIT("Energy", "print_etot found unknown ks_solver_type"); diff --git a/source/module_elecstate/module_dm/density_matrix.h b/source/module_elecstate/module_dm/density_matrix.h index 6255401850..90f0db4ae7 100644 --- a/source/module_elecstate/module_dm/density_matrix.h +++ b/source/module_elecstate/module_dm/density_matrix.h @@ -209,6 +209,14 @@ class DensityMatrix std::vector EDMK; // for TD-DFT +#ifdef __PEXSI + /** + * @brief EDM storage for PEXSI + * used in MD calculation + */ + std::vector pexsi_EDM; +#endif + private: /** * @brief HContainer for density matrix in real space for 2D parallelization @@ -256,6 +264,8 @@ class DensityMatrix * _nks = kv->_nks / nspin */ int _nks = 0; + + }; } // namespace elecstate diff --git a/source/module_esolver/esolver_ks_lcao_elec.cpp b/source/module_esolver/esolver_ks_lcao_elec.cpp index aa8e53bd7c..36a8bf35a4 100644 --- a/source/module_esolver/esolver_ks_lcao_elec.cpp +++ b/source/module_esolver/esolver_ks_lcao_elec.cpp @@ -107,7 +107,7 @@ void ESolver_KS_LCAO::beforesolver(const int istep) { nsk = GlobalV::NSPIN; ncol = this->LOWF.ParaV->ncol_bands; - if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "lapack_gvx" + if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "lapack_gvx" || GlobalV::KS_SOLVER=="pexsi" || GlobalV::KS_SOLVER == "cusolver") { ncol = this->LOWF.ParaV->ncol; diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp index e719bc8537..992619aa17 100644 --- 
a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp @@ -1,4 +1,5 @@ #include "FORCE_gamma.h" +#include "module_elecstate/elecstate_lcao.h" #include "module_hamilt_pw/hamilt_pwdft/global.h" #include "module_base/parallel_reduce.h" #include "module_base/timer.h" @@ -36,8 +37,22 @@ void Force_LCAO_gamma::cal_foverlap( // construct a DensityMatrix for Gamma-Only const Parallel_Orbitals* pv = this->ParaV; elecstate::DensityMatrix EDM(pv,GlobalV::NSPIN); - - elecstate::cal_dm_psi(EDM.get_paraV_pointer(), wgEkb, psid[0], EDM); + +#ifdef __PEXSI + if (GlobalV::KS_SOLVER == "pexsi") + { + auto pes = dynamic_cast*>(pelec); + for (int ik = 0; ik < GlobalV::NSPIN; ik++) + { + EDM.set_DMK_pointer(ik, pes->get_DM()->pexsi_EDM[ik]); + } + + } + else +#endif + { + elecstate::cal_dm_psi(EDM.get_paraV_pointer(), wgEkb, psid[0], EDM); + } ModuleBase::timer::tick("Force_LCAO_gamma","cal_edm_2d"); diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp index 77035035da..247c8d8b0e 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp @@ -63,7 +63,7 @@ void LCAO_Deepks::cal_o_delta_k(const std::vector= 0 && nu >= 0) { int iic; - if(GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="scalapack_gvx") // save the matrix as column major format + if(GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="scalapack_gvx" || GlobalV::KS_SOLVER=="pexsi") // save the matrix as column major format { iic = mu + nu * pv->nrow; } diff --git a/source/module_hsolver/CMakeLists.txt b/source/module_hsolver/CMakeLists.txt index eac67c9b32..c4e7410da5 100644 --- a/source/module_hsolver/CMakeLists.txt +++ b/source/module_hsolver/CMakeLists.txt @@ -38,6 +38,14 @@ if(ENABLE_LCAO) add_coverage(diag_cusolver) endif() endif() + + if(ENABLE_PEXSI) + list(APPEND 
objects + diago_pexsi.cpp + ) + add_subdirectory(module_pexsi) + endif() + endif() add_library( @@ -57,4 +65,5 @@ endif() IF (BUILD_TESTING) add_subdirectory(test) add_subdirectory(kernels/test) + message(STATUS "Building tests") endif() diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp new file mode 100644 index 0000000000..4076922626 --- /dev/null +++ b/source/module_hsolver/diago_pexsi.cpp @@ -0,0 +1,95 @@ +#include +#include +#include +#ifdef __PEXSI +#include "diago_pexsi.h" +#include "module_base/global_variable.h" +#include "module_base/tool_quit.h" +#include "module_basis/module_ao/parallel_orbitals.h" +#include "module_pexsi/pexsi_solver.h" + +typedef hamilt::MatrixBlock matd; +typedef hamilt::MatrixBlock> matcd; + +namespace hsolver +{ +template +DiagoPexsi::DiagoPexsi(const Parallel_Orbitals* ParaV_in) +{ + int nspin = GlobalV::NSPIN; + if (GlobalV::NSPIN == 4) + { + nspin = 1; + } + mu_buffer.resize(nspin); + for (int i = 0; i < nspin; i++) + { + mu_buffer[i] = this->ps->pexsi_mu; + } + + this->ParaV = ParaV_in; + this->ps = std::make_unique(); + + this->DM.resize(nspin); + this->EDM.resize(nspin); + for (int i = 0; i < nspin; i++) + { + this->DM[i] = new T[ParaV->nrow * ParaV->ncol]; + this->EDM[i] = new T[ParaV->nrow * ParaV->ncol]; + } + +} + +template +DiagoPexsi::~DiagoPexsi() +{ + int nspin = GlobalV::NSPIN; + if (GlobalV::NSPIN == 4) + { + nspin = 1; + } + for (int i = 0; i < nspin; i++) + { + delete[] this->DM[i]; + delete[] this->EDM[i]; + } + +} + +template <> +void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& psi, double* eigenvalue_in) +{ + ModuleBase::TITLE("DiagoPEXSI", "diag"); + matd h_mat, s_mat; + phm_in->matrix(h_mat, s_mat); + std::vector eigen(GlobalV::NLOCAL, 0.0); + int ik = psi.get_current_k(); + this->ps->prepare(this->ParaV->blacs_ctxt, + this->ParaV->nb, + this->ParaV->nrow, + this->ParaV->ncol, + h_mat.p, + s_mat.p, + DM[ik], + EDM[ik]); + this->ps->solve(mu_buffer[ik]); + 
this->totalFreeEnergy = this->ps->get_totalFreeEnergy(); + this->totalEnergyH = this->ps->get_totalEnergyH(); + this->totalEnergyS = this->ps->get_totalEnergyS(); + this->mu_buffer[ik] = this->ps->get_mu(); +} + +template <> +void DiagoPexsi>::diag(hamilt::Hamilt>* phm_in, + psi::Psi>& psi, + double* eigenvalue_in) +{ + ModuleBase::TITLE("DiagoPEXSI", "diag"); + ModuleBase::WARNING_QUIT("DiagoPEXSI", "PEXSI is not completed for multi-k case"); +} + +template class DiagoPexsi; +template class DiagoPexsi >; + +} // namespace hsolver +#endif \ No newline at end of file diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h new file mode 100644 index 0000000000..9802273e4b --- /dev/null +++ b/source/module_hsolver/diago_pexsi.h @@ -0,0 +1,35 @@ +#ifndef DIGAOPEXSI_H +#define DIGAOPEXSI_H + +#include +#include +#include "diagh.h" +#include "module_base/global_variable.h" +#include "module_basis/module_ao/parallel_orbitals.h" +#include "module_pexsi/pexsi_solver.h" + +namespace hsolver +{ + +template +class DiagoPexsi : public DiagH +{ + private: + using Real = typename GetTypeReal::type; + std::vector mu_buffer; + + public: + DiagoPexsi(const Parallel_Orbitals* ParaV_in); + void diag(hamilt::Hamilt* phm_in, psi::Psi& psi, Real* eigenvalue_in) override; + const Parallel_Orbitals* ParaV; + std::vector DM; + std::vector EDM; + double totalEnergyH; + double totalEnergyS; + double totalFreeEnergy; + std::unique_ptr ps; + ~DiagoPexsi(); +}; +} // namespace hsolver + +#endif diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp index 703430bbf0..70c635806c 100644 --- a/source/module_hsolver/hsolver_lcao.cpp +++ b/source/module_hsolver/hsolver_lcao.cpp @@ -16,6 +16,10 @@ #ifdef __CUDA #include "diago_cusolver.h" #endif +#ifdef __PEXSI +#include "diago_pexsi.h" +#include "module_elecstate/elecstate_lcao.h" +#endif namespace hsolver { @@ -139,6 +143,27 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt* 
pHamilt, this->pdiagh->method = this->method; } } +#ifdef __PEXSI + else if (this->method == "pexsi") + { + if (this->pdiagh != nullptr) + { + if (this->pdiagh->method != this->method) + { + delete[] this->pdiagh; + this->pdiagh = nullptr; + } + auto tem = dynamic_cast*>(this->pdiagh); + } + if (this->pdiagh == nullptr) + { + DiagoPexsi* tem = new DiagoPexsi(this->ParaV); + this->pdiagh = tem; + // this->pdiagh = dynamic_cast*>(tem); + this->pdiagh->method = this->method; + } + } +#endif else { ModuleBase::WARNING_QUIT("HSolverLCAO::solve", "This method of DiagH is not supported!"); @@ -183,7 +208,7 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt* pHamilt, if (this->method != "genelpa" && this->method != "scalapack_gvx" && this->method != "lapack" - && this->method != "cusolver" && this->method != "cg_in_lcao") + && this->method != "cusolver" && this->method != "cg_in_lcao" && this->method != "pexsi") { delete this->pdiagh; this->pdiagh = nullptr; @@ -198,7 +223,21 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt* pHamilt, // calculate charge by psi // called in scf calculation - pes->psiToRho(psi); +#ifdef __PEXSI + if (this->method == "pexsi") + { + DiagoPexsi* tem = dynamic_cast*>(this->pdiagh); + if (tem==nullptr) ModuleBase::WARNING_QUIT("HSolverLCAO", "pexsi need debug!"); + elecstate::ElecStateLCAO* _pes = dynamic_cast*>(pes); + pes->f_en.eband = tem->totalFreeEnergy; + // maybe eferm could be dealt with in the future + _pes->dmToRho(tem->DM, tem->EDM); + } + else +#endif + { + pes->psiToRho(psi); + } ModuleBase::timer::tick("HSolverLCAO", "solve"); } diff --git a/source/module_hsolver/module_pexsi/CMakeLists.txt b/source/module_hsolver/module_pexsi/CMakeLists.txt new file mode 100644 index 0000000000..87d16ff557 --- /dev/null +++ b/source/module_hsolver/module_pexsi/CMakeLists.txt @@ -0,0 +1,5 @@ +add_library(pexsi OBJECT dist_bcd_matrix.cpp dist_ccs_matrix.cpp dist_matrix_transformer.cpp pexsi_solver.cpp simple_pexsi.cpp) + +if(ENABLE_COVERAGE) + 
add_coverage(pexsi) +endif() diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp new file mode 100644 index 0000000000..ff3f85f32b --- /dev/null +++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp @@ -0,0 +1,114 @@ +#ifdef __PEXSI +#include "dist_bcd_matrix.h" +#include +#include +extern "C" +{ + void Cblacs_gridinfo(int icontxt, int* nprow, int* npcol, int* myprow, int* mypcol); + int Cblacs_pnum(int blacs_ctxt, int prow, int pcol); +}; + +namespace pexsi +{ +DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, + MPI_Group group, + int blacs_ctxt, + int size, + int nblk, + int nrow, + int ncol, + char layout) +{ + this->comm = comm; + this->group = group; + this->blacs_ctxt = blacs_ctxt; + this->size = size; + this->nblk = nblk; + this->nrow = nrow; + this->ncol = ncol; + if (layout == 'r' || layout == 'c') + { + this->layout = layout; + } + else + { + throw("The layout must be 'r' or 'c'"); + } + + if (comm != MPI_COMM_NULL) + { + MPI_Comm_rank(comm, &this->myproc); + Cblacs_gridinfo(blacs_ctxt, &this->nprows, &this->npcols, &this->myprow, &this->mypcol); + } + else + { + this->myproc = -1; + this->myprow = -1; + this->mypcol = -1; + } + + // synchronize matrix parameters to all processes, including those are not in bcd group + int myid_in_comm_world; + MPI_Comm_rank(MPI_COMM_WORLD, &myid_in_comm_world); + if (myid_in_comm_world == 0) + { + MPI_Comm_size(comm, &this->nprocs); + int PARA_BCAST[4] = {this->nblk, this->nprocs, this->nprows, this->npcols}; + MPI_Bcast(&PARA_BCAST[0], 4, MPI_INT, 0, MPI_COMM_WORLD); + } + else + { + int PARA_BCAST[4]; + MPI_Bcast(&PARA_BCAST[0], 4, MPI_INT, 0, MPI_COMM_WORLD); + this->nblk = PARA_BCAST[0]; + this->nprocs = PARA_BCAST[1]; + this->nprows = PARA_BCAST[2]; + this->npcols = PARA_BCAST[3]; + } + this->prowpcol2pnum = new int[this->nprocs]; + if (myid_in_comm_world == 0) + { + for (int i = 0; i < this->nprows; ++i) + { + for (int j = 0; j < 
this->npcols; ++j) + { + this->prowpcol2pnum[i * this->npcols + j] = Cblacs_pnum(this->blacs_ctxt, i, j); + } + } + } + MPI_Bcast(this->prowpcol2pnum, this->nprocs, MPI_INT, 0, MPI_COMM_WORLD); +} + +DistBCDMatrix::~DistBCDMatrix() +{ + delete[] prowpcol2pnum; +} + +int DistBCDMatrix::globalRow(const int localRow) +{ + return (localRow / nblk * nprows + myprow) * nblk + localRow % nblk; +} + +int DistBCDMatrix::globalCol(const int localCol) +{ + return (localCol / nblk * npcols + mypcol) * nblk + localCol % nblk; +} + +int DistBCDMatrix::localRow(const int globalRow, int& myprow) +{ + myprow = int((globalRow % (nblk * nprows)) / nblk); + return int(globalRow / (nblk * nprows)) * nblk + globalRow % nblk; +} + +int DistBCDMatrix::localCol(const int globalCol, int& mypcol) +{ + mypcol = int((globalCol % (nblk * npcols)) / nblk); + return int(globalCol / (nblk * npcols)) * nblk + globalCol % nblk; +} + +int DistBCDMatrix::pnum(const int prow, const int pcol) +{ + return this->prowpcol2pnum[prow * this->npcols + pcol]; +} +} // namespace pexsi +#endif \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h new file mode 100644 index 0000000000..94b61277d2 --- /dev/null +++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h @@ -0,0 +1,85 @@ +#ifndef DISTBCDMATRIX_H +#define DISTBCDMATRIX_H + +#include + +#include "module_hsolver/module_pexsi/dist_matrix_transformer.h" +// a Block Cyclic Data Distribution matrix +// http://www.netlib.org/utk/papers/factor/node3.html +// local matrix elements is stored in column major +// used for pexsi +namespace pexsi +{ +class DistBCDMatrix +{ + public: + DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol, char layout); + ~DistBCDMatrix(); + + int globalRow(const int localRow); + int globalCol(const int localCol); + int localRow(const int globalRow, int& myprow); + int localCol(const int 
globalCol, int& mypcol); + int pnum(const int prow, const int pcol); + + const MPI_Comm get_comm() const + { + return comm; + }; + const MPI_Group get_group() const + { + return group; + }; + const int get_nrow() const + { + return nrow; + }; + const int get_ncol() const + { + return ncol; + }; + const char get_layout() const + { + return layout; + }; + + private: + // MPI communicator + MPI_Comm comm; + MPI_Group group; + + // blacs context + int blacs_ctxt; + + // row and column of process grid + int nprows; + int npcols; + + // total number of processes + int nprocs; + + // Matrix size + int size; + + // block size + int nblk; + + // row and c0lumn of Local matrix part + int nrow; + int ncol; + + // current process row and column + int myprow; + int mypcol; + + // current process id + int myproc; + + int* prowpcol2pnum; + // the local data layout + // 'R' or 'r' for row-major, which is used in C/C++ + // 'C' or 'c' for column-major, which is used in Fortran + char layout; +}; +} // namespace pexsi +#endif // DISTBCDMATRIX_H \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp new file mode 100644 index 0000000000..74391f2fbe --- /dev/null +++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp @@ -0,0 +1,119 @@ +#ifdef __PEXSI +#include "dist_ccs_matrix.h" + +#include + +namespace pexsi +{ +DistCCSMatrix::DistCCSMatrix(void) +{ + this->comm = MPI_COMM_WORLD; + this->size = 0; + this->nnz = 0; + this->nnzLocal = 0; + this->numColLocal = 0; + this->colptrLocal = nullptr; + this->rowindLocal = nullptr; +} + +DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in) +{ + this->comm = comm_in; + this->size = 0; + this->nnz = 0; + this->nnzLocal = 0; + this->numColLocal = 0; + this->colptrLocal = nullptr; + this->rowindLocal = nullptr; +} + +DistCCSMatrix::DistCCSMatrix(int size_in, int nnzLocal_in) +{ + this->comm = MPI_COMM_WORLD; + this->size = size_in; + this->nnzLocal = 
nnzLocal_in; + MPI_Request req; + MPI_Iallreduce(&nnzLocal, &this->nnz, 1, MPI_INT, MPI_SUM, this->comm, &req); + this->numColLocal = 0; + this->colptrLocal = new int[size]; + this->rowindLocal = new int[nnzLocal]; + + MPI_Status req_status; + MPI_Wait(&req, &req_status); +} + +DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in, int nproc_data_in, int size_in) +{ + this->comm = comm_in; + this->nproc_data = nproc_data_in; + int nproc_data_range[3] = {0, this->nproc_data - 1, 1}; + // create processes group with data: this->group_data and associated communicator + MPI_Comm_group(this->comm, &this->group); + MPI_Group_range_incl(this->group, 1, &nproc_data_range, &this->group_data); + this->comm_data = MPI_COMM_NULL; + MPI_Comm_create(this->comm, this->group_data, &this->comm_data); + this->size = size_in; + this->nnz = 0; + this->nnzLocal = 0; + int myproc; + if (comm != MPI_COMM_NULL) + { + MPI_Comm_size(comm, &nprocs); + MPI_Comm_rank(comm, &myproc); + if (myproc < nproc_data - 1) + { + this->numColLocal = size / nproc_data; + this->firstCol = size / nproc_data * myproc; + this->colptrLocal = new int[this->numColLocal + 1]; + this->rowindLocal = nullptr; + } + else if (myproc == nproc_data - 1) + { + this->numColLocal = size - myproc * (size / nproc_data); + this->firstCol = size / nproc_data * myproc; + this->colptrLocal = new int[this->numColLocal + 1]; + this->rowindLocal = nullptr; + } + else + { + this->numColLocal = 0; + this->firstCol = size - 1; + this->colptrLocal = new int[this->numColLocal + 1]; + this->rowindLocal = nullptr; + } + } +} + +int DistCCSMatrix::globalCol(int localCol) +{ + return this->firstCol + localCol; +} + +// NOTE: the process id is 0-based +int DistCCSMatrix::localCol(int globalCol, int& mypcol) +{ + mypcol = int(globalCol / int(this->size / this->nproc_data)); + if (mypcol >= this->nproc_data) + mypcol = this->nproc_data - 1; + + return mypcol > 0 ? 
globalCol - (this->size / this->nproc_data) * mypcol : globalCol; +} + +void DistCCSMatrix::setnnz(int nnzLocal_in) +{ + if (this->comm_data != MPI_COMM_NULL) + { + MPI_Allreduce(&nnzLocal_in, &this->nnz, 1, MPI_INT, MPI_SUM, this->comm_data); + this->nnzLocal = nnzLocal_in; + this->rowindLocal = new int[nnzLocal]; + this->colptrLocal[this->numColLocal] = nnzLocal_in + 1; + } +} + +DistCCSMatrix::~DistCCSMatrix() +{ + delete[] colptrLocal; + delete[] rowindLocal; +} +} // namespace pexsi +#endif \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.h b/source/module_hsolver/module_pexsi/dist_ccs_matrix.h new file mode 100644 index 0000000000..a63a0dc16c --- /dev/null +++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.h @@ -0,0 +1,95 @@ +#ifndef DISTCCSMATRIX_H +#define DISTCCSMATRIX_H + +#include +// Distributed Compressed Column Storage Matrix format +// used for PEXSI +namespace pexsi +{ +class DistCCSMatrix +{ + + public: + DistCCSMatrix(); + DistCCSMatrix(MPI_Comm comm); + DistCCSMatrix(int size, int nnzLocal); + DistCCSMatrix(MPI_Comm comm, int size, int nnzLocal); + DistCCSMatrix(MPI_Comm comm, int size, int nnzLocal, double* valLocal, int* index); + + int globalCol(int localCol); + int localCol(int globalCol, int& mypcol); + void setnnz(int nnzLocal); + + const MPI_Comm get_comm() const + { + return comm; + }; + const MPI_Group get_group() const + { + return group; + }; + const MPI_Group get_group_data() const + { + return group_data; + }; + const int get_size() const + { + return size; + }; + const int get_nnz() const + { + return nnz; + }; + const int get_nnzlocal() const + { + return nnzLocal; + }; + const int get_numcol_local() const + { + return numColLocal; + }; + int* get_colptr_local() const + { + return colptrLocal; + }; + int* get_rowind_local() const + { + return rowindLocal; + }; + + ~DistCCSMatrix(); + + private: + // MPI communicator + MPI_Comm comm; + MPI_Group group; + + // total number of processes 
and the processes with data in + int nprocs; + int nproc_data; + MPI_Group group_data; + MPI_Comm comm_data; + + // Matrix size + int size; + + // Number of non-zero values in the matrix + int nnz; + + // Number of non-zero values in the matrix of the local process + int nnzLocal; + + // number of columns in current process + int numColLocal; + + // the first column index in current process + int firstCol; + + // Array stores the indices to the nonzero row indices in rowptrLocal and nzvalLocal + int* colptrLocal; + int* rowindLocal; + + // friend class DistMatrixTransformer; +}; +} // namespace pexsi +#endif // DISTCCSMATRIX_H diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp new file mode 100644 index 0000000000..313a840e68 --- /dev/null +++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp @@ -0,0 +1,765 @@ +#ifdef __PEXSI +#include "dist_matrix_transformer.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include "dist_bcd_matrix.h" +#include "dist_ccs_matrix.h" + +namespace pexsi +{ +// find the minimum index, the return value will be a non-negtive value index value if it is found, otherwise will be a +// negtive value the size_process and displacement_process array will be changed after the index is found isFirst: +// wether this function is called for the first time for a index array; nprocs: total number of processes size_process: +// the number of indices in each process displacement_process: the start position in each process index: the array +// contains the indices +inline int DistMatrixTransformer::MinimumIndexPosition(const bool isFirst, + const int nprocs, + int* size_process, + int* displacement_process, + const int* index) +{ + // usually the minimum index is continuous, so it will be a good idea to + // check the one next to the previous index first. 
+ static int pre_position; // previous position in index array of minimum index, + static int pre_process; // the process contains previous index + + int minimum_index + = INT_MAX; // the minimum index, initial value is a large number which is larger than any other index; + int minimum_position = -1; + int minimum_process = -1; + + if (isFirst) + { + for (int i = 0; i < nprocs; ++i) + { + if (size_process[i] > 0) + { + if (minimum_index > index[displacement_process[i]]) // find a smaller index + { + minimum_position = displacement_process[i]; + minimum_index = index[minimum_position]; + minimum_process = i; + } + } + } + if (minimum_process >= 0) // find it! + { + ++displacement_process[minimum_process]; + --size_process[minimum_process]; + } + pre_position = minimum_position; + pre_process = minimum_process; + return minimum_position; + } + else + { + // check the next one of pre_position + if (size_process[pre_process] > 0 && // the previous process still has elements + index[pre_position + 1] == index[pre_position] + 1) // find it! + { + ++displacement_process[pre_process]; + --size_process[pre_process]; + ++pre_position; // new pre_position is the next one + // new pre_process keeps the same + return pre_position; // current position is the new pre_position + } + + // if the next one of pre_position is not the minimum one + for (int i = 0; i < nprocs; ++i) + { + if (size_process[i] > 0) + { + if (minimum_index > index[displacement_process[i]]) + { + minimum_position = displacement_process[i]; + minimum_index = index[minimum_position]; + minimum_process = i; + } + } + } + if (minimum_process >= 0) // find it! 
+ { + ++displacement_process[minimum_process]; + --size_process[minimum_process]; + } + pre_position = minimum_position; + pre_process = minimum_process; + return minimum_position; + } +} + +inline void DistMatrixTransformer::buildCCSParameter(const int size, + const int nprocs, + std::vector size_process, + std::vector displacement_process, + const int* position_index, + DistCCSMatrix& DST_Matrix, + int* buffer2ccsIndex) +{ + // find the minimum one from left buffer index + if (DST_Matrix.get_nnzlocal() <= 0) + return; + + int pre_col = -1; + int nnz_now = 0; + int p_mini; + p_mini = MinimumIndexPosition(true, nprocs, &size_process[0], &displacement_process[0], position_index); + while (p_mini >= 0) + { + int index_mini = position_index[p_mini]; + int col_mini = index_mini / DST_Matrix.get_size(); //-DST_Matrix.firstCol; + int row_mini = index_mini % DST_Matrix.get_size(); + if (col_mini > pre_col) // a new column starts, column pointer is a 1-based array + { + pre_col = col_mini; + DST_Matrix.get_colptr_local()[col_mini] = nnz_now + 1; + } + DST_Matrix.get_rowind_local()[nnz_now] = row_mini + 1; // setup row index array, which is also 1-based + // copy data from buffer to M, be careful M is a 0-based array + buffer2ccsIndex[nnz_now] = p_mini; + ++nnz_now; + p_mini = MinimumIndexPosition(false, nprocs, &size_process[0], &displacement_process[0], position_index); + } + // The last element of colptrLocal is nnzLocal+1 + DST_Matrix.get_colptr_local()[DST_Matrix.get_numcol_local()] = nnz_now + 1; +} + +inline void DistMatrixTransformer::buffer2CCSvalue(int nnzLocal, + int* buffer2ccsIndex, + double* buffer, + double* nzvalLocal) +{ + for (int i = 0; i < nnzLocal; ++i) + { + nzvalLocal[i] = buffer[buffer2ccsIndex[i]]; + } +} +inline void DistMatrixTransformer::countMatrixDistribution(int N, double* A, std::map& P) +{ + for (int i = 0; i < N; ++i) + { + int key; + if (fabs(A[i] < 1e-31)) + key = -100; + else + key = floor(log10(fabs(A[i]))); + ++P[key]; + } +} + +// 
find out the index of non-zero elements +inline int DistMatrixTransformer::getNonZeroIndex(char layout, + const int nrow, + const int ncol, + double* H_2d, + double* S_2d, + const double ZERO_Limit, + int& nnz, + std::vector& rowidx, + std::vector& colidx) +{ + int idx = 0; + nnz = 0; + colidx.clear(); + rowidx.clear(); + if (layout == 'c') + { + for (int i = 0; i < ncol; ++i) + { + for (int j = 0; j < nrow; ++j) + { + idx = i * nrow + j; + if (fabs(H_2d[idx]) > ZERO_Limit || fabs(S_2d[idx]) > ZERO_Limit) + { + ++nnz; + colidx.push_back(i); + rowidx.push_back(j); + } + } + } + } + else if (layout == 'r') + { + for (int i = 0; i < ncol; ++i) + { + for (int j = 0; j < nrow; ++j) + { + idx = j * ncol + i; + if (fabs(H_2d[idx]) > ZERO_Limit || fabs(S_2d[idx]) > ZERO_Limit) + { + ++nnz; + colidx.push_back(i); + rowidx.push_back(j); + } + } + } + } + else + { + return 1; + } + return 0; +} + +int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix, + DistCCSMatrix& DST_Matrix, + const int NPROC_TRANS, + MPI_Group& GROUP_TRANS, + MPI_Comm& COMM_TRANS, + const int nnz, + std::vector& rowidx, + std::vector& colidx, + int& sender_size, + std::vector& sender_size_process, + std::vector& sender_displacement_process, + int& receiver_size, + std::vector& receiver_size_process, + std::vector& receiver_displacement_process, + std::vector& buffer2ccsIndex) +{ + int myproc; + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + sender_size = nnz; + std::fill(sender_size_process.begin(), sender_size_process.end(), 0); + // create process id map from group_data to group_trans + int nproc_data; + std::vector proc_map_data_trans; + if (myproc == 0) + { + MPI_Group_size(DST_Matrix.get_group_data(), &nproc_data); + MPI_Bcast(&nproc_data, 1, MPI_INT, 0, COMM_TRANS); + proc_map_data_trans.resize(nproc_data, 0); + for (int i = 0; i < nproc_data; ++i) + { + MPI_Group_translate_ranks(DST_Matrix.get_group_data(), 1, &i, GROUP_TRANS, &proc_map_data_trans[i]); + } + 
MPI_Bcast(&proc_map_data_trans[0], nproc_data, MPI_INT, 0, COMM_TRANS); + } + else + { + MPI_Bcast(&nproc_data, 1, MPI_INT, 0, COMM_TRANS); + proc_map_data_trans.resize(nproc_data, 0); + MPI_Bcast(&proc_map_data_trans[0], nproc_data, MPI_INT, 0, COMM_TRANS); + } + + for (int i = 0; i < nnz; ++i) + { + int l_col = colidx[i]; + int g_col = SRC_Matrix.globalCol(l_col); + int dst_process; + int dst_col = DST_Matrix.localCol(g_col, dst_process); + int dst_process_trans = proc_map_data_trans[dst_process]; + ++sender_size_process[dst_process_trans]; + } + // transfer sender index size to receiver index size + MPI_Alltoall(&sender_size_process[0], 1, MPI_INT, &receiver_size_process[0], 1, MPI_INT, COMM_TRANS); + // setup all2all parameters + sender_displacement_process[0] = 0; + for (int i = 1; i < NPROC_TRANS; ++i) + { + sender_displacement_process[i] = sender_displacement_process[i - 1] + sender_size_process[i - 1]; + } + + receiver_displacement_process[0] = 0; + receiver_size = receiver_size_process[0]; + for (int i = 1; i < NPROC_TRANS; ++i) + { + receiver_displacement_process[i] = receiver_displacement_process[i - 1] + receiver_size_process[i - 1]; + receiver_size += receiver_size_process[i]; + } + + // setup receiver index + // setup sender_index + std::vector sender_index(sender_size); + for (int i = 0; i < nnz; ++i) + { + int l_col = colidx[i]; + int g_col = SRC_Matrix.globalCol(l_col); + int dst_process; + int dst_col = DST_Matrix.localCol(g_col, dst_process); + int l_row = rowidx[i]; + int dst_row = SRC_Matrix.globalRow(l_row); + sender_index[i] = dst_col * DST_Matrix.get_size() + dst_row; + } + + // transfer index to receiver + std::vector receiver_index(receiver_size); + MPI_Alltoallv(&sender_index[0], + &sender_size_process[0], + &sender_displacement_process[0], + MPI_INT, + &receiver_index[0], + &receiver_size_process[0], + &receiver_displacement_process[0], + MPI_INT, + COMM_TRANS); + + // setup buffer2ccsIndex based on receiver_index + 
buffer2ccsIndex.resize(receiver_size); + DST_Matrix.setnnz(receiver_size); + buildCCSParameter(receiver_size, + NPROC_TRANS, + receiver_size_process, + receiver_displacement_process, + &receiver_index[0], + DST_Matrix, + &buffer2ccsIndex[0]); + return 0; +} + +int DistMatrixTransformer::newGroupCommTrans(DistBCDMatrix& SRC_Matrix, + DistCCSMatrix& DST_Matrix, + MPI_Group& GROUP_TRANS, + MPI_Comm& COMM_TRANS) +{ + // build transfortram communicator which contains both processes of BCD processors and + // CCS processors with nonzero elements + MPI_Group_union(DST_Matrix.get_group_data(), SRC_Matrix.get_group(), &GROUP_TRANS); + MPI_Comm_create(MPI_COMM_WORLD, GROUP_TRANS, &COMM_TRANS); + return 0; +} + +int DistMatrixTransformer::deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS) +{ + MPI_Group_free(&GROUP_TRANS); + if (COMM_TRANS != MPI_COMM_NULL) + { + MPI_Comm_free(&COMM_TRANS); + } + return 0; +} + +// transform two sparse matrices from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution +// two destination matrices share the same non-zero elements positions +// if either of two elements in source matrices is non-zeros, the elements in the destination matrices are non-zero, +// even if one of them is acturely zero All matrices must have same MPI communicator +int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, + double* H_2d, + double* S_2d, + const double ZERO_Limit, + DistCCSMatrix& DST_Matrix, + double*& H_ccs, + double*& S_ccs) +{ + MPI_Group GROUP_TRANS; + MPI_Comm COMM_TRANS = MPI_COMM_NULL; + newGroupCommTrans(SRC_Matrix, DST_Matrix, GROUP_TRANS, COMM_TRANS); + if (COMM_TRANS != MPI_COMM_NULL) + { + // set up sender and receiver= + int NPROC_TRANS; + MPI_Comm_size(COMM_TRANS, &NPROC_TRANS); + int sender_size; + std::vector sender_size_process(NPROC_TRANS); + std::vector sender_displacement_process(NPROC_TRANS); + int receiver_size; + std::vector receiver_size_process(NPROC_TRANS); + 
std::vector receiver_displacement_process(NPROC_TRANS); + + // find out the non-zeros elements' positions + std::vector rowidx; + std::vector colidx; + int nnz = 0; + if (SRC_Matrix.get_comm() != MPI_COMM_NULL) + { + getNonZeroIndex(SRC_Matrix.get_layout(), + SRC_Matrix.get_nrow(), + SRC_Matrix.get_ncol(), + H_2d, + S_2d, + ZERO_Limit, + nnz, + rowidx, + colidx); + } + // build all2all transformation parameters and the map index of receiver buffer + std::vector buffer2ccsIndex; + buildTransformParameter(SRC_Matrix, + DST_Matrix, + NPROC_TRANS, + GROUP_TRANS, + COMM_TRANS, + nnz, + rowidx, + colidx, + sender_size, + sender_size_process, + sender_displacement_process, + receiver_size, + receiver_size_process, + receiver_displacement_process, + buffer2ccsIndex); +// Do transformation + std::vector sender_buffer(sender_size); + std::vector receiver_buffer(receiver_size); + // put H to sender buffer + if (SRC_Matrix.get_layout() == 'R' || SRC_Matrix.get_layout() == 'r') + { + for (int i = 0; i < sender_size; ++i) + { + sender_buffer[i] = H_2d[rowidx[i] * SRC_Matrix.get_ncol() + colidx[i]]; + } + } + else + { + for (int i = 0; i < sender_size; ++i) + { + sender_buffer[i] = H_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]]; + } + } + // do all2all transformation + MPI_Alltoallv(&sender_buffer[0], + &sender_size_process[0], + &sender_displacement_process[0], + MPI_DOUBLE, + &receiver_buffer[0], + &receiver_size_process[0], + &receiver_displacement_process[0], + MPI_DOUBLE, + COMM_TRANS); +// collect H from receiver buffer + delete[] H_ccs; + H_ccs = new double[receiver_size]; + buffer2CCSvalue(receiver_size, &buffer2ccsIndex[0], &receiver_buffer[0], H_ccs); + + // put S to sender buffer + if (SRC_Matrix.get_layout() == 'R' || SRC_Matrix.get_layout() == 'r') + { + for (int i = 0; i < sender_size; ++i) + { + sender_buffer[i] = S_2d[rowidx[i] * SRC_Matrix.get_ncol() + colidx[i]]; + } + } + else + { + for (int i = 0; i < sender_size; ++i) + { + sender_buffer[i] = 
S_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]]; + } + } + // do all2all transformation + MPI_Alltoallv(&sender_buffer[0], + &sender_size_process[0], + &sender_displacement_process[0], + MPI_DOUBLE, + &receiver_buffer[0], + &receiver_size_process[0], + &receiver_displacement_process[0], + MPI_DOUBLE, + COMM_TRANS); +// collect S from receiver buffer + delete[] S_ccs; + S_ccs = new double[receiver_size]; + buffer2CCSvalue(receiver_size, &buffer2ccsIndex[0], &receiver_buffer[0], S_ccs); + } + // clear and return + deleteGroupCommTrans(GROUP_TRANS, COMM_TRANS); + return 0; +} + +// transform two sparse matrices from Compressed Column Storage (CCS) to block cyclic distribution (BCD) distribution +// two source matrices share the same non-zero elements positions +int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix, + double* DMnzvalLocal, + double* EDMnzvalLocal, + DistBCDMatrix& DST_Matrix, + double* DM, + double* EDM) +{ + int myproc; + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + MPI_Group GROUP_TRANS; + MPI_Comm COMM_TRANS = MPI_COMM_NULL; + newGroupCommTrans(DST_Matrix, SRC_Matrix, GROUP_TRANS, COMM_TRANS); + if (COMM_TRANS != MPI_COMM_NULL) + { + // init DM and EDM with 0 + for (int i = 0; i < DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); ++i) + { + DM[i] = 0; + EDM[i] = 0; + } + // setup number of local elements to be transfered to each remote processes + int NPROC_TRANS; + MPI_Comm_size(COMM_TRANS, &NPROC_TRANS); + int sender_size_process[NPROC_TRANS]; + int sender_displacement_process[NPROC_TRANS]; + int receiver_size_process[NPROC_TRANS]; + int receiver_displacement_process[NPROC_TRANS]; + int nproc_bcd; + std::vector proc_map_bcd_trans; + int myproc_trans; + MPI_Comm_rank(COMM_TRANS, &myproc_trans); + if (myproc_trans == 0) + { + MPI_Group_size(DST_Matrix.get_group(), &nproc_bcd); + MPI_Bcast(&nproc_bcd, 1, MPI_INT, 0, COMM_TRANS); + proc_map_bcd_trans.resize(nproc_bcd, 0); + for (int i = 0; i < nproc_bcd; ++i) + { + 
MPI_Group_translate_ranks(DST_Matrix.get_group(), 1, &i, GROUP_TRANS, &proc_map_bcd_trans[i]); + } + MPI_Bcast(&proc_map_bcd_trans[0], nproc_bcd, MPI_INT, 0, COMM_TRANS); + } + else + { + MPI_Bcast(&nproc_bcd, 1, MPI_INT, 0, COMM_TRANS); + proc_map_bcd_trans.resize(nproc_bcd, 0); + MPI_Bcast(&proc_map_bcd_trans[0], nproc_bcd, MPI_INT, 0, COMM_TRANS); + } + // setup sender_size_process + // std::fill(sender_size_process.begin(), sender_size_process.end(), 0); + for (int i = 0; i < NPROC_TRANS; ++i) + sender_size_process[i] = 0; + for (int icol = 0; icol < SRC_Matrix.get_numcol_local(); ++icol) + { + int g_col = SRC_Matrix.globalCol(icol); + int recv_pcol_bcd; + int recv_col = DST_Matrix.localCol(g_col, recv_pcol_bcd); + + for (int rowidx = SRC_Matrix.get_colptr_local()[icol] - 1; rowidx < SRC_Matrix.get_colptr_local()[icol + 1] - 1; ++rowidx) + { + int g_row = SRC_Matrix.get_rowind_local()[rowidx] - 1; + int recv_prow_bcd; + int recv_row = DST_Matrix.localRow(g_row, recv_prow_bcd); + int recv_proc_bcd = DST_Matrix.pnum(recv_prow_bcd, recv_pcol_bcd); + int recv_proc = proc_map_bcd_trans[recv_proc_bcd]; + ++sender_size_process[recv_proc]; + } + // log<<"\n"; + } + // setup receiver_size_process + // std::fill(receiver_size_process.begin(), receiver_size_process.end(), 0); + for (int i = 0; i < NPROC_TRANS; ++i) + receiver_size_process[i] = 0; + MPI_Alltoall(&sender_size_process[0], 1, MPI_INT, &receiver_size_process[0], 1, MPI_INT, COMM_TRANS); + // setup sender_displacement and receiver_displacement + sender_displacement_process[0] = 0; + receiver_displacement_process[0] = 0; + int receiver_size = receiver_size_process[0]; + for (int i = 1; i < NPROC_TRANS; ++i) + { + sender_displacement_process[i] = sender_displacement_process[i - 1] + sender_size_process[i - 1]; + receiver_displacement_process[i] = receiver_displacement_process[i - 1] + receiver_size_process[i - 1]; + receiver_size += receiver_size_process[i]; + } + + // setup up sender index and receiver index + 
int sender_size = SRC_Matrix.get_nnzlocal(); + int* sender_index; + double* sender_buffer; + int* dst_index; + int* receiver_index; + double* receiver_buffer; + + if (sender_size > 0) + { + sender_index = new int[sender_size]; + for (int i = 0; i < sender_size; ++i) + { + sender_index[i] = -1; + } + sender_buffer = new double[sender_size]; + dst_index = new int[2 * sender_size]; + for (int i = 0; i < 2 * sender_size; ++i) + { + dst_index[i] = -1; + } + } + else + { + sender_index = new int[1]; + sender_index[0] = -1; + sender_buffer = new double[1]; + dst_index = new int[2]; + dst_index[0] = -1; + dst_index[1] = -1; + } + + if (receiver_size > 0) + { + receiver_index = new int[2 * receiver_size]; + receiver_buffer = new double[receiver_size]; + for (int i = 0; i < 2 * receiver_size; ++i) + { + receiver_index[i] = -1; + } + for (int i = 0; i < receiver_size; ++i) + { + receiver_buffer[i] = -1; + } + } + else + { + receiver_index = new int[2]; + receiver_buffer = new double[1]; + receiver_index[0] = -1; + receiver_index[1] = -1; + receiver_buffer[0] = -1; + } + + // pointer to the first empty slot of each process + // std::vector p(sender_displacement_process); + int p[NPROC_TRANS]; + for (int i = 0; i < NPROC_TRANS; ++i) + { + p[i] = sender_displacement_process[i]; + } + + int idx = 0; + + for (int icol = 0; icol < SRC_Matrix.get_numcol_local(); ++icol) + { + int g_col = SRC_Matrix.globalCol(icol); + int recv_pcol_bcd; + int recv_col = DST_Matrix.localCol(g_col, recv_pcol_bcd); + for (int rowidx = SRC_Matrix.get_colptr_local()[icol] - 1; rowidx < SRC_Matrix.get_colptr_local()[icol + 1] - 1; ++rowidx) + { + int g_row = SRC_Matrix.get_rowind_local()[rowidx] - 1; + int recv_prow_bcd; + int recv_row = DST_Matrix.localRow(g_row, recv_prow_bcd); + + int recv_proc_bcd = DST_Matrix.pnum(recv_prow_bcd, recv_pcol_bcd); + + int recv_proc = proc_map_bcd_trans[recv_proc_bcd]; + + sender_index[p[recv_proc]] = idx; + + dst_index[p[recv_proc] * 2] = recv_row; + 
dst_index[p[recv_proc] * 2 + 1] = recv_col; + ++p[recv_proc]; + ++idx; + } + } + + for (int i = 0; i < NPROC_TRANS; ++i) + { + sender_size_process[i] *= 2; + sender_displacement_process[i] *= 2; + receiver_size_process[i] *= 2; + receiver_displacement_process[i] *= 2; + } + + MPI_Alltoallv(&dst_index[0], + &sender_size_process[0], + &sender_displacement_process[0], + MPI_INT, + &receiver_index[0], + &receiver_size_process[0], + &receiver_displacement_process[0], + MPI_INT, + COMM_TRANS); + + // reset size and displacement for transfering matrix value by alltoall + for (int i = 0; i < NPROC_TRANS; ++i) + { + sender_size_process[i] /= 2; + sender_displacement_process[i] /= 2; + receiver_size_process[i] /= 2; + receiver_displacement_process[i] /= 2; + } + + // transfer DM + // set up DM sender buffer + for (int i = 0; i < sender_size; ++i) + { + sender_buffer[i] = DMnzvalLocal[sender_index[i]]; + } + + // transfer sender buffer to receiver buffer + MPI_Alltoallv(&sender_buffer[0], + &sender_size_process[0], + &sender_displacement_process[0], + MPI_DOUBLE, + &receiver_buffer[0], + &receiver_size_process[0], + &receiver_displacement_process[0], + MPI_DOUBLE, + COMM_TRANS); + + // transform receiver_buffer to DM + if (DST_Matrix.get_layout() == 'R' || DST_Matrix.get_layout() == 'r') + { + int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); + for (int i = 0; i < receiver_size; ++i) + { + int ix = receiver_index[2 * i]; + int iy = receiver_index[2 * i + 1]; + int idx = ix * DST_Matrix.get_ncol() + iy; + DM[idx] = receiver_buffer[i]; + } + } + else + { + int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); + for (int i = 0; i < receiver_size; ++i) + { + int ix = receiver_index[2 * i]; + int iy = receiver_index[2 * i + 1]; + int idx = iy * DST_Matrix.get_nrow() + ix; + DM[idx] = receiver_buffer[i]; + } + } + + // setup up sender buffer of EDM + for (int i = 0; i < sender_size; ++i) + { + sender_buffer[i] = EDMnzvalLocal[sender_index[i]]; + } 
+ + // transfer sender buffer to receiver buffer + MPI_Alltoallv(&sender_buffer[0], + &sender_size_process[0], + &sender_displacement_process[0], + MPI_DOUBLE, + &receiver_buffer[0], + &receiver_size_process[0], + &receiver_displacement_process[0], + MPI_DOUBLE, + COMM_TRANS); + + // transform receiver_buffer to EDM + if (DST_Matrix.get_layout() == 'R' || DST_Matrix.get_layout() == 'r') + { + int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); + for (int i = 0; i < receiver_size; ++i) + { + int ix = receiver_index[2 * i]; + int iy = receiver_index[2 * i + 1]; + int idx = ix * DST_Matrix.get_ncol() + iy; + EDM[idx] = receiver_buffer[i]; + } + } + else + { + int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); + for (int i = 0; i < receiver_size; ++i) + { + int ix = receiver_index[2 * i]; + int iy = receiver_index[2 * i + 1]; + int idx = iy * DST_Matrix.get_nrow() + ix; + EDM[idx] = receiver_buffer[i]; + } + } + + delete[] sender_index; + delete[] sender_buffer; + delete[] dst_index; + delete[] receiver_index; + delete[] receiver_buffer; + + } + deleteGroupCommTrans(GROUP_TRANS, COMM_TRANS); + return 0; +} + +} // namespace pexsi +#endif \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h new file mode 100644 index 0000000000..672b22f4f3 --- /dev/null +++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h @@ -0,0 +1,91 @@ +#ifndef DISTMATRIXTRANSFORMER_H +#define DISTMATRIXTRANSFORMER_H + +#include +#include +#include +// transform a sparse matrix from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution +// they should have same MPI communicator +// The local matrix of BCD is column-major order +// int transformBCDtoCCS(DistBCDMatrix &SRC_Matrix, double* H_2d, const double ZERO_Limit, +// DistCCSMatrix &DST_Matrix, double*& H_ccs); + +// transform two sparse matrices from block cyclic 
distribution (BCD) to Compressed Column Storage (CCS) distribution +// two destination matrices share the same non-zero elements positions +// if either of two elements in source matrices is non-zeros, the elements in the destination matrices are non-zero, +// even if one of them is acturely zero All matrices must have same MPI communicator +namespace pexsi +{ +class DistBCDMatrix; +class DistCCSMatrix; + +namespace DistMatrixTransformer +{ +int MinimumIndexPosition(const bool isFirst, + const int nprocs, + int* size_process, + int* displacement_process, + const int* index); + +void buildCCSParameter(const int size, + const int nprocs, + std::vector size_process, + std::vector displacement_process, + const int* position_index, + DistCCSMatrix& DST_Matrix, + int* buffer2ccsIndex); + +void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer, double* nzvalLocal); + +void countMatrixDistribution(int N, double* A, std::map& P); + +int getNonZeroIndex(char layout, + const int nrow, + const int ncol, + double* H_2d, + double* S_2d, + const double ZERO_Limit, + int& nnz, + std::vector& rowidx, + std::vector& colidx); + +int buildTransformParameter(DistBCDMatrix& SRC_Matrix, + DistCCSMatrix& DST_Matrix, + const int NPROC_TRANS, + MPI_Group& GROUP_TRANS, + MPI_Comm& COMM_TRANS, + const int nnz, + std::vector& rowidx, + std::vector& colidx, + int& sender_size, + std::vector& sender_size_process, + std::vector& sender_displacement_process, + int& receiver_size, + std::vector& receiver_size_process, + std::vector& receiver_displacement_process, + std::vector& buffer2ccsIndex); + +int newGroupCommTrans(DistBCDMatrix& SRC_Matrix, + DistCCSMatrix& DST_Matrix, + MPI_Group& GROUP_TRANS, + MPI_Comm& COMM_TRANS); + +int deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS); + +int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, + double* H_2d, + double* S_2d, + const double ZERO_Limit, + DistCCSMatrix& DST_Matrix, + double*& H_ccs, + double*& S_ccs); + +int 
transformCCStoBCD(DistCCSMatrix& SRC_Matrix, + double* DMnzvalLocal, + double* ENDnzvalLocal, + DistBCDMatrix& DST_Matrix, + double* DM_2d, + double* ED_2d); +}; // namespace DistMatrixTransformer +} // namespace pexsi +#endif // DISTMATRIXTRANSFORMER_H \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp new file mode 100644 index 0000000000..7a71e6ca01 --- /dev/null +++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp @@ -0,0 +1,121 @@ +#include "module_base/parallel_global.h" +#ifdef __PEXSI +#include "pexsi_solver.h" + +#include +#include +#include + +#include "module_base/global_variable.h" +#include "simple_pexsi.h" + +extern MPI_Comm DIAG_WORLD; +extern MPI_Comm GRID_WORLD; +namespace pexsi +{ + +int PEXSI_Solver::pexsi_npole = 0; +bool PEXSI_Solver::pexsi_inertia = 0; +int PEXSI_Solver::pexsi_nmax = 0; +// int PEXSI_Solver::pexsi_symbolic = 0; +bool PEXSI_Solver::pexsi_comm = 0; +bool PEXSI_Solver::pexsi_storage = 0; +int PEXSI_Solver::pexsi_ordering = 0; +int PEXSI_Solver::pexsi_row_ordering = 0; +int PEXSI_Solver::pexsi_nproc = 0; +bool PEXSI_Solver::pexsi_symm = 0; +bool PEXSI_Solver::pexsi_trans = 0; +int PEXSI_Solver::pexsi_method = 0; +int PEXSI_Solver::pexsi_nproc_pole = 0; +// double PEXSI_Solver::pexsi_spin = 2; +double PEXSI_Solver::pexsi_temp = 0.0; +double PEXSI_Solver::pexsi_gap = 0.0; +double PEXSI_Solver::pexsi_delta_e = 0.0; +double PEXSI_Solver::pexsi_mu_lower = 0.0; +double PEXSI_Solver::pexsi_mu_upper = 0.0; +double PEXSI_Solver::pexsi_mu = 0.0; +double PEXSI_Solver::pexsi_mu_thr = 0.0; +double PEXSI_Solver::pexsi_mu_expand = 0.0; +double PEXSI_Solver::pexsi_mu_guard = 0.0; +double PEXSI_Solver::pexsi_elec_thr = 0.0; +double PEXSI_Solver::pexsi_zero_thr = 0.0; + +void PEXSI_Solver::prepare(const int blacs_text, + const int nb, + const int nrow, + const int ncol, + const double* h, + const double* s, + double*& _DM, + double*& _EDM) +{ + 
this->blacs_text = blacs_text; + this->nb = nb; + this->nrow = nrow; + this->ncol = ncol; + this->h = const_cast(h); + this->s = const_cast(s); + this->DM = _DM; + this->EDM = _EDM; + this->totalEnergyH = 0.0; + this->totalEnergyS = 0.0; + this->totalFreeEnergy = 0.0; +} + +int PEXSI_Solver::solve(double mu0) +{ + MPI_Group grid_group; + int myid, grid_np; + MPI_Group world_group; + MPI_Comm_rank(DIAG_WORLD, &myid); + MPI_Comm_size(DIAG_WORLD, &grid_np); + MPI_Comm_group(DIAG_WORLD, &world_group); + + int grid_proc_range[3]={0, (GlobalV::NPROC/grid_np)*grid_np-1, GlobalV::NPROC/grid_np}; + MPI_Group_range_incl(world_group, 1, &grid_proc_range, &grid_group); + + simplePEXSI(DIAG_WORLD, + DIAG_WORLD, + grid_group, + this->blacs_text, + GlobalV::NLOCAL, + this->nb, + this->nrow, + this->ncol, + 'c', + this->h, + this->s, + GlobalV::nelec, + "PEXSIOPTION", + this->DM, + this->EDM, + this->totalEnergyH, + this->totalEnergyS, + this->totalFreeEnergy, + mu, + mu0); + return 0; +} + +const double PEXSI_Solver::get_totalFreeEnergy() const +{ + return totalFreeEnergy; +} + +const double PEXSI_Solver::get_totalEnergyH() const +{ + return totalEnergyH; +} + +const double PEXSI_Solver::get_totalEnergyS() const +{ + return totalEnergyS; +} + +const double PEXSI_Solver::get_mu() const +{ + return mu; +} + +} // namespace pexsi +#endif \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.h b/source/module_hsolver/module_pexsi/pexsi_solver.h new file mode 100644 index 0000000000..b041d13656 --- /dev/null +++ b/source/module_hsolver/module_pexsi/pexsi_solver.h @@ -0,0 +1,143 @@ +#ifndef PEXSI_Solver_H +#define PEXSI_Solver_H + +#include + +namespace pexsi +{ +class PEXSI_Solver +{ + public: + void prepare(const int blacs_text, + const int nb, + const int nrow, + const int ncol, + const double* h, + const double* s, + double*& DM, + double*& EDM); + int solve(double mu0); + const double get_totalFreeEnergy() const; + const double get_totalEnergyH() 
const; + const double get_totalEnergyS() const; + const double get_mu() const; + + //========================================================== + // PEXSI related variables + //========================================================== + /** + * @brief Number of terms in the pole expansion. + */ + static int pexsi_npole; + /** + * @brief Whether inertia counting is used at the very beginning. + */ + static bool pexsi_inertia; + /** + * @brief Maximum number of PEXSI iterations after each inertia counting procedure. + */ + static int pexsi_nmax; + /** + * @brief Whether to construct PSelInv communication pattern. + */ + static bool pexsi_comm; + /** + * @brief Whether to use symmetric storage space used by the Selected Inversion algorithm for symmetric matrices. + */ + static bool pexsi_storage; + /** + * @brief Ordering strategy for factorization and selected inversion. + */ + static int pexsi_ordering; + /** + * @brief row permutation strategy for factorization and selected inversion. + */ + static int pexsi_row_ordering; + /** + * @brief Number of processors for PARMETIS/PT-SCOTCH. Only used if the ordering == 0. + */ + static int pexsi_nproc; + /** + * @brief Matrix structure. + * - = 0 : Unsymmetric matrix + * - = 1 : Symmetric matrix (default). + */ + static bool pexsi_symm; + /** + * @brief Transpose. + * - = 0 : Factor non transposed matrix (default). + * - = 1 : Factor transposed matrix. + */ + static bool pexsi_trans; + /** + * @brief The pole expansion method to be used. + * - = 1 : Cauchy Contour Integral method used. + * - = 2 : Moussa optimized method. + */ + static int pexsi_method; + /** + * @brief The point parallelizaion of PEXSI. + * - = 2 : Recommend two points parallelization + */ + static int pexsi_nproc_pole; + /** + * @brief Temperature, in the same unit as H + */ + static double pexsi_temp; + /** + * @brief Spectral gap. **Note** This can be set to be 0 in most cases. 
+ */ + static double pexsi_gap; + /** + * @brief An upper bound for the spectral radius of \f$S^{-1} H\f$. + */ + static double pexsi_delta_e; + /** + * @brief Initial guess of lower bound for mu. + */ + static double pexsi_mu_lower; + /** + * @brief Initial guess of upper bound for mu. + */ + static double pexsi_mu_upper; + /** + * @brief Initial guess for mu (for the solver) (AG) + */ + static double pexsi_mu; + /** + * @brief Stopping criterion in terms of the chemical potential for the inertia counting procedure. + */ + static double pexsi_mu_thr; + /** + * @brief If the chemical potential is not in the initial interval, the interval is expanded by muInertiaExpansion. + */ + static double pexsi_mu_expand; + /** + * @brief Safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure. + */ + static double pexsi_mu_guard; + /** + * @brief Stopping criterion of the %PEXSI iteration in terms of the number of electrons compared to numElectronExact. + */ + static double pexsi_elec_thr; + /** + * @brief If the absolute value of CCS matrix element is less than this value, it will be considered as zero. 
+ */ + static double pexsi_zero_thr; + + private: + int blacs_text; + int nb; + int nrow; + int ncol; + double* h; + double* s; + double* DM; + double* EDM; + double totalEnergyH; + double totalEnergyS; + double totalFreeEnergy; + double mu; +}; +} // namespace pexsi +#endif // PEXSI_Solver_H \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp new file mode 100644 index 0000000000..075c803182 --- /dev/null +++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp @@ -0,0 +1,365 @@ +// use PEXSI to solve a Kohn-Sham equation +// the H and S matrices are given by 2D block cyclic distribution +// the Density Matrix and Energy Density Matrix calculated by PEXSI are transformed to 2D block cyclic distribution +// #include "mpi.h" +#ifdef __PEXSI +#include + +#include +#include +#include +#include +#include +#include + +#include "c_pexsi_interface.h" +#include "dist_bcd_matrix.h" +#include "dist_ccs_matrix.h" +#include "dist_matrix_transformer.h" +#include "module_base/lapack_connector.h" +#include "module_base/timer.h" +#include "module_base/tool_quit.h" +#include "module_base/global_variable.h" +#include "module_hsolver/diago_pexsi.h" + +namespace pexsi +{ +inline void strtolower(char* sa, char* sb) +{ + char c; + int len = strlen(sa); + for (int i = 0; i < len; i++) + { + c = sa[i]; + sb[i] = tolower(c); + } + sb[len] = '\0'; +} + +inline void setDefaultOption(int* int_para, double* double_para) +{ + double_para[0] = 2; + double_para[2] = 0; + double_para[11] = DBL_MIN; + int_para[3] = 0; + int_para[6] = 1; + int_para[8] = 0; + int_para[9] = 0; + int_para[11] = 0; + int_para[12] = 0; + int_para[14] = 2; + int_para[15] = 1; +} + +int loadPEXSIOption(MPI_Comm comm, + const std::string PexsiOptionFile, + PPEXSIOptions& options, + int& numProcessPerPole, + double& ZERO_Limit) +{ + + // temp variable arrays read from conf file and will be bcast to all processors + + // parameter 
array of type int, + // 0: numPole + // 1: isInertiaCount + // 2: maxPEXSIIter + // 3: matrixType + // 4: isSymbolicFactorize + // 5: isConstructCommPattern + // 6: solver + // 7: symmetricStorage + // 8: ordering + // 9: rowOrdering + // 10: npSymbFact + // 11: symmetric + // 12: transpose + // 13: method + // 14: nPoints + // 15: verbosity + // 16: numProcessPerPole + int int_para[17]; + + // parameter array of type double + // 0: spin + // 1: temperature + // 2: gap + // 3: deltaE + // 4: muMin0 + // 5: muMax0 + // 6: mu0 + // 7: muInertiaTolerance + // 8: muInertiaExpansion + // 9: muPEXSISafeGuard + // 10: numElectronPEXSITolerance + // 11: ZERO_Limit + double double_para[12]; + + // read in PEXSI options from GlobalV + int_para[0] = pexsi::PEXSI_Solver::pexsi_npole; + int_para[1] = pexsi::PEXSI_Solver::pexsi_inertia; + int_para[2] = pexsi::PEXSI_Solver::pexsi_nmax; + int_para[3] = 0; + int_para[4] = 1; // pexsi::PEXSI_Solver::pexsi_symbolic; + int_para[5] = pexsi::PEXSI_Solver::pexsi_comm; + int_para[6] = 0; + int_para[7] = pexsi::PEXSI_Solver::pexsi_storage; + int_para[8] = pexsi::PEXSI_Solver::pexsi_ordering; + int_para[9] = pexsi::PEXSI_Solver::pexsi_row_ordering; + int_para[10] = pexsi::PEXSI_Solver::pexsi_nproc; + int_para[11] = pexsi::PEXSI_Solver::pexsi_symm; + int_para[12] = pexsi::PEXSI_Solver::pexsi_trans; + int_para[13] = pexsi::PEXSI_Solver::pexsi_method; + int_para[14] = 2; + int_para[15] = 0; + int_para[16] = pexsi::PEXSI_Solver::pexsi_nproc_pole; + + double_para[0] = GlobalV::NSPIN; // pexsi::PEXSI_Solver::pexsi_spin; + double_para[1] = pexsi::PEXSI_Solver::pexsi_temp; + double_para[2] = pexsi::PEXSI_Solver::pexsi_gap; + double_para[3] = pexsi::PEXSI_Solver::pexsi_delta_e; + double_para[4] = pexsi::PEXSI_Solver::pexsi_mu_lower; + double_para[5] = pexsi::PEXSI_Solver::pexsi_mu_upper; + double_para[6] = pexsi::PEXSI_Solver::pexsi_mu; + double_para[7] = pexsi::PEXSI_Solver::pexsi_mu_thr; + double_para[8] = pexsi::PEXSI_Solver::pexsi_mu_expand; + 
double_para[9] = pexsi::PEXSI_Solver::pexsi_mu_guard; + double_para[10] = pexsi::PEXSI_Solver::pexsi_elec_thr; + double_para[11] = pexsi::PEXSI_Solver::pexsi_zero_thr; + + options.numPole = int_para[0]; + options.isInertiaCount = int_para[1]; + options.maxPEXSIIter = int_para[2]; + options.matrixType = int_para[3]; + options.isSymbolicFactorize = int_para[4]; + options.isConstructCommPattern = int_para[5]; + options.solver = int_para[6]; + options.symmetricStorage = int_para[7]; + options.ordering = int_para[8]; + options.rowOrdering = int_para[9]; + options.npSymbFact = int_para[10]; + options.symmetric = int_para[11]; + options.transpose = int_para[12]; + options.method = int_para[13]; + options.nPoints = int_para[14]; + options.verbosity = int_para[15]; + numProcessPerPole = int_para[16]; + + options.spin = double_para[0]; + options.temperature = double_para[1]; + options.gap = double_para[2]; + options.deltaE = double_para[3]; + options.muMin0 = double_para[4]; + options.muMax0 = double_para[5]; + options.mu0 = double_para[6]; + options.muInertiaTolerance = double_para[7]; + options.muInertiaExpansion = double_para[8]; + options.muPEXSISafeGuard = double_para[9]; + options.numElectronPEXSITolerance = double_para[10]; + ZERO_Limit = double_para[11]; + + return 0; +} + +void splitNProc2NProwNPcol(const int NPROC, int& nprow, int& npcol) +{ + int integral_part = (int)sqrt(NPROC); + if (NPROC % integral_part == 0) + { + nprow = integral_part; + npcol = NPROC / integral_part; + } + else + { + int flag; + int i; + int low = pow(integral_part, 2); + int high = pow(integral_part + 1, 2); + if ((NPROC - low) >= (high - NPROC)) + { + flag = integral_part + 1; + } + else + { + flag = integral_part; + } + for (i = flag; i > 0; ++i) + { + if (NPROC % i == 0) + break; + } + nprow = i; + npcol = NPROC / i; + } +} + +int simplePEXSI(MPI_Comm comm_PEXSI, + MPI_Comm comm_2D, + MPI_Group group_2D, + const int blacs_ctxt, // communicator parameters + const int size, + const int 
nblk, + const int nrow, + const int ncol, + char layout, // matrix parameters + double* H, + double* S, // input matrices + const double numElectronExact, + const std::string PexsiOptionFile, // pexsi parameters file + double*& DM, + double*& EDM, // output matrices + double& totalEnergyH, + double& totalEnergyS, + double& totalFreeEnergy, // output energy + double& mu, + double mu0) +{ + + if (comm_2D == MPI_COMM_NULL && comm_PEXSI == MPI_COMM_NULL) + return 0; + int myid; + std::ofstream f_log; + if (comm_PEXSI != MPI_COMM_NULL) + { + MPI_Comm_rank(comm_PEXSI, &myid); + } + + // set up PEXSI parameter + PPEXSIOptions options; + PPEXSISetDefaultOptions(&options); + int numProcessPerPole; + double ZERO_Limit; + loadPEXSIOption(comm_PEXSI, PexsiOptionFile, options, numProcessPerPole, ZERO_Limit); + options.mu0 = mu0; + + ModuleBase::timer::tick("Diago_LCAO_Matrix", "setup_PEXSI_plan"); + PPEXSIPlan plan; + int info; + int outputFileIndex; + int pexsi_prow, pexsi_pcol; + ModuleBase::timer::tick("Diago_LCAO_Matrix", "splitNProc2NProwNPcol"); + splitNProc2NProwNPcol(numProcessPerPole, pexsi_prow, pexsi_pcol); + ModuleBase::timer::tick("Diago_LCAO_Matrix", "splitNProc2NProwNPcol"); + + outputFileIndex = -1; + ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIPlanInit"); + if (comm_PEXSI != MPI_COMM_NULL) + { + plan = PPEXSIPlanInitialize(comm_PEXSI, pexsi_prow, pexsi_pcol, outputFileIndex, &info); + } + ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIPlanInit"); + + ModuleBase::timer::tick("Diago_LCAO_Matrix", "setup_PEXSI_plan"); + + // create compressed column storage distribution matrix parameter + // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test + // DONE(ofs_running,"create compressed column storage distribution matrix parameter, begin"); + DistCCSMatrix DST_Matrix(comm_PEXSI, numProcessPerPole, size); + // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test + // DONE(ofs_running,"create compressed column storage distribution matrix 
parameter, finish"); + + + // create block cyclic distribution matrix parameter + DistBCDMatrix SRC_Matrix(comm_2D, group_2D, blacs_ctxt, size, nblk, nrow, ncol, layout); + // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test + // DONE(ofs_running,"create block cyclic distribution matrix parameter, finish"); + double* HnzvalLocal = nullptr; + double* SnzvalLocal = nullptr; + double* DMnzvalLocal = nullptr; + double* EDMnzvalLocal = nullptr; + double* FDMnzvalLocal = nullptr; + // transform H and S from 2D block cyclic distribution to compressed column sparse matrix + // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test + DistMatrixTransformer::transformBCDtoCCS(SRC_Matrix, H, S, ZERO_Limit, DST_Matrix, HnzvalLocal, SnzvalLocal); + // MPI_Barrier(MPI_COMM_WORLD); + // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test + if (comm_PEXSI != MPI_COMM_NULL) + { + + // Load H and S to PEXSI + int isSIdentity = 0; + PPEXSILoadRealHSMatrix(plan, + options, + size, + DST_Matrix.get_nnz(), + DST_Matrix.get_nnzlocal(), + DST_Matrix.get_numcol_local(), + DST_Matrix.get_colptr_local(), + DST_Matrix.get_rowind_local(), + HnzvalLocal, + isSIdentity, + SnzvalLocal, + &info); + + double nelec; + double muMinInertia; + double muMaxInertia; + int numTotalPEXSIIter; + int numTotalInertiaIter; // Number of total inertia[out] + // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test + ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIDFT"); + PPEXSIDFTDriver(plan, // PEXSI plan[in] + options, // PEXSI Options[in] + numElectronExact, // exact electron number[in] + &mu, // chemical potential[out] + &nelec, // number of electrons[out] + &muMinInertia, // Lower bound for mu after the last inertia[out] + &muMaxInertia, // Upper bound for mu after the last inertia[out] + &numTotalInertiaIter, // Number of total inertia[out] + &numTotalPEXSIIter, // number of total pexsi evaluation procedure[out] + &info); // 0: successful; otherwise: unsuccessful + // 
LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test + ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIDFT"); + + // retrieve the results from the plan + if (DMnzvalLocal != nullptr) + delete[] DMnzvalLocal; + if (EDMnzvalLocal != nullptr) + delete[] EDMnzvalLocal; + if (FDMnzvalLocal != nullptr) + delete[] FDMnzvalLocal; + DMnzvalLocal = new double[DST_Matrix.get_nnzlocal()]; + EDMnzvalLocal = new double[DST_Matrix.get_nnzlocal()]; + FDMnzvalLocal = new double[DST_Matrix.get_nnzlocal()]; + if (myid < numProcessPerPole) + { + PPEXSIRetrieveRealDFTMatrix(plan, + DMnzvalLocal, + EDMnzvalLocal, + FDMnzvalLocal, + &totalEnergyH, + &totalEnergyS, + &totalFreeEnergy, + &info); + } + // clean PEXSI + PPEXSIPlanFinalize(plan, &info); + } + + // transform Density Matrix and Energy Density Matrix from compressed column sparse matrix + // back to 2D block cyclic distribution if neccessary + if (comm_2D != MPI_COMM_NULL) + { + // delete[] DM; + // delete[] EDM; + // DM = new double[SRC_Matrix.get_nrow() * SRC_Matrix.get_ncol()]; + // EDM = new double[SRC_Matrix.get_nrow() * SRC_Matrix.get_ncol()]; + } + // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test + ModuleBase::timer::tick("Diago_LCAO_Matrix", "TransMAT22D"); + DistMatrixTransformer::transformCCStoBCD(DST_Matrix, DMnzvalLocal, EDMnzvalLocal, SRC_Matrix, DM, EDM); + ModuleBase::timer::tick("Diago_LCAO_Matrix", "TransMAT22D"); + // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test + + MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(MPI_COMM_WORLD); + delete[] DMnzvalLocal; + delete[] EDMnzvalLocal; + delete[] FDMnzvalLocal; + delete[] HnzvalLocal; + delete[] SnzvalLocal; + MPI_Barrier(MPI_COMM_WORLD); + return 0; +} +} // namespace pexsi +#endif \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.h b/source/module_hsolver/module_pexsi/simple_pexsi.h new file mode 100644 index 0000000000..db8879e5ac --- /dev/null +++ 
b/source/module_hsolver/module_pexsi/simple_pexsi.h @@ -0,0 +1,29 @@ +#ifndef SIMPLE_PEXSI_H +#define SIMPLE_PEXSI_H + +#include +// a simple interface for calling pexsi with 2D block cyclic distributed matrix +namespace pexsi +{ +int simplePEXSI(MPI_Comm comm_PEXSI, + MPI_Comm comm_2D, + MPI_Group group_2D, + const int blacs_ctxt, // communicator parameters + const int size, + const int nblk, + const int nrow, + const int ncol, + char layout, // input matrix parameters + double* H, + double* S, // input matrices + const double nElectronExact, + const std::string PexsiOptionFile, // pexsi parameters file + double*& DM, + double*& EDM, // output matrices + double& totalEnergyH, + double& totalEnergyS, + double& totalFreeEnergy, + double& mu, + double mu0); +} +#endif // SIMPLE_PEXSI_H \ No newline at end of file diff --git a/source/module_hsolver/test/CMakeLists.txt b/source/module_hsolver/test/CMakeLists.txt index 795854fcc9..cb785e83bb 100644 --- a/source/module_hsolver/test/CMakeLists.txt +++ b/source/module_hsolver/test/CMakeLists.txt @@ -93,6 +93,14 @@ if(ENABLE_LCAO) SOURCES diago_lcao_test.cpp ../diago_blas.cpp ) endif() + + if (ENABLE_PEXSI) + AddTest( + TARGET HSolver_LCAO_PEXSI + LIBS ${math_libs} ${PEXSI_LIBRARY} ${SuperLU_DIST_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} MPI::MPI_CXX base psi device pexsi + SOURCES diago_pexsi_test.cpp ../diago_pexsi.cpp ../../module_basis/module_ao/parallel_orbitals.cpp ../../module_basis/module_ao/parallel_2d.cpp + ) + endif() endif() if (USE_CUDA) AddTest( @@ -116,6 +124,11 @@ install(FILES diago_cg_parallel_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) install(FILES diago_david_parallel_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) install(FILES diago_lcao_parallel_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) +install(FILES PEXSI-H-GammaOnly-Si2.dat DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) +install(FILES PEXSI-S-GammaOnly-Si2.dat DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) +install(FILES 
PEXSI-DM-GammaOnly-Si2.dat DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) +install(FILES diago_pexsi_parallel_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) + find_program(BASH bash) add_test(NAME HSolver_cg_parallel COMMAND ${BASH} diago_cg_parallel_test.sh @@ -130,4 +143,10 @@ if(ENABLE_LCAO) COMMAND ${BASH} diago_lcao_parallel_test.sh WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) +if(ENABLE_PEXSI) + add_test(NAME HSolver_LCAO_PEXSI_parallel + COMMAND ${BASH} diago_pexsi_parallel_test.sh + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) +endif() endif() \ No newline at end of file diff --git a/source/module_hsolver/test/PEXSI-DM-GammaOnly-Si2.dat b/source/module_hsolver/test/PEXSI-DM-GammaOnly-Si2.dat new file mode 100644 index 0000000000..1043cc51a1 --- /dev/null +++ b/source/module_hsolver/test/PEXSI-DM-GammaOnly-Si2.dat @@ -0,0 +1,107 @@ + 26 26 + 8 + 0.660474083048563 + 3.884e-01 1.025e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 3.883e-01 1.024e-02 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 + 1.025e-02 2.683e-04 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.024e-02 2.718e-04 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 7.260e-01 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.671e-01 0.000e+00 0.000e+00 -7.169e-01 + 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 1.699e-01 + 0.000e+00 0.000e+00 0.000e+00 7.260e-01 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 + 0.000e+00 0.000e+00 1.671e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -7.169e-01 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 0.000e+00 1.699e-01 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 
0.000e+00 7.260e-01 0.000e+00 0.000e+00 -1.781e-01 + 0.000e+00 1.671e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 -7.169e-01 0.000e+00 0.000e+00 1.773e-01 0.000e+00 1.699e-01 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 4.379e-02 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -4.138e-02 0.000e+00 0.000e+00 1.773e-01 + 0.000e+00 0.000e+00 -4.374e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 -4.160e-02 + 0.000e+00 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 4.379e-02 0.000e+00 + 0.000e+00 0.000e+00 -4.138e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 1.773e-01 0.000e+00 0.000e+00 -4.374e-02 0.000e+00 0.000e+00 0.000e+00 -4.160e-02 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 4.379e-02 + 0.000e+00 -4.138e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 1.773e-01 0.000e+00 0.000e+00 -4.374e-02 0.000e+00 -4.160e-02 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -5.653e-07 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -1.426e-07 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.671e-01 0.000e+00 0.000e+00 -4.138e-02 + 0.000e+00 3.977e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 -1.699e-01 0.000e+00 0.000e+00 4.160e-02 0.000e+00 3.891e-02 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 1.671e-01 0.000e+00 0.000e+00 -4.138e-02 0.000e+00 + 0.000e+00 0.000e+00 3.977e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -1.699e-01 0.000e+00 0.000e+00 4.160e-02 0.000e+00 0.000e+00 0.000e+00 3.891e-02 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 -5.653e-07 0.000e+00 
0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -1.426e-07 0.000e+00 + 0.000e+00 0.000e+00 1.671e-01 0.000e+00 0.000e+00 -4.138e-02 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 3.977e-02 0.000e+00 0.000e+00 -1.699e-01 + 0.000e+00 0.000e+00 4.160e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 3.891e-02 + 3.883e-01 1.024e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 3.884e-01 1.025e-02 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 + 1.024e-02 2.718e-04 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.025e-02 2.683e-04 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 -7.169e-01 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -1.699e-01 0.000e+00 0.000e+00 7.260e-01 + 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 -1.671e-01 + 0.000e+00 0.000e+00 0.000e+00 -7.169e-01 0.000e+00 0.000e+00 1.773e-01 0.000e+00 + 0.000e+00 0.000e+00 -1.699e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 7.260e-01 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 0.000e+00 -1.671e-01 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -7.169e-01 0.000e+00 0.000e+00 1.773e-01 + 0.000e+00 -1.699e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 7.260e-01 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 -1.671e-01 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 -4.374e-02 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 4.160e-02 0.000e+00 0.000e+00 -1.781e-01 + 0.000e+00 0.000e+00 4.379e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 
0.000e+00 + 0.000e+00 4.138e-02 + 0.000e+00 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 -4.374e-02 0.000e+00 + 0.000e+00 0.000e+00 4.160e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -1.781e-01 0.000e+00 0.000e+00 4.379e-02 0.000e+00 0.000e+00 0.000e+00 4.138e-02 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 -4.374e-02 + 0.000e+00 4.160e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 4.379e-02 0.000e+00 4.138e-02 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -1.426e-07 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -5.653e-07 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.699e-01 0.000e+00 0.000e+00 -4.160e-02 + 0.000e+00 3.891e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 -1.671e-01 0.000e+00 0.000e+00 4.138e-02 0.000e+00 3.977e-02 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 1.699e-01 0.000e+00 0.000e+00 -4.160e-02 0.000e+00 + 0.000e+00 0.000e+00 3.891e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -1.671e-01 0.000e+00 0.000e+00 4.138e-02 0.000e+00 0.000e+00 0.000e+00 3.977e-02 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 -1.426e-07 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -5.653e-07 0.000e+00 + 0.000e+00 0.000e+00 1.699e-01 0.000e+00 0.000e+00 -4.160e-02 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 3.891e-02 0.000e+00 0.000e+00 -1.671e-01 + 0.000e+00 0.000e+00 4.138e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 3.977e-02 \ No newline at end of file diff --git 
a/source/module_hsolver/test/PEXSI-H-GammaOnly-Si2.dat b/source/module_hsolver/test/PEXSI-H-GammaOnly-Si2.dat new file mode 100644 index 0000000000..87c76fc184 --- /dev/null +++ b/source/module_hsolver/test/PEXSI-H-GammaOnly-Si2.dat @@ -0,0 +1,26 @@ +26 -3.45008963e-01 7.00777279e-01 2.67435378e-18 -6.48062567e-16 5.69806192e-16 -2.01865721e-15 1.37825425e-15 1.02932021e-15 -1.29549404e-15 9.40872433e-16 -4.37541396e-16 -4.22502120e-15 -9.57567315e-16 -5.60569495e-01 3.19974163e-01 9.47088035e-16 6.99293619e-16 2.66546040e-17 2.24941591e-16 3.49977208e-16 6.64422948e-16 1.19697268e-15 -2.18249784e-16 -1.24808688e-15 2.25204771e-15 3.31560018e-16 + 7.97835961e-01 -3.26671355e-16 9.44414999e-17 1.08101471e-16 2.96820086e-16 -6.21597964e-17 -1.93606525e-17 1.47303571e-15 -5.85952677e-16 -9.93382239e-16 2.85142385e-15 1.26395876e-15 3.19974163e-01 -2.96734690e-02 -9.56683510e-16 2.99393236e-15 1.06817249e-15 2.22266652e-16 -1.05210457e-15 -2.10421437e-15 -2.19285057e-15 8.05560256e-16 1.20571583e-15 -4.19339449e-15 -1.13240719e-15 + 3.80362144e-01 5.95917771e-16 -2.18276481e-16 -3.54010265e-01 -4.19309899e-16 -2.30363123e-16 1.53423809e-15 -2.91444576e-16 -9.56803808e-17 -5.35261856e-17 -6.16823227e-02 -2.90999424e-16 -2.32177556e-15 -3.89575886e-02 2.12205807e-16 -8.22906289e-17 6.81593611e-03 2.10103917e-17 2.24379615e-16 -4.85438948e-16 9.00442486e-17 1.31161204e-16 4.61089846e-17 2.39556639e-01 + 3.80362144e-01 2.55037050e-16 -3.75167990e-16 -3.54010265e-01 -3.00934924e-16 4.18935390e-17 1.46886306e-16 -6.16823227e-02 -7.98028325e-16 -4.57838734e-17 -3.14309773e-16 1.21608455e-15 -4.64848149e-17 -3.89575886e-02 -8.08540395e-17 1.28237973e-16 6.81593611e-03 -8.35066919e-17 -1.00156533e-16 -1.74201705e-16 2.39556639e-01 4.32484922e-16 6.72537317e-17 + 3.80362144e-01 1.49782541e-16 6.27515573e-17 -3.54010265e-01 4.82940748e-17 -6.16823227e-02 6.20938499e-16 4.71589687e-16 -9.18536141e-16 -3.65700951e-16 -2.61249330e-16 -2.01796811e-16 3.17945596e-17 -3.89575886e-02 
2.14594692e-16 -1.51811429e-16 6.81593611e-03 -2.12500255e-17 2.39556639e-01 -5.79371716e-17 -2.59943780e-16 5.30093710e-16 + 6.49510022e-01 -3.99877720e-16 2.33364502e-16 -1.06835200e-15 1.35924369e-16 4.35440159e-16 5.65086482e-17 2.05033710e-02 -2.40594211e-16 -5.69887431e-16 6.81593611e-03 2.06395261e-17 -1.15861484e-16 1.34872389e-01 5.35360292e-17 -5.33661495e-17 3.47762359e-16 -3.36405101e-16 -2.31631258e-16 -2.62331242e-17 -4.85682373e-02 + 6.49510022e-01 1.01473550e-16 -8.91511572e-16 -7.44322866e-16 2.05033710e-02 3.18067759e-16 1.77405265e-16 6.50186302e-16 -2.33952222e-15 3.10478769e-17 6.81593611e-03 -3.09684775e-17 3.48878974e-17 1.34872389e-01 1.38971253e-16 2.83475393e-17 1.26434741e-16 -4.85682373e-02 -2.28940621e-16 -2.75815582e-16 + 6.49510022e-01 -6.05413446e-16 2.05033710e-02 -4.85158818e-16 2.18100282e-16 5.90800199e-16 -4.24041197e-16 -1.76755725e-16 2.48864123e-16 6.96454974e-17 6.81593611e-03 -1.21887704e-16 8.99652487e-17 1.34872389e-01 1.30566455e-16 -4.85682373e-02 4.45112250e-17 2.04467699e-16 -3.02221322e-16 + 1.39709618e+00 3.28775150e-17 3.02403823e-16 1.12276155e-15 4.11283608e-16 -1.68777752e-15 1.99081203e-15 -3.80457763e-16 -2.91238204e-16 -2.93921607e-16 -7.17012563e-20 1.88551508e-16 -4.63408913e-17 -4.73478204e-01 1.77949873e-16 3.47597824e-16 -7.95551748e-17 1.22787443e-15 + 1.36533927e+00 -4.17480353e-16 2.38782046e-16 2.07954517e-16 4.06349249e-16 -9.28946662e-17 -1.76779162e-16 -2.40828408e-16 -2.39556639e-01 -9.25671896e-17 1.12753601e-16 4.85682373e-02 -1.30259315e-17 2.04879371e-01 2.55654809e-16 -8.58762656e-16 1.09717457e-15 + 1.36533927e+00 -2.91149533e-16 4.76951783e-17 4.23208590e-16 -1.68531661e-15 3.22333315e-16 -2.39556639e-01 -2.64951724e-17 -6.87350695e-17 4.85682373e-02 1.57686979e-16 -3.76991794e-16 5.08885431e-16 2.04879371e-01 1.27435694e-15 4.01644429e-16 + 1.39709618e+00 -4.55433608e-17 -2.99630111e-15 3.78928659e-15 6.58110607e-17 3.50160588e-16 -1.80754014e-16 -2.30392781e-17 -3.48739041e-16 
1.55913703e-16 -6.27363150e-17 9.66464560e-16 -1.17769212e-15 -4.73478204e-01 3.90844224e-17 + 1.36533927e+00 -1.30972594e-15 1.71848190e-15 -2.39556639e-01 3.88957496e-17 4.45336175e-17 4.85682373e-02 -1.94106500e-17 -1.20756475e-16 -9.64457501e-16 6.28258136e-16 2.28328203e-16 7.89854971e-17 2.04879371e-01 + -3.45008963e-01 7.00777279e-01 2.73555881e-15 -1.00946454e-15 -1.16246495e-15 -1.18942654e-15 2.34640662e-15 -2.17975499e-17 1.59173761e-15 1.65013351e-16 -5.00521378e-16 3.23944830e-15 2.47573682e-16 + 7.97835961e-01 -2.89307129e-16 4.07830148e-16 3.07829438e-16 1.46389913e-16 -1.66937141e-16 -2.06630218e-17 -1.15365109e-15 4.29729746e-16 6.95094012e-16 -2.40691019e-15 -8.41099617e-16 + 3.80362144e-01 -3.72488558e-16 -1.36186620e-16 -3.54010265e-01 1.65418864e-16 -5.01909637e-17 8.72781487e-16 -3.15475321e-16 -1.75617767e-16 -1.33423172e-16 6.16823227e-02 + 3.80362144e-01 6.95266223e-16 1.24652862e-16 -3.54010265e-01 -2.05726330e-16 2.83810662e-16 2.42210163e-16 6.16823227e-02 -8.82894651e-16 9.45554362e-18 + 3.80362144e-01 -5.04905355e-16 -3.43420006e-16 -3.54010265e-01 1.97895263e-16 6.16823227e-02 5.27704784e-16 4.09823362e-16 -1.55579456e-16 + 6.49510022e-01 3.01665582e-16 1.65533985e-16 -1.52276340e-15 4.86620331e-16 6.36576344e-16 4.94854914e-17 -2.05033710e-02 + 6.49510022e-01 -1.88619393e-17 -1.87155561e-16 -5.78216449e-16 -2.05033710e-02 1.56194250e-15 -3.49237870e-16 + 6.49510022e-01 -3.24872954e-16 -2.05033710e-02 -7.32295395e-16 -8.96538828e-16 9.96070017e-16 + 1.39709618e+00 -1.11050273e-16 -1.69128575e-17 -1.24504759e-15 -8.08763902e-16 + 1.36533927e+00 3.67630294e-16 -7.04593046e-17 8.84496171e-18 + 1.36533927e+00 9.35720969e-16 -1.83878822e-16 + 1.39709618e+00 1.75929309e-16 + 1.36533927e+00 diff --git a/source/module_hsolver/test/PEXSI-S-GammaOnly-Si2.dat b/source/module_hsolver/test/PEXSI-S-GammaOnly-Si2.dat new file mode 100644 index 0000000000..e1bb9a1439 --- /dev/null +++ b/source/module_hsolver/test/PEXSI-S-GammaOnly-Si2.dat @@ -0,0 
+1,26 @@ +26 1.49964801e+00 -1.41677065e+00 -2.71050543e-19 -4.60108297e-17 -3.03983184e-17 7.58941521e-18 7.92145212e-17 1.30781887e-17 -1.58750915e-17 7.77915059e-18 1.07336015e-17 6.22542594e-17 8.78542573e-18 1.24515654e+00 -1.87761557e+00 -4.52345240e-17 -1.21904982e-16 -4.81699167e-17 -3.15892467e-17 -1.29304662e-16 -9.53928505e-18 -6.92703544e-17 -2.51210914e-17 -6.61062629e-17 -5.20340810e-17 -2.29647573e-17 + 3.24615314e+00 1.08420217e-17 4.77048956e-18 -1.56125113e-17 -3.46944695e-18 2.05998413e-17 3.10081821e-17 4.84909422e-17 1.85669622e-18 -1.14518854e-18 -9.97060572e-17 1.05438661e-17 -1.87761557e+00 2.38584105e+00 5.04340357e-17 2.33103467e-17 6.07830843e-18 -2.07895767e-17 3.37674767e-16 -7.05544564e-17 -1.25743735e-16 2.64630033e-17 4.25617115e-17 -2.13442138e-16 2.66781497e-17 + 5.45596765e-01 2.16840434e-18 4.33680869e-18 -5.12717090e-01 -1.38777878e-17 -3.03576608e-18 2.16840434e-19 2.16840434e-19 -4.76710143e-18 8.09763498e-19 0.00000000e+00 4.52345240e-17 -5.04340357e-17 -1.24362116e-01 -3.18772379e-17 -7.50471191e-18 9.98285745e-02 1.08555743e-17 1.01711716e-17 1.79143233e-17 2.27868803e-17 2.29969445e-18 -1.53440018e-18 5.44995645e-01 + 5.45596765e-01 5.42101086e-19 -1.38777878e-17 -5.12717090e-01 -1.81061763e-17 1.57209315e-18 -9.14795583e-19 0.00000000e+00 -1.27054942e-18 -1.26038503e-18 1.21904982e-16 -2.33103467e-17 -3.18772379e-17 -1.24362116e-01 2.03965534e-17 1.08555743e-17 9.98285745e-02 2.64680855e-17 3.72186277e-17 -3.19162015e-18 5.44995645e-01 3.61445899e-17 1.12147162e-18 + 5.45596765e-01 -3.03576608e-18 -1.81061763e-17 -5.12717090e-01 -2.75793928e-18 0.00000000e+00 4.40457133e-18 -2.09217138e-18 -7.86046575e-18 4.81699167e-17 -6.07830843e-18 -7.50471191e-18 2.03965534e-17 -1.24362116e-01 1.01711716e-17 2.64680855e-17 9.98285745e-02 -3.06117707e-18 5.44995645e-01 -4.81792340e-18 1.73573992e-17 2.58853269e-17 + 5.51297780e-01 -2.77555756e-17 1.73472348e-18 1.35525272e-19 -4.60785923e-19 -1.00559751e-17 5.82758668e-19 
-1.35525272e-20 3.15892467e-17 2.07895767e-17 9.98285745e-02 1.08555743e-17 1.01711716e-17 -3.41075882e-02 1.01779479e-17 6.03087458e-17 -5.67003855e-18 -3.71339244e-18 1.81722449e-17 7.94516905e-19 -4.08542832e-01 + 5.51297780e-01 -2.45029691e-17 -5.96311195e-19 -1.35525272e-19 -1.35525272e-20 4.60785923e-19 -3.19839641e-18 1.29304662e-16 -3.37674767e-16 1.08555743e-17 9.98285745e-02 2.64680855e-17 1.01779479e-17 -3.41075882e-02 3.01950305e-17 -1.21972744e-17 2.71050543e-18 -4.08542832e-01 1.97392558e-17 6.03087458e-18 + 5.51297780e-01 2.90024081e-18 -1.35525272e-20 -1.31459513e-18 -4.74338450e-19 2.16840434e-18 9.53928505e-18 7.05544564e-17 1.01711716e-17 2.64680855e-17 9.98285745e-02 6.03087458e-17 3.01950305e-17 -3.41075882e-02 -4.38763067e-18 -4.08542832e-01 3.43793733e-17 2.17179248e-18 -5.34647196e-18 + 9.27713518e-01 2.88668828e-18 -1.21972744e-19 7.70699912e-17 7.72494048e-19 -6.92703544e-17 -1.25743735e-16 -1.79143233e-17 -3.72186277e-17 3.06117707e-18 5.67003855e-18 1.21972744e-17 4.38763067e-18 -6.02927412e-01 -1.92810110e-17 2.84052499e-18 -7.30159341e-17 6.80624854e-17 + 1.16731758e+00 3.47791728e-18 -3.22550146e-18 3.52365706e-19 -2.51210914e-17 2.64630033e-17 -2.27868803e-17 3.19162015e-18 -5.44995645e-01 3.71339244e-18 -2.71050543e-18 4.08542832e-01 -1.92810110e-17 1.40487499e-01 6.89230709e-18 2.04346699e-17 1.79486282e-17 + 1.16731758e+00 1.04896560e-17 -6.09863722e-19 -6.61062629e-17 4.25617115e-17 -2.29969445e-18 -5.44995645e-01 4.81792340e-18 -1.81722449e-17 4.08542832e-01 -3.43793733e-17 2.84052499e-18 6.89230709e-18 1.40487499e-01 -3.05927125e-18 -4.40795946e-18 + 9.27713518e-01 7.03147766e-21 -5.20340810e-17 -2.13442138e-16 1.53440018e-18 -3.61445899e-17 -1.73573992e-17 -7.94516905e-19 -1.97392558e-17 -2.17179248e-18 -7.30159341e-17 2.04346699e-17 -3.05927125e-18 -6.02927412e-01 1.56701095e-18 + 1.16731758e+00 -2.29647573e-17 2.66781497e-17 -5.44995645e-01 -1.12147162e-18 -2.58853269e-17 4.08542832e-01 -6.03087458e-18 5.34647196e-18 
6.80624854e-17 1.79486282e-17 -4.40795946e-18 1.56701095e-18 1.40487499e-01 + 1.49964801e+00 -1.41677065e+00 -2.71050543e-19 -4.60108297e-17 -3.03983184e-17 7.58941521e-18 7.92145212e-17 1.30781887e-17 -1.58750915e-17 7.77915059e-18 1.07336015e-17 6.22542594e-17 8.78542573e-18 + 3.24615314e+00 1.08420217e-17 4.77048956e-18 -1.56125113e-17 -3.46944695e-18 2.05998413e-17 3.10081821e-17 4.84909422e-17 1.85669622e-18 -1.14518854e-18 -9.97060572e-17 1.05438661e-17 + 5.45596765e-01 2.16840434e-18 4.33680869e-18 -5.12717090e-01 -1.38777878e-17 -3.03576608e-18 2.16840434e-19 2.16840434e-19 -4.76710143e-18 8.09763498e-19 0.00000000e+00 + 5.45596765e-01 5.42101086e-19 -1.38777878e-17 -5.12717090e-01 -1.81061763e-17 1.57209315e-18 -9.14795583e-19 0.00000000e+00 -1.27054942e-18 -1.26038503e-18 + 5.45596765e-01 -3.03576608e-18 -1.81061763e-17 -5.12717090e-01 -2.75793928e-18 0.00000000e+00 4.40457133e-18 -2.09217138e-18 -7.86046575e-18 + 5.51297780e-01 -2.77555756e-17 1.73472348e-18 1.35525272e-19 -4.60785923e-19 -1.00559751e-17 5.82758668e-19 -1.35525272e-20 + 5.51297780e-01 -2.45029691e-17 -5.96311195e-19 -1.35525272e-19 -1.35525272e-20 4.60785923e-19 -3.19839641e-18 + 5.51297780e-01 2.90024081e-18 -1.35525272e-20 -1.31459513e-18 -4.74338450e-19 2.16840434e-18 + 9.27713518e-01 2.88668828e-18 -1.21972744e-19 7.70699912e-17 7.72494048e-19 + 1.16731758e+00 3.47791728e-18 -3.22550146e-18 3.52365706e-19 + 1.16731758e+00 1.04896560e-17 -6.09863722e-19 + 9.27713518e-01 7.03147766e-21 + 1.16731758e+00 diff --git a/source/module_hsolver/test/diago_pexsi_parallel_test.sh b/source/module_hsolver/test/diago_pexsi_parallel_test.sh new file mode 100644 index 0000000000..4767d690a2 --- /dev/null +++ b/source/module_hsolver/test/diago_pexsi_parallel_test.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +np=`cat /proc/cpuinfo | grep "cpu cores" | uniq| awk '{print $NF}'` +echo "nprocs in this machine is $np" + +for i in 6 3 2;do + if [[ $i -gt $np ]];then + continue + fi + echo "TEST DIAGO PEXSI in 
parallel, nprocs=$i" + mpirun -np $i ./HSolver_LCAO_PEXSI + if [[ $? != 0 ]];then + echo -e "\e[1;33m [ FAILED ] \e[0m"\ + "execute UT with $i cores error." + exit 1 + fi + break +done diff --git a/source/module_hsolver/test/diago_pexsi_test.cpp b/source/module_hsolver/test/diago_pexsi_test.cpp new file mode 100644 index 0000000000..4b3b8aba99 --- /dev/null +++ b/source/module_hsolver/test/diago_pexsi_test.cpp @@ -0,0 +1,428 @@ +#ifdef __PEXSI +#include +#include +#include +#include +#include +#include +#include + +#include "module_hsolver/diago_pexsi.h" +#include "module_hsolver/module_pexsi/pexsi_solver.h" +#include "module_hsolver/test/diago_elpa_utils.h" +#include "module_basis/module_ao/parallel_orbitals.h" +#include "module_base/parallel_global.h" +#include "module_base/global_variable.h" + +#define PASSTHRESHOLD 5e-4 +#define DETAILINFO false +#define PRINT_HS false +#define REPEATRUN 1 + +template class HamiltTEST : public hamilt::Hamilt +{ + public: + int desc[9]; + int nrow, ncol; + std::vector h_local; + std::vector s_local; + + void matrix(hamilt::MatrixBlock &hk_in, hamilt::MatrixBlock &sk_in) + { + hk_in = hamilt::MatrixBlock{this->h_local.data(), + (size_t)this->nrow, + (size_t)this->ncol, + this->desc}; + sk_in = hamilt::MatrixBlock{this->s_local.data(), + (size_t)this->nrow, + (size_t)this->ncol, + this->desc}; + } + + void constructHamilt(const int iter, const hamilt::MatrixBlock rho) {} + void updateHk(const int ik) {} +}; + + + +template class PexsiPrepare +{ + public: + PexsiPrepare(int nlocal, int nbands, int nb2d, int sparsity, std::string hfname, std::string sfname, std::string dmname) + : nlocal(nlocal), nbands(nbands), nb2d(nb2d), sparsity(sparsity), hfname(hfname), sfname(sfname), dmname(dmname) + { + MPI_Comm_size(MPI_COMM_WORLD, &dsize); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + } + + int dsize, myrank; + int nlocal, nbands, nb2d, sparsity; + int nprows, npcols, myprow, mypcol; + int nrow, ncol; + double hsolver_time = 0.0; + 
std::string sfname, hfname, dmname; + HamiltTEST hmtest; + std::vector h; + std::vector s; + std::vector h_local; + std::vector s_local; + psi::Psi psi; + hsolver::DiagoPexsi* dh = nullptr; + Parallel_Orbitals po; + std::vector abc; + int icontxt; + + double mu; + + // density matrix + std::vector dm_local; + std::vector edm_local; + + std::vector dm; + + bool read_HS() + { + bool readhfile = false; + bool readsfile = false; + if (this->myrank == 0) + { + int hdim, sdim; + readhfile = LCAO_DIAGO_TEST::read_hs>(hfname, this->h); + readsfile = LCAO_DIAGO_TEST::read_hs>(sfname, this->s); + hdim = sqrt(this->h.size()); + sdim = sqrt(this->s.size()); + if (hdim != sdim) + { + printf("Error: dimensions of H and S are not equal, %d, %d\n", hdim, sdim); + readhfile = readsfile = false; + } + nlocal = hdim; + } + MPI_Bcast(&nlocal, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&readhfile, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD); + MPI_Bcast(&readsfile, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD); + nbands = nlocal/2; + if (readhfile && readsfile) + return true; + return false; + } + + bool produce_HS() + { + bool ok = this->read_HS(); + return ok; + } + + void pb2d() + { + LCAO_DIAGO_TEST::process_2d(nprows, npcols, myprow, mypcol, icontxt); + + hmtest.nrow = LCAO_DIAGO_TEST::na_rc(nlocal, nb2d, nprows, myprow); // the number of row of the new_matrix in each process + hmtest.ncol = LCAO_DIAGO_TEST::na_rc(nlocal, nb2d, npcols, mypcol); // the number of column of the new_matrix in each process + + + int ISRC = 0, info; + descinit_(hmtest.desc, &nlocal, &nlocal, &nb2d, &nb2d, &ISRC, &ISRC, &icontxt, &(hmtest.nrow), &info); + if (info != 0) + { + printf("Invalid blacs-distribution. 
Abort!\n"); + exit(1); + } + + + // set po variables + po.ncol = hmtest.ncol; + po.nrow = hmtest.nrow; + po.nb = nb2d; + po.blacs_ctxt = icontxt; + po.comm_2D = MPI_COMM_WORLD; + po.dim0 = nprows; + po.dim1 = npcols; + po.testpb = true; + + if (DETAILINFO && myrank == 0) + { + std::cout << "nrow: " << hmtest.nrow << ", ncol: " << hmtest.ncol << ", nb: " << nb2d << std::endl; + } + + dh = new hsolver::DiagoPexsi(&po); + } + + void distribute_data() + { + + int local_size = hmtest.nrow * hmtest.ncol; + this->h_local.resize(local_size); + this->s_local.resize(local_size); + + LCAO_DIAGO_TEST::distribute_data(this->h.data(),this->h_local.data(),nlocal,nb2d,hmtest.nrow,hmtest.ncol,icontxt); + LCAO_DIAGO_TEST::distribute_data(this->s.data(),this->s_local.data(),nlocal,nb2d,hmtest.nrow,hmtest.ncol,icontxt); + } + + void set_env() + { + GlobalV::NLOCAL = nlocal; + GlobalV::NBANDS = nbands; + GlobalV::DSIZE = dsize; + GlobalV::NSPIN = 1; + DIAG_WORLD = MPI_COMM_WORLD; + GlobalV::NPROC = dsize; + + psi.fix_k(0); + } + + void set_pexsi_vars() + { + // pexsi::PEXSI_Solver::set_pexsi_vars(); + pexsi::PEXSI_Solver::pexsi_npole = 40; + pexsi::PEXSI_Solver::pexsi_inertia = true; + pexsi::PEXSI_Solver::pexsi_nmax = 80; + // pexsi_symbolic = 1; + pexsi::PEXSI_Solver::pexsi_comm = true; + pexsi::PEXSI_Solver::pexsi_storage = true; + pexsi::PEXSI_Solver::pexsi_ordering = 0; + pexsi::PEXSI_Solver::pexsi_row_ordering = 1; + pexsi::PEXSI_Solver::pexsi_nproc = 1; + pexsi::PEXSI_Solver::pexsi_symm = true; + pexsi::PEXSI_Solver::pexsi_trans = false; + pexsi::PEXSI_Solver::pexsi_method = 1; + pexsi::PEXSI_Solver::pexsi_nproc_pole = 1; + // pexsi_spin = 2; + pexsi::PEXSI_Solver::pexsi_temp = 0.015; + pexsi::PEXSI_Solver::pexsi_gap = 0; + pexsi::PEXSI_Solver::pexsi_delta_e = 20.0; + pexsi::PEXSI_Solver::pexsi_mu_lower = -10; + pexsi::PEXSI_Solver::pexsi_mu_upper = 10; + pexsi::PEXSI_Solver::pexsi_mu = 0.0; + pexsi::PEXSI_Solver::pexsi_mu_thr = 0.05; + pexsi::PEXSI_Solver::pexsi_mu_expand = 
0.3; + pexsi::PEXSI_Solver::pexsi_mu_guard = 0.2; + pexsi::PEXSI_Solver::pexsi_elec_thr = 0.001; + pexsi::PEXSI_Solver::pexsi_zero_thr = 1e-10; + pexsi::PEXSI_Solver::pexsi_mu = mu; + } + + void diago() + { + if (DETAILINFO && myrank == 0) + { + std::cout << "Start to solve the KS equation using PEXSI" << std::endl; + } + this->pb2d(); + if (DETAILINFO && myrank == 0) + { + std::cout << "Finish the 2D parallelization" << std::endl; + } + this->distribute_data(); + if (DETAILINFO && myrank == 0) + { + std::cout << "Finish the data distribution" << std::endl; + } + this->set_env(); + if (DETAILINFO && myrank == 0) + { + std::cout << "Finish the environment setting" << std::endl; + } + double starttime = 0.0, endtime = 0.0; + this->set_pexsi_vars(); + if (DETAILINFO && myrank == 0) + { + std::cout << "Finish the PEXSI setting" << std::endl; + } + MPI_Barrier(MPI_COMM_WORLD); + starttime = MPI_Wtime(); + for(int i=0;ih_local; + hmtest.s_local = this->s_local; + dh->diag(&hmtest, psi, nullptr); + + // copy the density matrix to dm_local + dm_local = dh->DM; + edm_local = dh->EDM; + } + MPI_Barrier(MPI_COMM_WORLD); + if (DETAILINFO && myrank == 0) + { + std::cout << "Finish the KS equation solving" << std::endl; + } + endtime = MPI_Wtime(); + hsolver_time = (endtime - starttime)/REPEATRUN; + } + + bool read_ref() + { + auto f_dm = std::ifstream(dmname); + if (!f_dm.is_open()) + { + std::cout << "Error: cannot open the reference file " << dmname << std::endl; + return false; + } + int nread = 0; + f_dm >> nread; + if (nread != nlocal) + { + std::cout << "Error: the number of global orbitals in the reference file is not equal to the current calculation" << std::endl; + return false; + } + f_dm >> nread; + if (nread != nlocal) + { + std::cout << "Error: the number of global orbitals in the reference file is not equal to the current calculation" << std::endl; + return false; + } + + f_dm >> GlobalV::nelec >> mu; + + dm.resize(nread*nread); + // T* edm = new 
T[nglobal*nglobal]; + for (int i = 0; i < nread; i++) + { + for (int j = 0; j < nread; j++) + { + f_dm >> dm[i*nread+j]; + } + } + return true; + } + + + bool compare_ref(std::stringstream &out_info) + { + double maxerror = 0.0; + int iindex = 0; + bool pass = true; + + auto ofs = std::ofstream("dm_local" + std::to_string(myprow) + std::to_string(mypcol) + ".dat"); + + int SENDPROW = 0, SENDPCOL = 0, tag = 0; + + // do iteration for matrix, distribute old_matrix to each process, pass a block each time + for (int row = 0; row < nlocal; row++) + { + int recv_prow = (row / nb2d) % nprows; // the row number of recive process + int nm_row = ((row / nb2d) / nprows) * nb2d + row % nb2d; // row number of block in new_matrix + for (int col = 0; col < nlocal; col += nb2d) + { + int recv_pcol = (col / nb2d) % npcols; // the column number of recive process + int nm_col = ((col / nb2d) / npcols) * nb2d + col % nb2d; + int pass_length = std::min(nlocal - col, nb2d); // nlocal may not be devided by nb2d; + // at global: nlocal * row + col + i + // at local: (nm_col + i) * hmtest.nrow + nm_row + + if (myprow == recv_prow && mypcol == recv_pcol) + { + double diff = 0; + for (int i = 0; i < pass_length; i++) + { + diff = std::abs(dm_local[0][(nm_col + i) * hmtest.nrow + nm_row] - dm[nlocal * row + col + i]); + if (diff > maxerror) + { + maxerror = diff; + } + if (diff > PASSTHRESHOLD) + { + pass = false; + } + } + } + } + } + + bool pass_all = true; + double maxerror_all = 0.0; + MPI_Allreduce(&pass, &pass_all, 1, MPI_C_BOOL, MPI_LAND, MPI_COMM_WORLD); + MPI_Allreduce(&maxerror, &maxerror_all, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + if (myrank == 0) + { + std::cout << "H/S matrix are read from " << hfname << ", " << sfname << std::endl; + std::cout << "Density matrix are read from " << dmname << std::endl; + std::cout << std::endl; + out_info << "Maximum difference between ks_hsolver and ref is " << maxerror_all << ", the pass threshold is " << PASSTHRESHOLD << std::endl; + + if 
(DETAILINFO) + { + std::cout << out_info.str(); + out_info.str(""); + out_info.clear(); + } + } + delete dh; + return pass_all; + } +}; + +class PexsiGammaOnlyTest : public ::testing::TestWithParam> {}; + +TEST_P(PexsiGammaOnlyTest, LCAO) +{ + std::stringstream out_info; + PexsiPrepare dp = GetParam(); + if (DETAILINFO && dp.myrank == 0) + { + std::cout << "nlocal: " << dp.nlocal << ", nbands: " << dp.nbands << ", nb2d: " << dp.nb2d << ", sparsity: " << dp.sparsity << std::endl; + + } + ASSERT_TRUE(dp.produce_HS()); + if (DETAILINFO && dp.myrank == 0) + { + std::cout << "H/S matrix are read from " << dp.hfname << ", " << dp.sfname << std::endl; + } + ASSERT_TRUE(dp.read_ref()); + if (DETAILINFO && dp.myrank == 0) + { + std::cout << "Density matrix are read from " << dp.dmname << std::endl; + } + dp.diago(); + if (DETAILINFO && dp.myrank == 0) + { + std::cout << "Time for hsolver: " << dp.hsolver_time << "s" << std::endl; + } + + bool pass = dp.compare_ref(out_info); + EXPECT_TRUE(pass) << out_info.str(); + + MPI_Barrier(MPI_COMM_WORLD); +} + +INSTANTIATE_TEST_SUITE_P( + DiagoTest, + PexsiGammaOnlyTest, + ::testing::Values( //int nlocal, int nbands, int nb2d, int sparsity, std::string ks_solver_in, std::string hfname, std::string sfname + PexsiPrepare(0, 0, 2, 0, "PEXSI-H-GammaOnly-Si2.dat", "PEXSI-S-GammaOnly-Si2.dat", "PEXSI-DM-GammaOnly-Si2.dat"), + PexsiPrepare(0, 0, 1, 0, "PEXSI-H-GammaOnly-Si2.dat", "PEXSI-S-GammaOnly-Si2.dat", "PEXSI-DM-GammaOnly-Si2.dat") + + )); + + +int main(int argc, char **argv) +{ + MPI_Init(&argc, &argv); + int mypnum, dsize; + MPI_Comm_size(MPI_COMM_WORLD, &dsize); + MPI_Comm_rank(MPI_COMM_WORLD, &mypnum); + + testing::InitGoogleTest(&argc, argv); + //Parallel_Global::split_diag_world(dsize); + ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners(); + if (mypnum != 0) + { + delete listeners.Release(listeners.default_result_printer()); + } + int result = RUN_ALL_TESTS(); + + if (mypnum == 0 && result 
!= 0) + { + std::cout << "ERROR:some tests are not passed" << std::endl; + return result; + } + else + { + MPI_Finalize(); + return 0; + } +} + + +#endif // __PEXSI \ No newline at end of file diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp index e7ee360a28..f93e01659b 100644 --- a/source/module_io/input.cpp +++ b/source/module_io/input.cpp @@ -650,6 +650,34 @@ void Input::Default(void) qo_thr = 1e-6; qo_screening_coeff = {}; + //========================================================== + // variables for PEXSI + //========================================================== + pexsi_npole = 40; + pexsi_inertia = true; + pexsi_nmax = 80; + // pexsi_symbolic = 1; + pexsi_comm = true; + pexsi_storage = true; + pexsi_ordering = 0; + pexsi_row_ordering = 1; + pexsi_nproc = 1; + pexsi_symm = true; + pexsi_trans = false; + pexsi_method = 1; + pexsi_nproc_pole = 1; + // pexsi_spin = 2; + pexsi_temp = 0.015; + pexsi_gap = 0; + pexsi_delta_e = 20.0; + pexsi_mu_lower = -10; + pexsi_mu_upper = 10; + pexsi_mu = 0.0; + pexsi_mu_thr = 0.05; + pexsi_mu_expand = 0.3; + pexsi_mu_guard = 0.2; + pexsi_elec_thr = 0.001; + pexsi_zero_thr = 1e-10; return; } @@ -2358,6 +2386,9 @@ bool Input::Read(const std::string& fn) { read_value(ifs, sc_file); } + //---------------------------------------------------------------------------------- + // Quasiatomic orbital + //---------------------------------------------------------------------------------- else if (strcmp("qo_switch", word) == 0){ read_bool(ifs, qo_switch); } @@ -2373,6 +2404,106 @@ bool Input::Read(const std::string& fn) else if (strcmp("qo_screening_coeff", word) == 0){ read_value2stdvector(ifs, qo_screening_coeff); } + //---------------------------------------------------------------------------------- + // PEXSI + //---------------------------------------------------------------------------------- + else if (strcmp("pexsi_npole", word) == 0){ + read_value(ifs, pexsi_npole); + } + else if 
(strcmp("pexsi_inertia", word) == 0){ + read_value(ifs, pexsi_inertia); + } + else if (strcmp("pexsi_nmax", word) == 0) { + read_value(ifs, pexsi_nmax); + } + // else if (strcmp("pexsi_symbolic", word) == 0) + // { + // read_value(ifs, pexsi_symbolic); + // } + else if (strcmp("pexsi_comm", word) == 0) + { + read_value(ifs, pexsi_comm); + } + else if (strcmp("pexsi_storage", word) == 0) + { + read_value(ifs, pexsi_storage); + } + else if (strcmp("pexsi_ordering", word) == 0) + { + read_value(ifs, pexsi_ordering); + } + else if (strcmp("pexsi_row_ordering", word) == 0) + { + read_value(ifs, pexsi_row_ordering); + } + else if (strcmp("pexsi_nproc", word) == 0) + { + read_value(ifs, pexsi_nproc); + } + else if (strcmp("pexsi_symm", word) == 0) + { + read_value(ifs, pexsi_symm); + } + else if (strcmp("pexsi_trans", word) == 0) + { + read_value(ifs, pexsi_trans); + } + else if (strcmp("pexsi_method", word) == 0) + { + read_value(ifs, pexsi_method); + } + else if (strcmp("pexsi_nproc_pole", word) == 0) + { + read_value(ifs, pexsi_nproc_pole); + } + // else if (strcmp("pexsi_spin", word) == 0) + // { + // read_value(ifs, pexsi_spin); + // } + else if (strcmp("pexsi_temp", word) == 0) + { + read_value(ifs, pexsi_temp); + } + else if (strcmp("pexsi_gap", word) == 0) + { + read_value(ifs, pexsi_gap); + } + else if (strcmp("pexsi_delta_e", word) == 0) + { + read_value(ifs, pexsi_delta_e); + } + else if (strcmp("pexsi_mu_lower", word) == 0) + { + read_value(ifs, pexsi_mu_lower); + } + else if (strcmp("pexsi_mu_upper", word) == 0) + { + read_value(ifs, pexsi_mu_upper); + } + else if (strcmp("pexsi_mu", word) == 0) + { + read_value(ifs, pexsi_mu); + } + else if (strcmp("pexsi_mu_thr", word) == 0) + { + read_value(ifs, pexsi_mu_thr); + } + else if (strcmp("pexsi_mu_expand", word) == 0) + { + read_value(ifs, pexsi_mu_expand); + } + else if (strcmp("pexsi_mu_guard", word) == 0) + { + read_value(ifs, pexsi_mu_guard); + } + else if (strcmp("pexsi_elec_thr", word) == 0) + { + 
read_value(ifs, pexsi_elec_thr); + } + else if (strcmp("pexsi_zero_thr", word) == 0) + { + read_value(ifs, pexsi_zero_thr); + } else { // xiaohui add 2015-09-15 @@ -3754,6 +3885,34 @@ void Input::Bcast() Parallel_Common::bcast_bool(qo_switch); Parallel_Common::bcast_string(qo_basis); Parallel_Common::bcast_double(qo_thr); + //========================================================== + // PEXSI + //========================================================== + Parallel_Common::bcast_int(pexsi_npole); + Parallel_Common::bcast_bool(pexsi_inertia); + Parallel_Common::bcast_int(pexsi_nmax); + // Parallel_Common::bcast_int(pexsi_symbolic); + Parallel_Common::bcast_bool(pexsi_comm); + Parallel_Common::bcast_bool(pexsi_storage); + Parallel_Common::bcast_int(pexsi_ordering); + Parallel_Common::bcast_int(pexsi_row_ordering); + Parallel_Common::bcast_int(pexsi_nproc); + Parallel_Common::bcast_bool(pexsi_symm); + Parallel_Common::bcast_bool(pexsi_trans); + Parallel_Common::bcast_int(pexsi_method); + Parallel_Common::bcast_int(pexsi_nproc_pole); + // Parallel_Common::bcast_double(pexsi_spin); + Parallel_Common::bcast_double(pexsi_temp); + Parallel_Common::bcast_double(pexsi_gap); + Parallel_Common::bcast_double(pexsi_delta_e); + Parallel_Common::bcast_double(pexsi_mu_lower); + Parallel_Common::bcast_double(pexsi_mu_upper); + Parallel_Common::bcast_double(pexsi_mu); + Parallel_Common::bcast_double(pexsi_mu_thr); + Parallel_Common::bcast_double(pexsi_mu_expand); + Parallel_Common::bcast_double(pexsi_mu_guard); + Parallel_Common::bcast_double(pexsi_elec_thr); + Parallel_Common::bcast_double(pexsi_zero_thr); /* broadcasting std::vector is sometime a annorying task... 
*/ if (ntype != 0) /* ntype has been broadcasted before */ { @@ -3974,6 +4133,10 @@ void Input::Check(void) { ModuleBase::WARNING_QUIT("Input", "lapack can not be used with plane wave basis."); } + else if (ks_solver == "pexsi") + { + ModuleBase::WARNING_QUIT("Input", "pexsi can not be used with plane wave basis."); + } else if (ks_solver != "default" && ks_solver != "cg" && ks_solver != "dav" && @@ -4046,6 +4209,17 @@ void Input::Check(void) #ifndef __MPI ModuleBase::WARNING_QUIT("Input", "Cusolver can not be used for series version."); #endif + } + else if (ks_solver == "pexsi") + { +#ifdef __PEXSI + GlobalV::ofs_warning << " It's ok to use pexsi." << std::endl; +#else + ModuleBase::WARNING_QUIT("Input", + "Can not use PEXSI if abacus is not compiled with PEXSI. Please change ks_solver to scalapack_gvx."); +#endif + + } else if (ks_solver != "default") { diff --git a/source/module_io/input.h b/source/module_io/input.h index 1f2f81903a..fb1b79f3ef 100644 --- a/source/module_io/input.h +++ b/source/module_io/input.h @@ -612,6 +612,34 @@ class Input double qo_thr = 1e-6; std::vector qo_strategy = {}; std::vector qo_screening_coeff = {}; + //========================================================== + // variables for PEXSI + //========================================================== + int pexsi_npole = 40; + bool pexsi_inertia = true; + int pexsi_nmax = 80; + // int pexsi_symbolic = 1; + bool pexsi_comm = true; + bool pexsi_storage = true; + int pexsi_ordering = 0; + int pexsi_row_ordering = 1; + int pexsi_nproc = 1; + bool pexsi_symm = true; + bool pexsi_trans = false; + int pexsi_method = 1; + int pexsi_nproc_pole = 1; + // double pexsi_spin = 2; + double pexsi_temp = 0.015; + double pexsi_gap = 0; + double pexsi_delta_e = 20.0; + double pexsi_mu_lower = -10; + double pexsi_mu_upper = 10; + double pexsi_mu = 0.0; + double pexsi_mu_thr = 0.05; + double pexsi_mu_expand = 0.3; + double pexsi_mu_guard = 0.2; + double pexsi_elec_thr = 0.001; + double pexsi_zero_thr = 
1e-10; std::time_t get_start_time(void) const { diff --git a/source/module_io/input_conv.cpp b/source/module_io/input_conv.cpp index 755067f29b..54de3d6e1e 100644 --- a/source/module_io/input_conv.cpp +++ b/source/module_io/input_conv.cpp @@ -26,6 +26,9 @@ #include "module_hamilt_lcao/module_dftu/dftu.h" #include "module_hamilt_lcao/module_tddft/evolve_elec.h" #endif +#ifdef __PEXSI +#include "module_hsolver/module_pexsi/pexsi_solver.h" +#endif #include "module_base/timer.h" #include "module_elecstate/elecstate_lcao.h" @@ -827,6 +830,37 @@ void Input_Conv::Convert(void) GlobalV::qo_strategy = INPUT.qo_strategy; GlobalV::qo_thr = INPUT.qo_thr; GlobalV::qo_screening_coeff = INPUT.qo_screening_coeff; + + //----------------------------------------------- + // PEXSI related parameters + //----------------------------------------------- +#ifdef __PEXSI + pexsi::PEXSI_Solver::pexsi_npole = INPUT.pexsi_npole; + pexsi::PEXSI_Solver::pexsi_inertia = INPUT.pexsi_inertia; + pexsi::PEXSI_Solver::pexsi_nmax = INPUT.pexsi_nmax; + // pexsi::PEXSI_Solver::pexsi_symbolic = INPUT.pexsi_symbolic; + pexsi::PEXSI_Solver::pexsi_comm = INPUT.pexsi_comm; + pexsi::PEXSI_Solver::pexsi_storage = INPUT.pexsi_storage; + pexsi::PEXSI_Solver::pexsi_ordering = INPUT.pexsi_ordering; + pexsi::PEXSI_Solver::pexsi_row_ordering = INPUT.pexsi_row_ordering; + pexsi::PEXSI_Solver::pexsi_nproc = INPUT.pexsi_nproc; + pexsi::PEXSI_Solver::pexsi_symm = INPUT.pexsi_symm; + pexsi::PEXSI_Solver::pexsi_trans = INPUT.pexsi_trans; + pexsi::PEXSI_Solver::pexsi_method = INPUT.pexsi_method; + pexsi::PEXSI_Solver::pexsi_nproc_pole = INPUT.pexsi_nproc_pole; + // pexsi::PEXSI_Solver::pexsi_spin = INPUT.pexsi_spin; + pexsi::PEXSI_Solver::pexsi_temp = INPUT.pexsi_temp; + pexsi::PEXSI_Solver::pexsi_gap = INPUT.pexsi_gap; + pexsi::PEXSI_Solver::pexsi_delta_e = INPUT.pexsi_delta_e; + pexsi::PEXSI_Solver::pexsi_mu_lower = INPUT.pexsi_mu_lower; + pexsi::PEXSI_Solver::pexsi_mu_upper = INPUT.pexsi_mu_upper; + 
pexsi::PEXSI_Solver::pexsi_mu = INPUT.pexsi_mu; + pexsi::PEXSI_Solver::pexsi_mu_thr = INPUT.pexsi_mu_thr; + pexsi::PEXSI_Solver::pexsi_mu_expand = INPUT.pexsi_mu_expand; + pexsi::PEXSI_Solver::pexsi_mu_guard = INPUT.pexsi_mu_guard; + pexsi::PEXSI_Solver::pexsi_elec_thr = INPUT.pexsi_elec_thr; + pexsi::PEXSI_Solver::pexsi_zero_thr = INPUT.pexsi_zero_thr; +#endif ModuleBase::timer::tick("Input_Conv", "Convert"); return; } diff --git a/source/module_io/test/for_testing_input_conv.h b/source/module_io/test/for_testing_input_conv.h index 1e25cc32e5..b012dbf183 100644 --- a/source/module_io/test/for_testing_input_conv.h +++ b/source/module_io/test/for_testing_input_conv.h @@ -26,6 +26,9 @@ #include "module_relax/relax_old/ions_move_basic.h" #include "module_relax/relax_old/ions_move_cg.h" #include "module_relax/relax_old/lattice_change_basic.h" +#ifdef __PEXSI +#include "module_hsolver/module_pexsi/pexsi_solver.h" +#endif bool berryphase::berry_phase_flag = false; @@ -355,6 +358,37 @@ pseudopot_cell_vnl ppcell; Charge_Mixing CHR_MIX; } // namespace GlobalC +#ifdef __PEXSI +namespace pexsi +{ +int PEXSI_Solver::pexsi_npole = 0; +bool PEXSI_Solver::pexsi_inertia = 0; +int PEXSI_Solver::pexsi_nmax = 0; +// int PEXSI_Solver::pexsi_symbolic = 0; +bool PEXSI_Solver::pexsi_comm = 0; +bool PEXSI_Solver::pexsi_storage = 0; +int PEXSI_Solver::pexsi_ordering = 0; +int PEXSI_Solver::pexsi_row_ordering = 0; +int PEXSI_Solver::pexsi_nproc = 0; +bool PEXSI_Solver::pexsi_symm = 0; +bool PEXSI_Solver::pexsi_trans = 0; +int PEXSI_Solver::pexsi_method = 0; +int PEXSI_Solver::pexsi_nproc_pole = 0; +// double PEXSI_Solver::pexsi_spin = 2; +double PEXSI_Solver::pexsi_temp = 0.0; +double PEXSI_Solver::pexsi_gap = 0.0; +double PEXSI_Solver::pexsi_delta_e = 0.0; +double PEXSI_Solver::pexsi_mu_lower = 0.0; +double PEXSI_Solver::pexsi_mu_upper = 0.0; +double PEXSI_Solver::pexsi_mu = 0.0; +double PEXSI_Solver::pexsi_mu_thr = 0.0; +double PEXSI_Solver::pexsi_mu_expand = 0.0; +double 
PEXSI_Solver::pexsi_mu_guard = 0.0; +double PEXSI_Solver::pexsi_elec_thr = 0.0; +double PEXSI_Solver::pexsi_zero_thr = 0.0; +} // namespace pexsi +#endif + #undef private #endif diff --git a/source/module_io/test/input_conv_test.cpp b/source/module_io/test/input_conv_test.cpp index bd135d19d5..2f64454291 100644 --- a/source/module_io/test/input_conv_test.cpp +++ b/source/module_io/test/input_conv_test.cpp @@ -523,6 +523,40 @@ TEST_F(InputConvTest, ReadTdEfieldTest) EXPECT_EQ(elecstate::H_TDDFT_pw::heavi_t0[0], 100); EXPECT_NEAR(elecstate::H_TDDFT_pw::heavi_amp[0], 1.00 * ModuleBase::BOHR_TO_A / ModuleBase::Ry_to_eV, 1e-8); } + +#ifdef __PEXSI +TEST_F(InputConvTest, PEXSI) +{ + INPUT.Default(); + std::string input_file = "./support/INPUT"; + INPUT.Read(input_file); + Input_Conv::Convert(); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_npole, 40); + EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_inertia); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_nmax, 80); + EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_comm); + EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_storage); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_ordering, 0); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_row_ordering, 1); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_nproc, 1); + EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_symm); + EXPECT_FALSE(pexsi::PEXSI_Solver::pexsi_trans); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_method, 1); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_nproc_pole, 1); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_temp, 0.015); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_gap, 0); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_delta_e, 20); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu_lower, -10); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu_upper, 10); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu, 0); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu_thr, 0.05); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu_expand, 0.3); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu_guard, 0.2); + 
EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_elec_thr, 0.001); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_zero_thr, 1e-10); +} +#endif + #endif #undef private diff --git a/source/module_io/test/input_test_para.cpp b/source/module_io/test/input_test_para.cpp index 28a4562947..5a6490551a 100644 --- a/source/module_io/test/input_test_para.cpp +++ b/source/module_io/test/input_test_para.cpp @@ -397,6 +397,30 @@ TEST_F(InputParaTest, Bcast) EXPECT_EQ(INPUT.qo_screening_coeff.size(), 0); EXPECT_EQ(INPUT.qo_thr, 1e-6); EXPECT_EQ(INPUT.qo_basis, "szv"); + + EXPECT_EQ(INPUT.pexsi_npole, 40); + EXPECT_TRUE(INPUT.pexsi_inertia); + EXPECT_EQ(INPUT.pexsi_nmax, 80); + EXPECT_TRUE(INPUT.pexsi_comm); + EXPECT_TRUE(INPUT.pexsi_storage); + EXPECT_EQ(INPUT.pexsi_ordering, 0); + EXPECT_EQ(INPUT.pexsi_row_ordering, 1); + EXPECT_EQ(INPUT.pexsi_nproc, 1); + EXPECT_TRUE(INPUT.pexsi_symm); + EXPECT_FALSE(INPUT.pexsi_trans); + EXPECT_EQ(INPUT.pexsi_method, 1); + EXPECT_EQ(INPUT.pexsi_nproc_pole, 1); + EXPECT_DOUBLE_EQ(INPUT.pexsi_temp, 0.015); + EXPECT_DOUBLE_EQ(INPUT.pexsi_gap, 0); + EXPECT_DOUBLE_EQ(INPUT.pexsi_delta_e, 20); + EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_lower, -10); + EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_upper, 10); + EXPECT_DOUBLE_EQ(INPUT.pexsi_mu, 0); + EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_thr, 0.05); + EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_expand, 0.3); + EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_guard, 0.2); + EXPECT_DOUBLE_EQ(INPUT.pexsi_elec_thr, 0.001); + EXPECT_DOUBLE_EQ(INPUT.pexsi_zero_thr, 1e-10); } TEST_F(InputParaTest, Init) diff --git a/source/module_io/test/write_input_test.cpp b/source/module_io/test/write_input_test.cpp index ef9c6a3d28..dc304df90b 100644 --- a/source/module_io/test/write_input_test.cpp +++ b/source/module_io/test/write_input_test.cpp @@ -927,4 +927,42 @@ TEST_F(write_input, Deltaspin22) EXPECT_THAT(output, testing::HasSubstr("sccut 3 #Maximal step size for lambda in eV/uB")); remove("write_input_test.log"); } + +TEST_F (write_input, PEXSI24) +{ + INPUT.Default(); + 
INPUT.Read("./support/witestfile"); + std::string output_file = "write_input_test.log"; + INPUT.Print(output_file); + int a = access("write_input_test.log", 00); + EXPECT_EQ(a, 0); + std::ifstream ifs ("write_input_test.log"); + std::string output ((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); + EXPECT_THAT(output, testing::HasSubstr("#Parameters (24.PEXSI)")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_npole 40 #Number of poles in expansion")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_inertia 1 #Whether inertia counting is used at the very beginning of PEXSI process")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_nmax 80 #Maximum number of PEXSI iterations after each inertia counting procedure.")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_comm 1 #Whether to construct PSelInv communication pattern")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_storage 1 #Storage space used by the Selected Inversion algorithm for symmetric matrices.")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_ordering 0 #Ordering strategy for factorization and selected inversion")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_row_ordering 1 #Row permutation strategy for factorization and selected inversion, 0: NoRowPerm, 1: LargeDiag")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_nproc 1 #Number of processors for parmetis")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_symm 1 #Matrix symmetry")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_trans 0 #Whether to transpose")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_method 1 #pole expansion method, 1: Cauchy Contour Integral, 2: Moussa optimized method")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_nproc_pole 1 #Number of processes used by each pole")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_temp 0.015 #Temperature, in the same unit as H")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_gap 0 #Spectral gap")); + EXPECT_THAT(output, 
testing::HasSubstr("pexsi_delta_e 20 #An upper bound for the spectral radius of \\f$S^{-1} H\\f$")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_lower -10 #Initial guess of lower bound for mu")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_upper 10 #Initial guess of upper bound for mu")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_mu 0 #Initial guess for mu (for the solver)")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_thr 0.05 #Stopping criterion in terms of the chemical potential for the inertia counting procedure")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_expand 0.3 #If the chemical potential is not in the initial interval, the interval is expanded by muInertiaExpansion")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_guard 0.2 #Safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_elec_thr 0.001 #Stopping criterion of the PEXSI iteration in terms of the number of electrons compared to numElectronExact")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_zero_thr 1e-10 #if the absolute value of matrix element is less than ZERO_Limit, it will be considered as 0")); + ifs.close(); + remove("write_input_test.log"); +} #undef private diff --git a/source/module_io/write_input.cpp b/source/module_io/write_input.cpp index a9754d8939..91a04201ab 100644 --- a/source/module_io/write_input.cpp +++ b/source/module_io/write_input.cpp @@ -219,7 +219,7 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou ofs << "\n#Parameters (5.LCAO)" << std::endl; ModuleBase::GlobalFunc::OUTP(ofs, "basis_type", basis_type, "PW; LCAO in pw; LCAO"); - if (ks_solver == "HPSEPS" || ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver") + if (ks_solver == "HPSEPS" || ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "pexsi") { 
ModuleBase::GlobalFunc::OUTP(ofs, "nb2d", nb2d, "2d distribution of atoms"); } @@ -511,7 +511,32 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou ModuleBase::GlobalFunc::OUTP(ofs, "qo_switch", qo_switch, "0: no QO analysis; 1: QO analysis"); ModuleBase::GlobalFunc::OUTP(ofs, "qo_basis", qo_basis, "type of QO basis function: hydrogen: hydrogen-like basis, pswfc: read basis from pseudopotential"); ModuleBase::GlobalFunc::OUTP(ofs, "qo_thr", qo_thr, "accuracy for evaluating cutoff radius of QO basis function"); - + + ofs << "\n#Parameters (24.PEXSI)" << std::endl; + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_npole", pexsi_npole, "Number of poles in expansion"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_inertia", pexsi_inertia, "Whether inertia counting is used at the very beginning of PEXSI process"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nmax", pexsi_nmax, "Maximum number of PEXSI iterations after each inertia counting procedure."); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_comm", pexsi_comm, "Whether to construct PSelInv communication pattern"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_storage", pexsi_storage, "Storage space used by the Selected Inversion algorithm for symmetric matrices."); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_ordering", pexsi_ordering, "Ordering strategy for factorization and selected inversion"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_row_ordering", pexsi_row_ordering, "Row permutation strategy for factorization and selected inversion, 0: NoRowPerm, 1: LargeDiag"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nproc", pexsi_nproc, "Number of processors for parmetis"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_symm", pexsi_symm, "Matrix symmetry"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_trans", pexsi_trans, "Whether to transpose"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_method", pexsi_method, "pole expansion method, 1: Cauchy Contour Integral, 2: Moussa optimized method"); + 
ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nproc_pole", pexsi_nproc_pole, "Number of processes used by each pole"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_temp", pexsi_temp, "Temperature, in the same unit as H"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_gap", pexsi_gap, "Spectral gap"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_delta_e", pexsi_delta_e, "An upper bound for the spectral radius of \\f$S^{-1} H\\f$"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_lower", pexsi_mu_lower, "Initial guess of lower bound for mu"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_upper", pexsi_mu_upper, "Initial guess of upper bound for mu"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu", pexsi_mu, "Initial guess for mu (for the solver)"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_thr", pexsi_mu_thr, "Stopping criterion in terms of the chemical potential for the inertia counting procedure"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_expand", pexsi_mu_expand, "If the chemical potential is not in the initial interval, the interval is expanded by muInertiaExpansion"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_guard", pexsi_mu_guard, "Safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_elec_thr", pexsi_elec_thr, "Stopping criterion of the PEXSI iteration in terms of the number of electrons compared to numElectronExact"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_zero_thr", pexsi_zero_thr, "if the absolute value of matrix element is less than ZERO_Limit, it will be considered as 0"); + ofs.close(); return; } \ No newline at end of file