diff --git a/CMakeLists.txt b/CMakeLists.txt index 2ce46a2e5a..d049887b19 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,6 +37,7 @@ option(ENABLE_FFT_TWO_CENTER "Enable FFT-based two-center integral method." ON) option(ENABLE_GOOGLEBENCH "Enable GOOGLE-benchmark usage." OFF) option(ENABLE_RAPIDJSON "Enable rapid-json usage." OFF) option(ENABLE_CNPY "Enable cnpy usage." OFF) +option(ENABLE_PEXSI "Enable support for PEXSI." OFF) # enable json support if(ENABLE_RAPIDJSON) @@ -211,6 +212,14 @@ if(ENABLE_LCAO) if(ENABLE_FFT_TWO_CENTER) add_compile_definitions(USE_NEW_TWO_CENTER) endif() + + if(ENABLE_PEXSI) + find_package(PEXSI REQUIRED) + target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_DIST_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi) + include_directories(${PEXSI_INCLUDE_DIR} ${ParMETIS_INCLUDE_DIR}) + add_compile_definitions(__PEXSI) + set(CMAKE_CXX_STANDARD 14) + endif() else() set(ENABLE_DEEPKS OFF) set(ENABLE_LIBRI OFF) diff --git a/cmake/FindPEXSI.cmake b/cmake/FindPEXSI.cmake new file mode 100644 index 0000000000..5adc4c8a6d --- /dev/null +++ b/cmake/FindPEXSI.cmake @@ -0,0 +1,57 @@ +############################################################################### +# - Find PEXSI +# Find PEXSI and its dependencies. +# +# PEXSI_FOUND - True if pexsi is found. +# PEXSI_INCLUDE_DIR - Where to find pexsi headers. +# PEXSI_LIBRARY - pexsi library. +# ParMETIS_INCLUDE_DIR - Where to find pexsi headers. +# ParMETIS_LIBRARY - parmetis library. +# METIS_LIBRARY - metis library. +# SuperLU_DIST_LIBRARY - superlu_dist library. 
+ +find_path(PEXSI_INCLUDE_DIR + NAMES c_pexsi_interface.h + HINTS ${PEXSI_DIR} + PATH_SUFFIXES "include" +) + +find_library(PEXSI_LIBRARY + NAMES pexsi + HINTS ${PEXSI_DIR} + PATH_SUFFIXES "lib" +) + +find_path(ParMETIS_INCLUDE_DIR + NAMES metis.h parmetis.h + HINTS ${ParMETIS_DIR} + PATH_SUFFIXES "include" +) + +find_library(METIS_LIBRARY + NAMES metis + HINTS ${ParMETIS_DIR} + PATH_SUFFIXES "lib" +) + +find_library(ParMETIS_LIBRARY + NAMES parmetis + HINTS ${ParMETIS_DIR} + PATH_SUFFIXES "lib" +) + +find_library(SuperLU_DIST_LIBRARY + NAMES superlu_dist + HINTS ${SuperLU_DIST_DIR} + PATH_SUFFIXES "lib" +) + +# Handle the QUIET and REQUIRED arguments and +# set PEXSI_FOUND to TRUE if all variables are non-zero. +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(PEXSI DEFAULT_MSG PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY METIS_LIBRARY SuperLU_DIST_LIBRARY) + + +# Copy the results to the output variables and target. +mark_as_advanced(PEXSI_LIBRARY PEXSI_INCLUDE_DIR ParMETIS_LIBRARY SuperLU_DIST_LIBRARY) + diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md index a3055c70ec..d1c996127d 100644 --- a/docs/advanced/input_files/input-main.md +++ b/docs/advanced/input_files/input-main.md @@ -377,6 +377,30 @@ - [qo\_strategy](#qo_strategy) - [qo\_screening\_coeff](#qo_screening_coeff) - [qo\_thr](#qo_thr) + - [PEXSI](#pexsi) + - [pexsi\_npole](#pexsi_npole) + - [pexsi\_inertia](#pexsi_inertia) + - [pexsi\_nmax](#pexsi_nmax) + - [pexsi\_comm](#pexsi_comm) + - [pexsi\_storage](#pexsi_storage) + - [pexsi\_ordering](#pexsi_ordering) + - [pexsi\_row\_ordering](#pexsi_row_ordering) + - [pexsi\_nproc](#pexsi_nproc) + - [pexsi\_symm](#pexsi_symm) + - [pexsi\_trans](#pexsi_trans) + - [pexsi\_method](#pexsi_method) + - [pexsi\_nproc\_pole](#pexsi_nproc_pole) + - [pexsi\_temp](#pexsi_temp) + - [pexsi\_gap](#pexsi_gap) + - [pexsi\_delta\_e](#pexsi_delta_e) + - [pexsi\_mu\_lower](#pexsi_mu_lower) + - 
[pexsi\_mu\_upper](#pexsi_mu_upper) + - [pexsi\_mu](#pexsi_mu) + - [pexsi\_mu\_thr](#pexsi_mu_thr) + - [pexsi\_mu\_expand](#pexsi_mu_expand) + - [pexsi\_mu\_guard](#pexsi_mu_guard) + - [pexsi\_elec\_thr](#pexsi_elec_thr) + - [pexsi\_zero\_thr](#pexsi_zero_thr) [back to top](#full-list-of-input-keywords) @@ -3530,4 +3554,146 @@ These variables are used to control the usage of QO analysis. QO further compres - **Description**: the convergence threshold determining the cutoff of generated orbital. Lower threshold will yield orbital with larger cutoff radius. - **Default**: 1.0e-6 +## PEXSI + +These variables are used to control the usage of PEXSI (Pole Expansion and Selected Inversion) method in calculations. + +### pexsi_npole + +- **Type**: Integer +- **Description**: the number of poles used in the pole expansion method, should be an even number. +- **Default**: 40 + +### pexsi_inertia + +- **Type**: Boolean +- **Description**: whether inertia counting is used at the very beginning. +- **Default**: True + +### pexsi_nmax + +- **Type**: Integer +- **Description**: maximum number of PEXSI iterations after each inertia counting procedure. +- **Default**: 80 + +### pexsi_comm + +- **Type**: Boolean +- **Description**: whether to construct PSelInv communication pattern. +- **Default**: True + +### pexsi_storage + +- **Type**: Boolean +- **Description**: whether to use the symmetric storage space used by the Selected Inversion algorithm for symmetric matrices. +- **Default**: True + +### pexsi_ordering + +- **Type**: Integer +- **Description**: ordering strategy for factorization and selected inversion. 0: Parallel ordering using ParMETIS, 1: Sequential ordering using METIS, 2: Multiple minimum degree ordering +- **Default**: 0 + +### pexsi_row_ordering + +- **Type**: Integer +- **Description**: row permutation strategy for factorization and selected inversion, 0: No row permutation, 1: Make the diagonal entry of the matrix larger than the off-diagonal entries. 
+- **Default**: 1 + +### pexsi_nproc + +- **Type**: Integer +- **Description**: number of processors for PARMETIS. Only used if pexsi_ordering == 0. +- **Default**: 1 + +### pexsi_symm + +- **Type**: Boolean +- **Description**: whether the matrix is symmetric. +- **Default**: True + +### pexsi_trans + +- **Type**: Boolean +- **Description**: whether to factorize the transpose of the matrix. +- **Default**: False + +### pexsi_method + +- **Type**: Integer +- **Description**: the pole expansion method to be used. 1 for Cauchy Contour Integral method, 2 for Moussa optimized method. +- **Default**: 1 + +### pexsi_nproc_pole + +- **Type**: Integer +- **Description**: the point parallelization of PEXSI. Two-point parallelization is recommended. +- **Default**: 1 + +### pexsi_temp + +- **Type**: Real +- **Description**: temperature in Fermi-Dirac distribution, in Ry, should have the same effect as the smearing sigma when smearing method is set to Fermi-Dirac. +- **Default**: 0.015 + +### pexsi_gap + +- **Type**: Real +- **Description**: spectral gap; this can be set to 0 in most cases. +- **Default**: 0 + +### pexsi_delta_e + +- **Type**: Real +- **Description**: an upper bound for the spectral radius of $S^{-1} H$. +- **Default**: 20 + +### pexsi_mu_lower + +- **Type**: Real +- **Description**: initial guess of lower bound for mu. +- **Default**: -10 + +### pexsi_mu_upper + +- **Type**: Real +- **Description**: initial guess of upper bound for mu. +- **Default**: 10 + +### pexsi_mu + +- **Type**: Real +- **Description**: initial guess for mu (for the solver). +- **Default**: 0 + +### pexsi_mu_thr + +- **Type**: Real +- **Description**: stopping criterion in terms of the chemical potential for the inertia counting procedure. +- **Default**: 0.05 + +### pexsi_mu_expand + +- **Type**: Real +- **Description**: if the chemical potential is not in the initial interval, the interval is expanded by this value. 
+- **Default**: 0.3 + +### pexsi_mu_guard + +- **Type**: Real +- **Description**: safeguard criterion in terms of the chemical potential to reinvoke the inertia counting procedure. +- **Default**: 0.2 + +### pexsi_elec_thr + +- **Type**: Real +- **Description**: stopping criterion of the PEXSI iteration in terms of the number of electrons compared to numElectronExact. +- **Default**: 0.001 + +### pexsi_zero_thr + +- **Type**: Real +- **Description**: if the absolute value of a CCS matrix element is less than this value, it will be considered zero. +- **Default**: 1e-10 + [back to top](#full-list-of-input-keywords) diff --git a/docs/advanced/install.md index d6201a060f..a5deeb5888 100644 --- a/docs/advanced/install.md +++ b/docs/advanced/install.md @@ -104,6 +104,18 @@ Currently supported math functions: cmake -B build -DUSE_ABACUS_LIBM=1 ``` +## Build with PEXSI support + +ABACUS supports the PEXSI library for gamma-only LCAO calculations. PEXSI version 2.0.0 is tested to work with ABACUS; please always use the latest version of PEXSI. + +To build ABACUS with PEXSI support, you need to compile PEXSI (and its dependencies) first. Please refer to the [PEXSI Installation Guide](https://pexsi.readthedocs.io/en/latest/install.html) for more details. Note that PEXSI requires ParMETIS and SuperLU_DIST. + +After compiling PEXSI, you can set `ENABLE_PEXSI` to `ON`. If the libraries are not installed in standard paths, you can set `PEXSI_DIR`, `ParMETIS_DIR` and `SuperLU_DIST_DIR` to the corresponding directories. + +```bash +cmake -B build -DENABLE_PEXSI=ON -DPEXSI_DIR=${path to PEXSI installation directory} -DParMETIS_DIR=${path to ParMETIS installation directory} -DSuperLU_DIST_DIR=${path to SuperLU_DIST installation directory} +``` + ## Build ABACUS with make > Note: We suggest using CMake to configure and compile. 
diff --git a/examples/pexsi/md_Si8/INPUT b/examples/pexsi/md_Si8/INPUT new file mode 100644 index 0000000000..13231579bc --- /dev/null +++ b/examples/pexsi/md_Si8/INPUT @@ -0,0 +1,32 @@ +INPUT_PARAMETERS +#Parameters (1.General) +suffix Si_rescaling +calculation md +nbands 20 +symmetry 0 +pseudo_dir ../../../tests/PP_ORB +orbital_dir ../../../tests/PP_ORB + +#Parameters (2.Iteration) +ecutwfc 30 +scf_thr 1e-5 +scf_nmax 100 + +#Parameters (3.Basis) +basis_type lcao +ks_solver pexsi +gamma_only 1 + +#Parameters (5.Mixing) +mixing_type broyden +mixing_beta 0.3 +chg_extrap second-order + +#Parameters (6.MD) +md_type nvt +md_thermostat rescaling +md_tolerance 10 +md_nstep 10 +md_dt 1 +md_tfirst 300 +md_tfreq 0.025 diff --git a/examples/pexsi/md_Si8/KPT b/examples/pexsi/md_Si8/KPT new file mode 100644 index 0000000000..c289c0158a --- /dev/null +++ b/examples/pexsi/md_Si8/KPT @@ -0,0 +1,4 @@ +K_POINTS +0 +Gamma +1 1 1 0 0 0 diff --git a/examples/pexsi/md_Si8/STRU b/examples/pexsi/md_Si8/STRU new file mode 100644 index 0000000000..795530e8cf --- /dev/null +++ b/examples/pexsi/md_Si8/STRU @@ -0,0 +1,28 @@ +ATOMIC_SPECIES +Si 28.085 Si_ONCV_PBE-1.0.upf + +NUMERICAL_ORBITAL +Si_gga_8au_60Ry_2s2p1d.orb + +LATTICE_CONSTANT +1.8897270 # 1 Angstrom = 1.8897270 bohr + +LATTICE_VECTORS +5.43090 0.00000 0.00000 +0.00000 5.43090 0.00000 +0.00000 0.00000 5.43090 + +ATOMIC_POSITIONS +Direct + +Si +0.0 +8 +0.000 0.000 0.000 1 1 1 +0.000 0.500 0.500 1 1 1 +0.500 0.000 0.500 1 1 1 +0.500 0.500 0.000 1 1 1 +0.250 0.250 0.250 1 1 1 +0.250 0.750 0.750 1 1 1 +0.750 0.250 0.750 1 1 1 +0.750 0.750 0.250 1 1 1 \ No newline at end of file diff --git a/examples/pexsi/scf_Si64/INPUT b/examples/pexsi/scf_Si64/INPUT new file mode 100755 index 0000000000..ddb9813701 --- /dev/null +++ b/examples/pexsi/scf_Si64/INPUT @@ -0,0 +1,20 @@ +INPUT_PARAMETERS +suffix test +ntype 1 +nbands 200 +pseudo_dir ../../../tests/PP_ORB +orbital_dir ../../../tests/PP_ORB + +calculation scf +mixing_beta 0.4 +basis_type 
lcao +gamma_only 1 +symmetry 0 + +ecutwfc 60 +lcao_dr 1e-3 +scf_nmax 20 + +ks_solver pexsi + +pexsi_npole 40 \ No newline at end of file diff --git a/examples/pexsi/scf_Si64/KPT b/examples/pexsi/scf_Si64/KPT new file mode 100644 index 0000000000..c289c0158a --- /dev/null +++ b/examples/pexsi/scf_Si64/KPT @@ -0,0 +1,4 @@ +K_POINTS +0 +Gamma +1 1 1 0 0 0 diff --git a/examples/pexsi/scf_Si64/STRU b/examples/pexsi/scf_Si64/STRU new file mode 100755 index 0000000000..70e722c0ec --- /dev/null +++ b/examples/pexsi/scf_Si64/STRU @@ -0,0 +1,85 @@ +ATOMIC_SPECIES +Si 28 Si_ONCV_PBE-1.0.upf + +NUMERICAL_ORBITAL +Si_gga_8au_60Ry_2s2p1d.orb + +LATTICE_CONSTANT +10.2 // add lattice constant + +LATTICE_VECTORS +2 0 0 +0 2 0 +0 0 2 + +ATOMIC_POSITIONS +Cartesian //Cartesian or Direct coordinate. + +Si // Element type +0 // magnetism +64 // number of atoms +0.00 0.00 0.00 0 0 0 +0.25 0.25 0.25 0 0 0 +0.00 0.50 0.50 0 0 0 +0.25 0.75 0.75 0 0 0 +0.50 0.00 0.50 0 0 0 +0.75 0.25 0.75 0 0 0 +0.50 0.50 0.00 0 0 0 +0.75 0.75 0.25 0 0 0 +1.00 0.00 0.00 0 0 0 +1.25 0.25 0.25 0 0 0 +1.00 0.50 0.50 0 0 0 +1.25 0.75 0.75 0 0 0 +1.50 0.00 0.50 0 0 0 +1.75 0.25 0.75 0 0 0 +1.50 0.50 0.00 0 0 0 +1.75 0.75 0.25 0 0 0 +0.00 1.00 0.00 0 0 0 +0.25 1.25 0.25 0 0 0 +0.00 1.50 0.50 0 0 0 +0.25 1.75 0.75 0 0 0 +0.50 1.00 0.50 0 0 0 +0.75 1.25 0.75 0 0 0 +0.50 1.50 0.00 0 0 0 +0.75 1.75 0.25 0 0 0 +1.00 1.00 0.00 0 0 0 +1.25 1.25 0.25 0 0 0 +1.00 1.50 0.50 0 0 0 +1.25 1.75 0.75 0 0 0 +1.50 1.00 0.50 0 0 0 +1.75 1.25 0.75 0 0 0 +1.50 1.50 0.00 0 0 0 +1.75 1.75 0.25 0 0 0 +0.00 0.00 1.00 0 0 0 +0.25 0.25 1.25 0 0 0 +0.00 0.50 1.50 0 0 0 +0.25 0.75 1.75 0 0 0 +0.50 0.00 1.50 0 0 0 +0.75 0.25 1.75 0 0 0 +0.50 0.50 1.00 0 0 0 +0.75 0.75 1.25 0 0 0 +1.00 0.00 1.00 0 0 0 +1.25 0.25 1.25 0 0 0 +1.00 0.50 1.50 0 0 0 +1.25 0.75 1.75 0 0 0 +1.50 0.00 1.50 0 0 0 +1.75 0.25 1.75 0 0 0 +1.50 0.50 1.00 0 0 0 +1.75 0.75 1.25 0 0 0 +0.00 1.00 1.00 0 0 0 +0.25 1.25 1.25 0 0 0 +0.00 1.50 1.50 0 0 0 +0.25 1.75 1.75 0 0 0 
+0.50 1.00 1.50 0 0 0 +0.75 1.25 1.75 0 0 0 +0.50 1.50 1.00 0 0 0 +0.75 1.75 1.25 0 0 0 +1.00 1.00 1.00 0 0 0 +1.25 1.25 1.25 0 0 0 +1.00 1.50 1.50 0 0 0 +1.25 1.75 1.75 0 0 0 +1.50 1.00 1.50 0 0 0 +1.75 1.25 1.75 0 0 0 +1.50 1.50 1.00 0 0 0 +1.75 1.75 1.25 0 0 0 + diff --git a/examples/pexsi/scf_spin_Fe2/INPUT b/examples/pexsi/scf_spin_Fe2/INPUT new file mode 100644 index 0000000000..a6a5bcc971 --- /dev/null +++ b/examples/pexsi/scf_spin_Fe2/INPUT @@ -0,0 +1,22 @@ +INPUT_PARAMETERS +suffix autotest +#nbands 40 + +calculation scf +ecutwfc 20 +scf_thr 1.0e-8 +scf_nmax 50 +out_chg 0 + +mixing_type broyden + + +ks_solver pexsi +pexsi_temp 0.1 +pexsi_npole 80 +basis_type lcao +gamma_only 1 +symmetry 0 +nspin 2 +pseudo_dir ../../../tests/PP_ORB +orbital_dir ../../../tests/PP_ORB diff --git a/examples/pexsi/scf_spin_Fe2/KPT b/examples/pexsi/scf_spin_Fe2/KPT new file mode 100644 index 0000000000..c289c0158a --- /dev/null +++ b/examples/pexsi/scf_spin_Fe2/KPT @@ -0,0 +1,4 @@ +K_POINTS +0 +Gamma +1 1 1 0 0 0 diff --git a/examples/pexsi/scf_spin_Fe2/STRU b/examples/pexsi/scf_spin_Fe2/STRU new file mode 100644 index 0000000000..b7a2039467 --- /dev/null +++ b/examples/pexsi/scf_spin_Fe2/STRU @@ -0,0 +1,29 @@ +ATOMIC_SPECIES +Fe1 1.000 Fe_ONCV_PBE-1.0.upf +Fe2 1.000 Fe_ONCV_PBE-1.0.upf + +NUMERICAL_ORBITAL +Fe_gga_9au_100Ry_4s2p2d1f.orb +Fe_gga_9au_100Ry_4s2p2d1f.orb + +LATTICE_CONSTANT +15 + +LATTICE_VECTORS + 1.00 0.50 0.50 + 0.50 1.00 0.50 + 0.50 0.50 1.00 +ATOMIC_POSITIONS +Direct + +Fe1 +5.0 +1 +0.00 0.00 0.00 1 1 1 + +Fe2 +-5.0 +1 +0.50 0.50 0.50 1 1 1 + + diff --git a/source/Makefile b/source/Makefile index c9a6962bb8..e18e54ae9a 100644 --- a/source/Makefile +++ b/source/Makefile @@ -176,6 +176,13 @@ ifdef DeePMD_DIR INCLUDES += -I${TensorFlow_INCLUDE_DIR} endif +ifdef PEXSI_DIR + OBJS_ABACUS += ${OBJS_HSOLVER_PEXSI} + INCLUDES += -I${PEXSI_DIR}/include -I${PARMETIS_DIR}/include -I${DSUPERLU_DIR}/include + LIBS += -L${PEXSI_DIR}/lib -lpexsi -L${DSUPERLU_DIR}/lib 
-lsuperlu_dist -L${PARMETIS_DIR}/lib -lparmetis -lmetis + HONG += -D__PEXSI +endif + include Makefile.Objects #========================== diff --git a/source/Makefile.Objects b/source/Makefile.Objects index f61b7cced1..d56bae5d3c 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -30,6 +30,7 @@ VPATH=./src_global:\ ./module_hsolver:\ ./module_hsolver/kernels:\ ./module_hsolver/genelpa:\ +./module_hsolver/module_pexsi:\ ./module_elecstate:\ ./module_elecstate/kernels:\ ./module_elecstate/potentials:\ @@ -102,6 +103,7 @@ ${OBJS_VDW}\ ${OBJS_DFTU}\ ${OBJS_DELTASPIN}\ ${OBJS_TENSOR}\ +${OBJS_HSOLVER_PEXSI}\ OBJS_MAIN=main.o\ driver.o\ @@ -295,7 +297,7 @@ OBJS_HSOLVER=diago_cg.o\ diago_iter_assist.o\ math_kernel_op.o\ dngvd_op.o\ - + OBJS_HSOLVER_LCAO=hsolver_lcao.o\ diago_blas.o\ diago_elpa.o\ @@ -304,6 +306,13 @@ OBJS_HSOLVER_LCAO=hsolver_lcao.o\ elpa_new_complex.o\ utils.o\ +OBJS_HSOLVER_PEXSI=diago_pexsi.o\ + pexsi_solver.o\ + simple_pexsi.o\ + dist_bcd_matrix.o\ + dist_ccs_matrix.o\ + dist_matrix_transformer.o\ + OBJS_MD=fire.o\ langevin.o\ md_base.o\ diff --git a/source/Makefile.vars b/source/Makefile.vars index 99092a10ac..f00eca8ea3 100644 --- a/source/Makefile.vars +++ b/source/Makefile.vars @@ -59,6 +59,7 @@ CEREAL_DIR = /usr/local/include/cereal ## To use LIBXC: set LIBXC_DIR which contains include and lib/libxc.a (>5.1.7) ## To use DeePMD: set DeePMD_DIR and TensorFlow_DIR ## To use LibRI: set LIBRI_DIR and LIBCOMM_DIR +## To use PEXSI: set PEXSI_DIR DSUPERLU_DIR and PARMETIS_DIR ##--------------------------------------------------------------------- # LIBTORCH_DIR = /usr/local @@ -72,6 +73,10 @@ CEREAL_DIR = /usr/local/include/cereal # LIBRI_DIR = /public/software/LibRI # LIBCOMM_DIR = /public/software/LibComm +# PEXSI_DIR = /public/software/pexsi +# DSUPERLU_DIR = /public/software/superlu_dist +# PARMETIS_DIR = /public/software/parmetis + ##--------------------------------------------------------------------- # NP = 14 # It is not 
supported. use make -j14 or make -j to parallelly compile # DEBUG = OFF diff --git a/source/module_base/global_function.h b/source/module_base/global_function.h index 143ca35057..89e68c9d74 100644 --- a/source/module_base/global_function.h +++ b/source/module_base/global_function.h @@ -350,7 +350,7 @@ T ddot_real( //========================================================== static inline bool IS_COLUMN_MAJOR_KS_SOLVER() { - return GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="scalapack_gvx" || GlobalV::KS_SOLVER=="cusolver" || GlobalV::KS_SOLVER=="cg_in_lcao"; + return GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="scalapack_gvx" || GlobalV::KS_SOLVER=="cusolver" || GlobalV::KS_SOLVER=="cg_in_lcao" || GlobalV::KS_SOLVER=="pexsi"; } }//namespace GlobalFunc diff --git a/source/module_basis/module_ao/ORB_control.cpp b/source/module_basis/module_ao/ORB_control.cpp index 7e1f3636c4..c3a0bb41d2 100644 --- a/source/module_basis/module_ao/ORB_control.cpp +++ b/source/module_basis/module_ao/ORB_control.cpp @@ -184,7 +184,7 @@ void ORB_control::setup_2d_division(std::ofstream& ofs_running, ofs_running << "\n SETUP THE DIVISION OF H/S MATRIX" << std::endl; // (1) calculate nrow, ncol, nloc. - if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "cg_in_lcao") + if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "cg_in_lcao" || ks_solver == "pexsi") { ofs_running << " divide the H&S matrix using 2D block algorithms." 
<< std::endl; #ifdef __MPI @@ -205,7 +205,7 @@ void ORB_control::setup_2d_division(std::ofstream& ofs_running, bool div_2d; if (ks_solver == "lapack" || ks_solver == "cg" || ks_solver == "dav" || ks_solver == "dav_subspace") div_2d = false; #ifdef __MPI - else if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "cg_in_lcao") div_2d = true; + else if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "cg_in_lcao" || ks_solver == "pexsi") div_2d = true; #endif else { @@ -375,7 +375,7 @@ assert(nb2d > 0); } // init blacs context for genelpa - if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "cg_in_lcao") + if (ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "cg_in_lcao" || ks_solver == "pexsi") { pv->set_desc(nlocal, nlocal, pv->nrow); pv->set_desc_wfc_Eij(nlocal, nbands, pv->nrow); diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp index 4fdbe229fb..fad2b24dfd 100644 --- a/source/module_elecstate/elecstate_lcao.cpp +++ b/source/module_elecstate/elecstate_lcao.cpp @@ -1,4 +1,5 @@ #include "elecstate_lcao.h" +#include #include "cal_dm.h" #include "module_base/timer.h" @@ -128,7 +129,7 @@ if(!GlobalV::dm_to_rho) } if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "scalapack_gvx" || GlobalV::KS_SOLVER == "lapack" - || GlobalV::KS_SOLVER == "cusolver" || GlobalV::KS_SOLVER == "cg_in_lcao") + || GlobalV::KS_SOLVER == "cusolver" || GlobalV::KS_SOLVER == "cg_in_lcao" || GlobalV::KS_SOLVER == "pexsi") { for (int ik = 0; ik < psi.get_nk(); ik++) { @@ -183,6 +184,7 @@ void ElecStateLCAO::psiToRho(const psi::Psi& psi) || GlobalV::KS_SOLVER == "cusolver" || GlobalV::KS_SOLVER == "cg_in_lcao") { ModuleBase::timer::tick("ElecStateLCAO", "cal_dm_2d"); + // get DMK in 2d-block format //cal_dm(this->loc->ParaV, this->wg, psi, 
this->loc->dm_gamma); elecstate::cal_dm_psi(this->DM->get_paraV_pointer(), this->wg, psi, *(this->DM)); @@ -196,7 +198,6 @@ void ElecStateLCAO::psiToRho(const psi::Psi& psi) } } ModuleBase::timer::tick("ElecStateLCAO", "cal_dm_2d"); - for (int ik = 0; ik < psi.get_nk(); ++ik) { // for gamma_only case, no convertion occured, just for print. @@ -266,6 +267,69 @@ double ElecStateLCAO>::get_spin_constrain_energy() return sc.cal_escon(); } +#ifdef __PEXSI +template<> +void ElecStateLCAO::dmToRho(std::vector pexsi_DM, std::vector pexsi_EDM) +{ + ModuleBase::timer::tick("ElecStateLCAO", "dmToRho"); + + int nspin = GlobalV::NSPIN; + if (GlobalV::NSPIN == 4) + { + nspin = 1; + } + + // old 2D-to-Grid conversion has been replaced by new Gint Refactor 2023/09/25 + if (this->loc->out_dm) // keep interface for old Output_DM until new one is ready + { + for (int is = 0; is < nspin; ++is) + { + this->loc->set_dm_gamma(is, pexsi_DM[is]); + } + this->loc->cal_dk_gamma_from_2D_pub(); + } + + this->get_DM()->pexsi_EDM = pexsi_EDM; + + for (int is = 0; is < nspin; is++) + { + this->DM->set_DMK_pointer(is, pexsi_DM[is]); + } + DM->cal_DMR(); + + for (int is = 0; is < GlobalV::NSPIN; is++) + { + ModuleBase::GlobalFunc::ZEROS(this->charge->rho[is], this->charge->nrxx); // mohan 2009-11-10 + } + + ModuleBase::GlobalFunc::NOTE("Calculate the charge on real space grid!"); + this->gint_gamma->transfer_DM2DtoGrid(this->DM->get_DMR_vector()); // transfer DM2D to DM_grid in gint + Gint_inout inout(this->loc->DM, this->charge->rho, Gint_Tools::job_type::rho); + this->gint_gamma->cal_gint(&inout); + if (XC_Functional::get_func_type() == 3 || XC_Functional::get_func_type() == 5) + { + for (int is = 0; is < GlobalV::NSPIN; is++) + { + ModuleBase::GlobalFunc::ZEROS(this->charge->kin_r[0], this->charge->nrxx); + } + Gint_inout inout1(this->loc->DM, this->charge->kin_r, Gint_Tools::job_type::tau); + this->gint_gamma->cal_gint(&inout1); + } + + this->charge->renormalize_rho(); + + 
ModuleBase::timer::tick("ElecStateLCAO", "dmToRho"); + return; +} + +template<> +void ElecStateLCAO>::dmToRho(std::vector*> pexsi_DM, std::vector*> pexsi_EDM) +{ + ModuleBase::WARNING_QUIT("ElecStateLCAO", "pexsi is not completed for multi-k case"); +} + +#endif + template class ElecStateLCAO; // Gamma_only case template class ElecStateLCAO>; // multi-k case diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h index fdc4dd080f..c3e7ae3a2d 100644 --- a/source/module_elecstate/elecstate_lcao.h +++ b/source/module_elecstate/elecstate_lcao.h @@ -1,6 +1,7 @@ #ifndef ELECSTATELCAO_H #define ELECSTATELCAO_H +#include #include "elecstate.h" #include "module_hamilt_lcao/module_gint/gint_gamma.h" #include "module_hamilt_lcao/module_gint/gint_k.h" @@ -64,6 +65,17 @@ class ElecStateLCAO : public ElecState double get_spin_constrain_energy() override; +#ifdef __PEXSI + // use for pexsi + + /** + * @brief calculate electronic charge density from pointers of density matrix calculated by pexsi + * @param pexsi_DM: pointers of density matrix (DMK) calculated by pexsi + * @param pexsi_EDM: pointers of energy-weighed density matrix (EDMK) calculated by pexsi, needed by MD, will be stored in DensityMatrix::pexsi_EDM + */ + void dmToRho(std::vector pexsi_DM, std::vector pexsi_EDM); +#endif + protected: // calculate electronic charge density on grid points or density matrix in real space // the consequence charge density rho saved into rho_out, preparing for charge mixing. 
diff --git a/source/module_elecstate/elecstate_print.cpp b/source/module_elecstate/elecstate_print.cpp index 78be828c66..d762f17826 100644 --- a/source/module_elecstate/elecstate_print.cpp +++ b/source/module_elecstate/elecstate_print.cpp @@ -305,6 +305,10 @@ void ElecState::print_etot(const bool converged, { label = "BP"; } + else if (ks_solver_type == "pexsi") + { + label = "PE"; + } else { ModuleBase::WARNING_QUIT("Energy", "print_etot found unknown ks_solver_type"); diff --git a/source/module_elecstate/module_dm/density_matrix.h b/source/module_elecstate/module_dm/density_matrix.h index 6255401850..90f0db4ae7 100644 --- a/source/module_elecstate/module_dm/density_matrix.h +++ b/source/module_elecstate/module_dm/density_matrix.h @@ -209,6 +209,14 @@ class DensityMatrix std::vector EDMK; // for TD-DFT +#ifdef __PEXSI + /** + * @brief EDM storage for PEXSI + * used in MD calculation + */ + std::vector pexsi_EDM; +#endif + private: /** * @brief HContainer for density matrix in real space for 2D parallelization @@ -256,6 +264,8 @@ class DensityMatrix * _nks = kv->_nks / nspin */ int _nks = 0; + + }; } // namespace elecstate diff --git a/source/module_esolver/esolver_ks_lcao_elec.cpp b/source/module_esolver/esolver_ks_lcao_elec.cpp index aa8e53bd7c..36a8bf35a4 100644 --- a/source/module_esolver/esolver_ks_lcao_elec.cpp +++ b/source/module_esolver/esolver_ks_lcao_elec.cpp @@ -107,7 +107,7 @@ void ESolver_KS_LCAO::beforesolver(const int istep) { nsk = GlobalV::NSPIN; ncol = this->LOWF.ParaV->ncol_bands; - if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "lapack_gvx" + if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "lapack_gvx" || GlobalV::KS_SOLVER=="pexsi" || GlobalV::KS_SOLVER == "cusolver") { ncol = this->LOWF.ParaV->ncol; diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp index e719bc8537..992619aa17 100644 --- 
a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp @@ -1,4 +1,5 @@ #include "FORCE_gamma.h" +#include "module_elecstate/elecstate_lcao.h" #include "module_hamilt_pw/hamilt_pwdft/global.h" #include "module_base/parallel_reduce.h" #include "module_base/timer.h" @@ -36,8 +37,22 @@ void Force_LCAO_gamma::cal_foverlap( // construct a DensityMatrix for Gamma-Only const Parallel_Orbitals* pv = this->ParaV; elecstate::DensityMatrix EDM(pv,GlobalV::NSPIN); - - elecstate::cal_dm_psi(EDM.get_paraV_pointer(), wgEkb, psid[0], EDM); + +#ifdef __PEXSI + if (GlobalV::KS_SOLVER == "pexsi") + { + auto pes = dynamic_cast*>(pelec); + for (int ik = 0; ik < GlobalV::NSPIN; ik++) + { + EDM.set_DMK_pointer(ik, pes->get_DM()->pexsi_EDM[ik]); + } + + } + else +#endif + { + elecstate::cal_dm_psi(EDM.get_paraV_pointer(), wgEkb, psid[0], EDM); + } ModuleBase::timer::tick("Force_LCAO_gamma","cal_edm_2d"); diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp index 77035035da..247c8d8b0e 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_odelta.cpp @@ -63,7 +63,7 @@ void LCAO_Deepks::cal_o_delta_k(const std::vector= 0 && nu >= 0) { int iic; - if(GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="scalapack_gvx") // save the matrix as column major format + if(GlobalV::KS_SOLVER=="genelpa" || GlobalV::KS_SOLVER=="scalapack_gvx" || GlobalV::KS_SOLVER=="pexsi") // save the matrix as column major format { iic = mu + nu * pv->nrow; } diff --git a/source/module_hsolver/CMakeLists.txt b/source/module_hsolver/CMakeLists.txt index eac67c9b32..c4e7410da5 100644 --- a/source/module_hsolver/CMakeLists.txt +++ b/source/module_hsolver/CMakeLists.txt @@ -38,6 +38,14 @@ if(ENABLE_LCAO) add_coverage(diag_cusolver) endif() endif() + + if(ENABLE_PEXSI) + list(APPEND 
objects + diago_pexsi.cpp + ) + add_subdirectory(module_pexsi) + endif() + endif() add_library( @@ -57,4 +65,5 @@ endif() IF (BUILD_TESTING) add_subdirectory(test) add_subdirectory(kernels/test) + message(STATUS "Building tests") endif() diff --git a/source/module_hsolver/diago_pexsi.cpp b/source/module_hsolver/diago_pexsi.cpp new file mode 100644 index 0000000000..4076922626 --- /dev/null +++ b/source/module_hsolver/diago_pexsi.cpp @@ -0,0 +1,95 @@ +#include +#include +#include +#ifdef __PEXSI +#include "diago_pexsi.h" +#include "module_base/global_variable.h" +#include "module_base/tool_quit.h" +#include "module_basis/module_ao/parallel_orbitals.h" +#include "module_pexsi/pexsi_solver.h" + +typedef hamilt::MatrixBlock matd; +typedef hamilt::MatrixBlock> matcd; + +namespace hsolver +{ +template +DiagoPexsi::DiagoPexsi(const Parallel_Orbitals* ParaV_in) +{ + int nspin = GlobalV::NSPIN; + if (GlobalV::NSPIN == 4) + { + nspin = 1; + } + mu_buffer.resize(nspin); + for (int i = 0; i < nspin; i++) + { + mu_buffer[i] = this->ps->pexsi_mu; + } + + this->ParaV = ParaV_in; + this->ps = std::make_unique(); + + this->DM.resize(nspin); + this->EDM.resize(nspin); + for (int i = 0; i < nspin; i++) + { + this->DM[i] = new T[ParaV->nrow * ParaV->ncol]; + this->EDM[i] = new T[ParaV->nrow * ParaV->ncol]; + } + +} + +template +DiagoPexsi::~DiagoPexsi() +{ + int nspin = GlobalV::NSPIN; + if (GlobalV::NSPIN == 4) + { + nspin = 1; + } + for (int i = 0; i < nspin; i++) + { + delete[] this->DM[i]; + delete[] this->EDM[i]; + } + +} + +template <> +void DiagoPexsi::diag(hamilt::Hamilt* phm_in, psi::Psi& psi, double* eigenvalue_in) +{ + ModuleBase::TITLE("DiagoPEXSI", "diag"); + matd h_mat, s_mat; + phm_in->matrix(h_mat, s_mat); + std::vector eigen(GlobalV::NLOCAL, 0.0); + int ik = psi.get_current_k(); + this->ps->prepare(this->ParaV->blacs_ctxt, + this->ParaV->nb, + this->ParaV->nrow, + this->ParaV->ncol, + h_mat.p, + s_mat.p, + DM[ik], + EDM[ik]); + this->ps->solve(mu_buffer[ik]); + 
this->totalFreeEnergy = this->ps->get_totalFreeEnergy(); + this->totalEnergyH = this->ps->get_totalEnergyH(); + this->totalEnergyS = this->ps->get_totalEnergyS(); + this->mu_buffer[ik] = this->ps->get_mu(); +} + +template <> +void DiagoPexsi>::diag(hamilt::Hamilt>* phm_in, + psi::Psi>& psi, + double* eigenvalue_in) +{ + ModuleBase::TITLE("DiagoPEXSI", "diag"); + ModuleBase::WARNING_QUIT("DiagoPEXSI", "PEXSI is not completed for multi-k case"); +} + +template class DiagoPexsi; +template class DiagoPexsi >; + +} // namespace hsolver +#endif \ No newline at end of file diff --git a/source/module_hsolver/diago_pexsi.h b/source/module_hsolver/diago_pexsi.h new file mode 100644 index 0000000000..9802273e4b --- /dev/null +++ b/source/module_hsolver/diago_pexsi.h @@ -0,0 +1,35 @@ +#ifndef DIGAOPEXSI_H +#define DIGAOPEXSI_H + +#include +#include +#include "diagh.h" +#include "module_base/global_variable.h" +#include "module_basis/module_ao/parallel_orbitals.h" +#include "module_pexsi/pexsi_solver.h" + +namespace hsolver +{ + +template +class DiagoPexsi : public DiagH +{ + private: + using Real = typename GetTypeReal::type; + std::vector mu_buffer; + + public: + DiagoPexsi(const Parallel_Orbitals* ParaV_in); + void diag(hamilt::Hamilt* phm_in, psi::Psi& psi, Real* eigenvalue_in) override; + const Parallel_Orbitals* ParaV; + std::vector DM; + std::vector EDM; + double totalEnergyH; + double totalEnergyS; + double totalFreeEnergy; + std::unique_ptr ps; + ~DiagoPexsi(); +}; +} // namespace hsolver + +#endif diff --git a/source/module_hsolver/hsolver_lcao.cpp b/source/module_hsolver/hsolver_lcao.cpp index 703430bbf0..70c635806c 100644 --- a/source/module_hsolver/hsolver_lcao.cpp +++ b/source/module_hsolver/hsolver_lcao.cpp @@ -16,6 +16,10 @@ #ifdef __CUDA #include "diago_cusolver.h" #endif +#ifdef __PEXSI +#include "diago_pexsi.h" +#include "module_elecstate/elecstate_lcao.h" +#endif namespace hsolver { @@ -139,6 +143,27 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt* 
pHamilt, this->pdiagh->method = this->method; } } +#ifdef __PEXSI + else if (this->method == "pexsi") + { + if (this->pdiagh != nullptr) + { + if (this->pdiagh->method != this->method) + { + delete[] this->pdiagh; + this->pdiagh = nullptr; + } + auto tem = dynamic_cast*>(this->pdiagh); + } + if (this->pdiagh == nullptr) + { + DiagoPexsi* tem = new DiagoPexsi(this->ParaV); + this->pdiagh = tem; + // this->pdiagh = dynamic_cast*>(tem); + this->pdiagh->method = this->method; + } + } +#endif else { ModuleBase::WARNING_QUIT("HSolverLCAO::solve", "This method of DiagH is not supported!"); @@ -183,7 +208,7 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt* pHamilt, if (this->method != "genelpa" && this->method != "scalapack_gvx" && this->method != "lapack" - && this->method != "cusolver" && this->method != "cg_in_lcao") + && this->method != "cusolver" && this->method != "cg_in_lcao" && this->method != "pexsi") { delete this->pdiagh; this->pdiagh = nullptr; @@ -198,7 +223,21 @@ void HSolverLCAO::solveTemplate(hamilt::Hamilt* pHamilt, // calculate charge by psi // called in scf calculation - pes->psiToRho(psi); +#ifdef __PEXSI + if (this->method == "pexsi") + { + DiagoPexsi* tem = dynamic_cast*>(this->pdiagh); + if (tem==nullptr) ModuleBase::WARNING_QUIT("HSolverLCAO", "pexsi need debug!"); + elecstate::ElecStateLCAO* _pes = dynamic_cast*>(pes); + pes->f_en.eband = tem->totalFreeEnergy; + // maybe eferm could be dealt with in the future + _pes->dmToRho(tem->DM, tem->EDM); + } + else +#endif + { + pes->psiToRho(psi); + } ModuleBase::timer::tick("HSolverLCAO", "solve"); } diff --git a/source/module_hsolver/module_pexsi/CMakeLists.txt b/source/module_hsolver/module_pexsi/CMakeLists.txt new file mode 100644 index 0000000000..87d16ff557 --- /dev/null +++ b/source/module_hsolver/module_pexsi/CMakeLists.txt @@ -0,0 +1,5 @@ +add_library(pexsi OBJECT dist_bcd_matrix.cpp dist_ccs_matrix.cpp dist_matrix_transformer.cpp pexsi_solver.cpp simple_pexsi.cpp) + +if(ENABLE_COVERAGE) + 
add_coverage(pexsi) +endif() diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp new file mode 100644 index 0000000000..ff3f85f32b --- /dev/null +++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.cpp @@ -0,0 +1,114 @@ +#ifdef __PEXSI +#include "dist_bcd_matrix.h" +#include +#include +extern "C" +{ + void Cblacs_gridinfo(int icontxt, int* nprow, int* npcol, int* myprow, int* mypcol); + int Cblacs_pnum(int blacs_ctxt, int prow, int pcol); +}; + +namespace pexsi +{ +DistBCDMatrix::DistBCDMatrix(MPI_Comm comm, + MPI_Group group, + int blacs_ctxt, + int size, + int nblk, + int nrow, + int ncol, + char layout) +{ + this->comm = comm; + this->group = group; + this->blacs_ctxt = blacs_ctxt; + this->size = size; + this->nblk = nblk; + this->nrow = nrow; + this->ncol = ncol; + if (layout == 'r' || layout == 'c') + { + this->layout = layout; + } + else + { + throw("The layout must be 'r' or 'c'"); + } + + if (comm != MPI_COMM_NULL) + { + MPI_Comm_rank(comm, &this->myproc); + Cblacs_gridinfo(blacs_ctxt, &this->nprows, &this->npcols, &this->myprow, &this->mypcol); + } + else + { + this->myproc = -1; + this->myprow = -1; + this->mypcol = -1; + } + + // synchronize matrix parameters to all processes, including those are not in bcd group + int myid_in_comm_world; + MPI_Comm_rank(MPI_COMM_WORLD, &myid_in_comm_world); + if (myid_in_comm_world == 0) + { + MPI_Comm_size(comm, &this->nprocs); + int PARA_BCAST[4] = {this->nblk, this->nprocs, this->nprows, this->npcols}; + MPI_Bcast(&PARA_BCAST[0], 4, MPI_INT, 0, MPI_COMM_WORLD); + } + else + { + int PARA_BCAST[4]; + MPI_Bcast(&PARA_BCAST[0], 4, MPI_INT, 0, MPI_COMM_WORLD); + this->nblk = PARA_BCAST[0]; + this->nprocs = PARA_BCAST[1]; + this->nprows = PARA_BCAST[2]; + this->npcols = PARA_BCAST[3]; + } + this->prowpcol2pnum = new int[this->nprocs]; + if (myid_in_comm_world == 0) + { + for (int i = 0; i < this->nprows; ++i) + { + for (int j = 0; j < 
this->npcols; ++j) + { + this->prowpcol2pnum[i * this->npcols + j] = Cblacs_pnum(this->blacs_ctxt, i, j); + } + } + } + MPI_Bcast(this->prowpcol2pnum, this->nprocs, MPI_INT, 0, MPI_COMM_WORLD); +} + +DistBCDMatrix::~DistBCDMatrix() +{ + delete[] prowpcol2pnum; +} + +int DistBCDMatrix::globalRow(const int localRow) +{ + return (localRow / nblk * nprows + myprow) * nblk + localRow % nblk; +} + +int DistBCDMatrix::globalCol(const int localCol) +{ + return (localCol / nblk * npcols + mypcol) * nblk + localCol % nblk; +} + +int DistBCDMatrix::localRow(const int globalRow, int& myprow) +{ + myprow = int((globalRow % (nblk * nprows)) / nblk); + return int(globalRow / (nblk * nprows)) * nblk + globalRow % nblk; +} + +int DistBCDMatrix::localCol(const int globalCol, int& mypcol) +{ + mypcol = int((globalCol % (nblk * npcols)) / nblk); + return int(globalCol / (nblk * npcols)) * nblk + globalCol % nblk; +} + +int DistBCDMatrix::pnum(const int prow, const int pcol) +{ + return this->prowpcol2pnum[prow * this->npcols + pcol]; +} +} // namespace pexsi +#endif \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/dist_bcd_matrix.h b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h new file mode 100644 index 0000000000..94b61277d2 --- /dev/null +++ b/source/module_hsolver/module_pexsi/dist_bcd_matrix.h @@ -0,0 +1,85 @@ +#ifndef DISTBCDMATRIX_H +#define DISTBCDMATRIX_H + +#include + +#include "module_hsolver/module_pexsi/dist_matrix_transformer.h" +// a Block Cyclic Data Distribution matrix +// http://www.netlib.org/utk/papers/factor/node3.html +// local matrix elements is stored in column major +// used for pexsi +namespace pexsi +{ +class DistBCDMatrix +{ + public: + DistBCDMatrix(MPI_Comm comm, MPI_Group group, int blacs_ctxt, int size, int nblk, int nrow, int ncol, char layout); + ~DistBCDMatrix(); + + int globalRow(const int localRow); + int globalCol(const int localCol); + int localRow(const int globalRow, int& myprow); + int localCol(const int 
globalCol, int& mypcol); + int pnum(const int prow, const int pcol); + + const MPI_Comm get_comm() const + { + return comm; + }; + const MPI_Group get_group() const + { + return group; + }; + const int get_nrow() const + { + return nrow; + }; + const int get_ncol() const + { + return ncol; + }; + const char get_layout() const + { + return layout; + }; + + private: + // MPI communicator + MPI_Comm comm; + MPI_Group group; + + // blacs context + int blacs_ctxt; + + // row and column of process grid + int nprows; + int npcols; + + // total number of processes + int nprocs; + + // Matrix size + int size; + + // block size + int nblk; + + // row and c0lumn of Local matrix part + int nrow; + int ncol; + + // current process row and column + int myprow; + int mypcol; + + // current process id + int myproc; + + int* prowpcol2pnum; + // the local data layout + // 'R' or 'r' for row-major, which is used in C/C++ + // 'C' or 'c' for column-major, which is used in Fortran + char layout; +}; +} // namespace pexsi +#endif // DISTBCDMATRIX_H \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp new file mode 100644 index 0000000000..74391f2fbe --- /dev/null +++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.cpp @@ -0,0 +1,119 @@ +#ifdef __PEXSI +#include "dist_ccs_matrix.h" + +#include + +namespace pexsi +{ +DistCCSMatrix::DistCCSMatrix(void) +{ + this->comm = MPI_COMM_WORLD; + this->size = 0; + this->nnz = 0; + this->nnzLocal = 0; + this->numColLocal = 0; + this->colptrLocal = nullptr; + this->rowindLocal = nullptr; +} + +DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in) +{ + this->comm = comm_in; + this->size = 0; + this->nnz = 0; + this->nnzLocal = 0; + this->numColLocal = 0; + this->colptrLocal = nullptr; + this->rowindLocal = nullptr; +} + +DistCCSMatrix::DistCCSMatrix(int size_in, int nnzLocal_in) +{ + this->comm = MPI_COMM_WORLD; + this->size = size_in; + this->nnzLocal = 
nnzLocal_in; + MPI_Request req; + MPI_Iallreduce(&nnzLocal, &this->nnz, 1, MPI_INT, MPI_SUM, this->comm, &req); + this->numColLocal = 0; + this->colptrLocal = new int[size]; + this->rowindLocal = new int[nnzLocal]; + + MPI_Status req_status; + MPI_Wait(&req, &req_status); +} + +DistCCSMatrix::DistCCSMatrix(MPI_Comm comm_in, int nproc_data_in, int size_in) +{ + this->comm = comm_in; + this->nproc_data = nproc_data_in; + int nproc_data_range[3] = {0, this->nproc_data - 1, 1}; + // create processes group with data: this->group_data and associated communicator + MPI_Comm_group(this->comm, &this->group); + MPI_Group_range_incl(this->group, 1, &nproc_data_range, &this->group_data); + this->comm_data = MPI_COMM_NULL; + MPI_Comm_create(this->comm, this->group_data, &this->comm_data); + this->size = size_in; + this->nnz = 0; + this->nnzLocal = 0; + int myproc; + if (comm != MPI_COMM_NULL) + { + MPI_Comm_size(comm, &nprocs); + MPI_Comm_rank(comm, &myproc); + if (myproc < nproc_data - 1) + { + this->numColLocal = size / nproc_data; + this->firstCol = size / nproc_data * myproc; + this->colptrLocal = new int[this->numColLocal + 1]; + this->rowindLocal = nullptr; + } + else if (myproc == nproc_data - 1) + { + this->numColLocal = size - myproc * (size / nproc_data); + this->firstCol = size / nproc_data * myproc; + this->colptrLocal = new int[this->numColLocal + 1]; + this->rowindLocal = nullptr; + } + else + { + this->numColLocal = 0; + this->firstCol = size - 1; + this->colptrLocal = new int[this->numColLocal + 1]; + this->rowindLocal = nullptr; + } + } +} + +int DistCCSMatrix::globalCol(int localCol) +{ + return this->firstCol + localCol; +} + +// NOTE: the process id is 0-based +int DistCCSMatrix::localCol(int globalCol, int& mypcol) +{ + mypcol = int(globalCol / int(this->size / this->nproc_data)); + if (mypcol >= this->nproc_data) + mypcol = this->nproc_data - 1; + + return mypcol > 0 ? 
globalCol - (this->size / this->nproc_data) * mypcol : globalCol; +} + +void DistCCSMatrix::setnnz(int nnzLocal_in) +{ + if (this->comm_data != MPI_COMM_NULL) + { + MPI_Allreduce(&nnzLocal_in, &this->nnz, 1, MPI_INT, MPI_SUM, this->comm_data); + this->nnzLocal = nnzLocal_in; + this->rowindLocal = new int[nnzLocal]; + this->colptrLocal[this->numColLocal] = nnzLocal_in + 1; + } +} + +DistCCSMatrix::~DistCCSMatrix() +{ + delete[] colptrLocal; + delete[] rowindLocal; +} +} // namespace pexsi +#endif \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/dist_ccs_matrix.h b/source/module_hsolver/module_pexsi/dist_ccs_matrix.h new file mode 100644 index 0000000000..a63a0dc16c --- /dev/null +++ b/source/module_hsolver/module_pexsi/dist_ccs_matrix.h @@ -0,0 +1,95 @@ +#ifndef DISTCCSMATRIX_H +#define DISTCCSMATRIX_H + +#include +// Distributed Compressed Column Storage Matrix format +// used for PEXSI +namespace pexsi +{ +class DistCCSMatrix +{ + + public: + DistCCSMatrix(); + DistCCSMatrix(MPI_Comm comm); + DistCCSMatrix(int size, int nnzLocal); + DistCCSMatrix(MPI_Comm comm, int size, int nnzLocal); + DistCCSMatrix(MPI_Comm comm, int size, int nnzLocal, double* valLocal, int* index); + + int globalCol(int localCol); + int localCol(int globalCol, int& mypcol); + void setnnz(int nnzLocal); + + const MPI_Comm get_comm() const + { + return comm; + }; + const MPI_Group get_group() const + { + return group; + }; + const MPI_Group get_group_data() const + { + return group_data; + }; + const int get_size() const + { + return size; + }; + const int get_nnz() const + { + return nnz; + }; + const int get_nnzlocal() const + { + return nnzLocal; + }; + const int get_numcol_local() const + { + return numColLocal; + }; + int* get_colptr_local() const + { + return colptrLocal; + }; + int* get_rowind_local() const + { + return rowindLocal; + }; + + ~DistCCSMatrix(); + + private: + // MPI communicator + MPI_Comm comm; + MPI_Group group; + + // total number of processes 
and the processes with data in + int nprocs; + int nproc_data; + MPI_Group group_data; + MPI_Comm comm_data; + + // Matrix size + int size; + + // Number of non-zero values in the matrix + int nnz; + + // Number of non-zero values in the matrix of the local process + int nnzLocal; + + // number of columns in current process + int numColLocal; + + // the first column index in current process + int firstCol; + + // Array stores the indices to the nonzero row indices in rowptrLocal and nzvalLocal + int* colptrLocal; + int* rowindLocal; + + // friend class DistMatrixTransformer; +}; +} // namespace pexsi +#endif // DISTCCSMATRIX_H diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp new file mode 100644 index 0000000000..313a840e68 --- /dev/null +++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.cpp @@ -0,0 +1,765 @@ +#ifdef __PEXSI +#include "dist_matrix_transformer.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include "dist_bcd_matrix.h" +#include "dist_ccs_matrix.h" + +namespace pexsi +{ +// find the minimum index, the return value will be a non-negtive value index value if it is found, otherwise will be a +// negtive value the size_process and displacement_process array will be changed after the index is found isFirst: +// wether this function is called for the first time for a index array; nprocs: total number of processes size_process: +// the number of indices in each process displacement_process: the start position in each process index: the array +// contains the indices +inline int DistMatrixTransformer::MinimumIndexPosition(const bool isFirst, + const int nprocs, + int* size_process, + int* displacement_process, + const int* index) +{ + // usually the minimum index is continuous, so it will be a good idea to + // check the one next to the previous index first. 
+ static int pre_position; // previous position in index array of minimum index, + static int pre_process; // the process contains previous index + + int minimum_index + = INT_MAX; // the minimum index, initial value is a large number which is larger than any other index; + int minimum_position = -1; + int minimum_process = -1; + + if (isFirst) + { + for (int i = 0; i < nprocs; ++i) + { + if (size_process[i] > 0) + { + if (minimum_index > index[displacement_process[i]]) // find a smaller index + { + minimum_position = displacement_process[i]; + minimum_index = index[minimum_position]; + minimum_process = i; + } + } + } + if (minimum_process >= 0) // find it! + { + ++displacement_process[minimum_process]; + --size_process[minimum_process]; + } + pre_position = minimum_position; + pre_process = minimum_process; + return minimum_position; + } + else + { + // check the next one of pre_position + if (size_process[pre_process] > 0 && // the previous process still has elements + index[pre_position + 1] == index[pre_position] + 1) // find it! + { + ++displacement_process[pre_process]; + --size_process[pre_process]; + ++pre_position; // new pre_position is the next one + // new pre_process keeps the same + return pre_position; // current position is the new pre_position + } + + // if the next one of pre_position is not the minimum one + for (int i = 0; i < nprocs; ++i) + { + if (size_process[i] > 0) + { + if (minimum_index > index[displacement_process[i]]) + { + minimum_position = displacement_process[i]; + minimum_index = index[minimum_position]; + minimum_process = i; + } + } + } + if (minimum_process >= 0) // find it! 
+ { + ++displacement_process[minimum_process]; + --size_process[minimum_process]; + } + pre_position = minimum_position; + pre_process = minimum_process; + return minimum_position; + } +} + +inline void DistMatrixTransformer::buildCCSParameter(const int size, + const int nprocs, + std::vector size_process, + std::vector displacement_process, + const int* position_index, + DistCCSMatrix& DST_Matrix, + int* buffer2ccsIndex) +{ + // find the minimum one from left buffer index + if (DST_Matrix.get_nnzlocal() <= 0) + return; + + int pre_col = -1; + int nnz_now = 0; + int p_mini; + p_mini = MinimumIndexPosition(true, nprocs, &size_process[0], &displacement_process[0], position_index); + while (p_mini >= 0) + { + int index_mini = position_index[p_mini]; + int col_mini = index_mini / DST_Matrix.get_size(); //-DST_Matrix.firstCol; + int row_mini = index_mini % DST_Matrix.get_size(); + if (col_mini > pre_col) // a new column starts, column pointer is a 1-based array + { + pre_col = col_mini; + DST_Matrix.get_colptr_local()[col_mini] = nnz_now + 1; + } + DST_Matrix.get_rowind_local()[nnz_now] = row_mini + 1; // setup row index array, which is also 1-based + // copy data from buffer to M, be careful M is a 0-based array + buffer2ccsIndex[nnz_now] = p_mini; + ++nnz_now; + p_mini = MinimumIndexPosition(false, nprocs, &size_process[0], &displacement_process[0], position_index); + } + // The last element of colptrLocal is nnzLocal+1 + DST_Matrix.get_colptr_local()[DST_Matrix.get_numcol_local()] = nnz_now + 1; +} + +inline void DistMatrixTransformer::buffer2CCSvalue(int nnzLocal, + int* buffer2ccsIndex, + double* buffer, + double* nzvalLocal) +{ + for (int i = 0; i < nnzLocal; ++i) + { + nzvalLocal[i] = buffer[buffer2ccsIndex[i]]; + } +} +inline void DistMatrixTransformer::countMatrixDistribution(int N, double* A, std::map& P) +{ + for (int i = 0; i < N; ++i) + { + int key; + if (fabs(A[i] < 1e-31)) + key = -100; + else + key = floor(log10(fabs(A[i]))); + ++P[key]; + } +} + +// 
find out the index of non-zero elements +inline int DistMatrixTransformer::getNonZeroIndex(char layout, + const int nrow, + const int ncol, + double* H_2d, + double* S_2d, + const double ZERO_Limit, + int& nnz, + std::vector& rowidx, + std::vector& colidx) +{ + int idx = 0; + nnz = 0; + colidx.clear(); + rowidx.clear(); + if (layout == 'c') + { + for (int i = 0; i < ncol; ++i) + { + for (int j = 0; j < nrow; ++j) + { + idx = i * nrow + j; + if (fabs(H_2d[idx]) > ZERO_Limit || fabs(S_2d[idx]) > ZERO_Limit) + { + ++nnz; + colidx.push_back(i); + rowidx.push_back(j); + } + } + } + } + else if (layout == 'r') + { + for (int i = 0; i < ncol; ++i) + { + for (int j = 0; j < nrow; ++j) + { + idx = j * ncol + i; + if (fabs(H_2d[idx]) > ZERO_Limit || fabs(S_2d[idx]) > ZERO_Limit) + { + ++nnz; + colidx.push_back(i); + rowidx.push_back(j); + } + } + } + } + else + { + return 1; + } + return 0; +} + +int DistMatrixTransformer::buildTransformParameter(DistBCDMatrix& SRC_Matrix, + DistCCSMatrix& DST_Matrix, + const int NPROC_TRANS, + MPI_Group& GROUP_TRANS, + MPI_Comm& COMM_TRANS, + const int nnz, + std::vector& rowidx, + std::vector& colidx, + int& sender_size, + std::vector& sender_size_process, + std::vector& sender_displacement_process, + int& receiver_size, + std::vector& receiver_size_process, + std::vector& receiver_displacement_process, + std::vector& buffer2ccsIndex) +{ + int myproc; + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + sender_size = nnz; + std::fill(sender_size_process.begin(), sender_size_process.end(), 0); + // create process id map from group_data to group_trans + int nproc_data; + std::vector proc_map_data_trans; + if (myproc == 0) + { + MPI_Group_size(DST_Matrix.get_group_data(), &nproc_data); + MPI_Bcast(&nproc_data, 1, MPI_INT, 0, COMM_TRANS); + proc_map_data_trans.resize(nproc_data, 0); + for (int i = 0; i < nproc_data; ++i) + { + MPI_Group_translate_ranks(DST_Matrix.get_group_data(), 1, &i, GROUP_TRANS, &proc_map_data_trans[i]); + } + 
MPI_Bcast(&proc_map_data_trans[0], nproc_data, MPI_INT, 0, COMM_TRANS); + } + else + { + MPI_Bcast(&nproc_data, 1, MPI_INT, 0, COMM_TRANS); + proc_map_data_trans.resize(nproc_data, 0); + MPI_Bcast(&proc_map_data_trans[0], nproc_data, MPI_INT, 0, COMM_TRANS); + } + + for (int i = 0; i < nnz; ++i) + { + int l_col = colidx[i]; + int g_col = SRC_Matrix.globalCol(l_col); + int dst_process; + int dst_col = DST_Matrix.localCol(g_col, dst_process); + int dst_process_trans = proc_map_data_trans[dst_process]; + ++sender_size_process[dst_process_trans]; + } + // transfer sender index size to receiver index size + MPI_Alltoall(&sender_size_process[0], 1, MPI_INT, &receiver_size_process[0], 1, MPI_INT, COMM_TRANS); + // setup all2all parameters + sender_displacement_process[0] = 0; + for (int i = 1; i < NPROC_TRANS; ++i) + { + sender_displacement_process[i] = sender_displacement_process[i - 1] + sender_size_process[i - 1]; + } + + receiver_displacement_process[0] = 0; + receiver_size = receiver_size_process[0]; + for (int i = 1; i < NPROC_TRANS; ++i) + { + receiver_displacement_process[i] = receiver_displacement_process[i - 1] + receiver_size_process[i - 1]; + receiver_size += receiver_size_process[i]; + } + + // setup receiver index + // setup sender_index + std::vector sender_index(sender_size); + for (int i = 0; i < nnz; ++i) + { + int l_col = colidx[i]; + int g_col = SRC_Matrix.globalCol(l_col); + int dst_process; + int dst_col = DST_Matrix.localCol(g_col, dst_process); + int l_row = rowidx[i]; + int dst_row = SRC_Matrix.globalRow(l_row); + sender_index[i] = dst_col * DST_Matrix.get_size() + dst_row; + } + + // transfer index to receiver + std::vector receiver_index(receiver_size); + MPI_Alltoallv(&sender_index[0], + &sender_size_process[0], + &sender_displacement_process[0], + MPI_INT, + &receiver_index[0], + &receiver_size_process[0], + &receiver_displacement_process[0], + MPI_INT, + COMM_TRANS); + + // setup buffer2ccsIndex based on receiver_index + 
buffer2ccsIndex.resize(receiver_size); + DST_Matrix.setnnz(receiver_size); + buildCCSParameter(receiver_size, + NPROC_TRANS, + receiver_size_process, + receiver_displacement_process, + &receiver_index[0], + DST_Matrix, + &buffer2ccsIndex[0]); + return 0; +} + +int DistMatrixTransformer::newGroupCommTrans(DistBCDMatrix& SRC_Matrix, + DistCCSMatrix& DST_Matrix, + MPI_Group& GROUP_TRANS, + MPI_Comm& COMM_TRANS) +{ + // build transfortram communicator which contains both processes of BCD processors and + // CCS processors with nonzero elements + MPI_Group_union(DST_Matrix.get_group_data(), SRC_Matrix.get_group(), &GROUP_TRANS); + MPI_Comm_create(MPI_COMM_WORLD, GROUP_TRANS, &COMM_TRANS); + return 0; +} + +int DistMatrixTransformer::deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS) +{ + MPI_Group_free(&GROUP_TRANS); + if (COMM_TRANS != MPI_COMM_NULL) + { + MPI_Comm_free(&COMM_TRANS); + } + return 0; +} + +// transform two sparse matrices from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution +// two destination matrices share the same non-zero elements positions +// if either of two elements in source matrices is non-zeros, the elements in the destination matrices are non-zero, +// even if one of them is acturely zero All matrices must have same MPI communicator +int DistMatrixTransformer::transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, + double* H_2d, + double* S_2d, + const double ZERO_Limit, + DistCCSMatrix& DST_Matrix, + double*& H_ccs, + double*& S_ccs) +{ + MPI_Group GROUP_TRANS; + MPI_Comm COMM_TRANS = MPI_COMM_NULL; + newGroupCommTrans(SRC_Matrix, DST_Matrix, GROUP_TRANS, COMM_TRANS); + if (COMM_TRANS != MPI_COMM_NULL) + { + // set up sender and receiver= + int NPROC_TRANS; + MPI_Comm_size(COMM_TRANS, &NPROC_TRANS); + int sender_size; + std::vector sender_size_process(NPROC_TRANS); + std::vector sender_displacement_process(NPROC_TRANS); + int receiver_size; + std::vector receiver_size_process(NPROC_TRANS); + 
std::vector receiver_displacement_process(NPROC_TRANS); + + // find out the non-zeros elements' positions + std::vector rowidx; + std::vector colidx; + int nnz = 0; + if (SRC_Matrix.get_comm() != MPI_COMM_NULL) + { + getNonZeroIndex(SRC_Matrix.get_layout(), + SRC_Matrix.get_nrow(), + SRC_Matrix.get_ncol(), + H_2d, + S_2d, + ZERO_Limit, + nnz, + rowidx, + colidx); + } + // build all2all transformation parameters and the map index of receiver buffer + std::vector buffer2ccsIndex; + buildTransformParameter(SRC_Matrix, + DST_Matrix, + NPROC_TRANS, + GROUP_TRANS, + COMM_TRANS, + nnz, + rowidx, + colidx, + sender_size, + sender_size_process, + sender_displacement_process, + receiver_size, + receiver_size_process, + receiver_displacement_process, + buffer2ccsIndex); +// Do transformation + std::vector sender_buffer(sender_size); + std::vector receiver_buffer(receiver_size); + // put H to sender buffer + if (SRC_Matrix.get_layout() == 'R' || SRC_Matrix.get_layout() == 'r') + { + for (int i = 0; i < sender_size; ++i) + { + sender_buffer[i] = H_2d[rowidx[i] * SRC_Matrix.get_ncol() + colidx[i]]; + } + } + else + { + for (int i = 0; i < sender_size; ++i) + { + sender_buffer[i] = H_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]]; + } + } + // do all2all transformation + MPI_Alltoallv(&sender_buffer[0], + &sender_size_process[0], + &sender_displacement_process[0], + MPI_DOUBLE, + &receiver_buffer[0], + &receiver_size_process[0], + &receiver_displacement_process[0], + MPI_DOUBLE, + COMM_TRANS); +// collect H from receiver buffer + delete[] H_ccs; + H_ccs = new double[receiver_size]; + buffer2CCSvalue(receiver_size, &buffer2ccsIndex[0], &receiver_buffer[0], H_ccs); + + // put S to sender buffer + if (SRC_Matrix.get_layout() == 'R' || SRC_Matrix.get_layout() == 'r') + { + for (int i = 0; i < sender_size; ++i) + { + sender_buffer[i] = S_2d[rowidx[i] * SRC_Matrix.get_ncol() + colidx[i]]; + } + } + else + { + for (int i = 0; i < sender_size; ++i) + { + sender_buffer[i] = 
S_2d[colidx[i] * SRC_Matrix.get_nrow() + rowidx[i]]; + } + } + // do all2all transformation + MPI_Alltoallv(&sender_buffer[0], + &sender_size_process[0], + &sender_displacement_process[0], + MPI_DOUBLE, + &receiver_buffer[0], + &receiver_size_process[0], + &receiver_displacement_process[0], + MPI_DOUBLE, + COMM_TRANS); +// collect S from receiver buffer + delete[] S_ccs; + S_ccs = new double[receiver_size]; + buffer2CCSvalue(receiver_size, &buffer2ccsIndex[0], &receiver_buffer[0], S_ccs); + } + // clear and return + deleteGroupCommTrans(GROUP_TRANS, COMM_TRANS); + return 0; +} + +// transform two sparse matrices from Compressed Column Storage (CCS) to block cyclic distribution (BCD) distribution +// two source matrices share the same non-zero elements positions +int DistMatrixTransformer::transformCCStoBCD(DistCCSMatrix& SRC_Matrix, + double* DMnzvalLocal, + double* EDMnzvalLocal, + DistBCDMatrix& DST_Matrix, + double* DM, + double* EDM) +{ + int myproc; + MPI_Comm_rank(MPI_COMM_WORLD, &myproc); + MPI_Group GROUP_TRANS; + MPI_Comm COMM_TRANS = MPI_COMM_NULL; + newGroupCommTrans(DST_Matrix, SRC_Matrix, GROUP_TRANS, COMM_TRANS); + if (COMM_TRANS != MPI_COMM_NULL) + { + // init DM and EDM with 0 + for (int i = 0; i < DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); ++i) + { + DM[i] = 0; + EDM[i] = 0; + } + // setup number of local elements to be transfered to each remote processes + int NPROC_TRANS; + MPI_Comm_size(COMM_TRANS, &NPROC_TRANS); + int sender_size_process[NPROC_TRANS]; + int sender_displacement_process[NPROC_TRANS]; + int receiver_size_process[NPROC_TRANS]; + int receiver_displacement_process[NPROC_TRANS]; + int nproc_bcd; + std::vector proc_map_bcd_trans; + int myproc_trans; + MPI_Comm_rank(COMM_TRANS, &myproc_trans); + if (myproc_trans == 0) + { + MPI_Group_size(DST_Matrix.get_group(), &nproc_bcd); + MPI_Bcast(&nproc_bcd, 1, MPI_INT, 0, COMM_TRANS); + proc_map_bcd_trans.resize(nproc_bcd, 0); + for (int i = 0; i < nproc_bcd; ++i) + { + 
MPI_Group_translate_ranks(DST_Matrix.get_group(), 1, &i, GROUP_TRANS, &proc_map_bcd_trans[i]); + } + MPI_Bcast(&proc_map_bcd_trans[0], nproc_bcd, MPI_INT, 0, COMM_TRANS); + } + else + { + MPI_Bcast(&nproc_bcd, 1, MPI_INT, 0, COMM_TRANS); + proc_map_bcd_trans.resize(nproc_bcd, 0); + MPI_Bcast(&proc_map_bcd_trans[0], nproc_bcd, MPI_INT, 0, COMM_TRANS); + } + // setup sender_size_process + // std::fill(sender_size_process.begin(), sender_size_process.end(), 0); + for (int i = 0; i < NPROC_TRANS; ++i) + sender_size_process[i] = 0; + for (int icol = 0; icol < SRC_Matrix.get_numcol_local(); ++icol) + { + int g_col = SRC_Matrix.globalCol(icol); + int recv_pcol_bcd; + int recv_col = DST_Matrix.localCol(g_col, recv_pcol_bcd); + + for (int rowidx = SRC_Matrix.get_colptr_local()[icol] - 1; rowidx < SRC_Matrix.get_colptr_local()[icol + 1] - 1; ++rowidx) + { + int g_row = SRC_Matrix.get_rowind_local()[rowidx] - 1; + int recv_prow_bcd; + int recv_row = DST_Matrix.localRow(g_row, recv_prow_bcd); + int recv_proc_bcd = DST_Matrix.pnum(recv_prow_bcd, recv_pcol_bcd); + int recv_proc = proc_map_bcd_trans[recv_proc_bcd]; + ++sender_size_process[recv_proc]; + } + // log<<"\n"; + } + // setup receiver_size_process + // std::fill(receiver_size_process.begin(), receiver_size_process.end(), 0); + for (int i = 0; i < NPROC_TRANS; ++i) + receiver_size_process[i] = 0; + MPI_Alltoall(&sender_size_process[0], 1, MPI_INT, &receiver_size_process[0], 1, MPI_INT, COMM_TRANS); + // setup sender_displacement and receiver_displacement + sender_displacement_process[0] = 0; + receiver_displacement_process[0] = 0; + int receiver_size = receiver_size_process[0]; + for (int i = 1; i < NPROC_TRANS; ++i) + { + sender_displacement_process[i] = sender_displacement_process[i - 1] + sender_size_process[i - 1]; + receiver_displacement_process[i] = receiver_displacement_process[i - 1] + receiver_size_process[i - 1]; + receiver_size += receiver_size_process[i]; + } + + // setup up sender index and receiver index + 
int sender_size = SRC_Matrix.get_nnzlocal(); + int* sender_index; + double* sender_buffer; + int* dst_index; + int* receiver_index; + double* receiver_buffer; + + if (sender_size > 0) + { + sender_index = new int[sender_size]; + for (int i = 0; i < sender_size; ++i) + { + sender_index[i] = -1; + } + sender_buffer = new double[sender_size]; + dst_index = new int[2 * sender_size]; + for (int i = 0; i < 2 * sender_size; ++i) + { + dst_index[i] = -1; + } + } + else + { + sender_index = new int[1]; + sender_index[0] = -1; + sender_buffer = new double[1]; + dst_index = new int[2]; + dst_index[0] = -1; + dst_index[1] = -1; + } + + if (receiver_size > 0) + { + receiver_index = new int[2 * receiver_size]; + receiver_buffer = new double[receiver_size]; + for (int i = 0; i < 2 * receiver_size; ++i) + { + receiver_index[i] = -1; + } + for (int i = 0; i < receiver_size; ++i) + { + receiver_buffer[i] = -1; + } + } + else + { + receiver_index = new int[2]; + receiver_buffer = new double[1]; + receiver_index[0] = -1; + receiver_index[1] = -1; + receiver_buffer[0] = -1; + } + + // pointer to the first empty slot of each process + // std::vector p(sender_displacement_process); + int p[NPROC_TRANS]; + for (int i = 0; i < NPROC_TRANS; ++i) + { + p[i] = sender_displacement_process[i]; + } + + int idx = 0; + + for (int icol = 0; icol < SRC_Matrix.get_numcol_local(); ++icol) + { + int g_col = SRC_Matrix.globalCol(icol); + int recv_pcol_bcd; + int recv_col = DST_Matrix.localCol(g_col, recv_pcol_bcd); + for (int rowidx = SRC_Matrix.get_colptr_local()[icol] - 1; rowidx < SRC_Matrix.get_colptr_local()[icol + 1] - 1; ++rowidx) + { + int g_row = SRC_Matrix.get_rowind_local()[rowidx] - 1; + int recv_prow_bcd; + int recv_row = DST_Matrix.localRow(g_row, recv_prow_bcd); + + int recv_proc_bcd = DST_Matrix.pnum(recv_prow_bcd, recv_pcol_bcd); + + int recv_proc = proc_map_bcd_trans[recv_proc_bcd]; + + sender_index[p[recv_proc]] = idx; + + dst_index[p[recv_proc] * 2] = recv_row; + 
dst_index[p[recv_proc] * 2 + 1] = recv_col; + ++p[recv_proc]; + ++idx; + } + } + + for (int i = 0; i < NPROC_TRANS; ++i) + { + sender_size_process[i] *= 2; + sender_displacement_process[i] *= 2; + receiver_size_process[i] *= 2; + receiver_displacement_process[i] *= 2; + } + + MPI_Alltoallv(&dst_index[0], + &sender_size_process[0], + &sender_displacement_process[0], + MPI_INT, + &receiver_index[0], + &receiver_size_process[0], + &receiver_displacement_process[0], + MPI_INT, + COMM_TRANS); + + // reset size and displacement for transfering matrix value by alltoall + for (int i = 0; i < NPROC_TRANS; ++i) + { + sender_size_process[i] /= 2; + sender_displacement_process[i] /= 2; + receiver_size_process[i] /= 2; + receiver_displacement_process[i] /= 2; + } + + // transfer DM + // set up DM sender buffer + for (int i = 0; i < sender_size; ++i) + { + sender_buffer[i] = DMnzvalLocal[sender_index[i]]; + } + + // transfer sender buffer to receiver buffer + MPI_Alltoallv(&sender_buffer[0], + &sender_size_process[0], + &sender_displacement_process[0], + MPI_DOUBLE, + &receiver_buffer[0], + &receiver_size_process[0], + &receiver_displacement_process[0], + MPI_DOUBLE, + COMM_TRANS); + + // transform receiver_buffer to DM + if (DST_Matrix.get_layout() == 'R' || DST_Matrix.get_layout() == 'r') + { + int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); + for (int i = 0; i < receiver_size; ++i) + { + int ix = receiver_index[2 * i]; + int iy = receiver_index[2 * i + 1]; + int idx = ix * DST_Matrix.get_ncol() + iy; + DM[idx] = receiver_buffer[i]; + } + } + else + { + int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); + for (int i = 0; i < receiver_size; ++i) + { + int ix = receiver_index[2 * i]; + int iy = receiver_index[2 * i + 1]; + int idx = iy * DST_Matrix.get_nrow() + ix; + DM[idx] = receiver_buffer[i]; + } + } + + // setup up sender buffer of EDM + for (int i = 0; i < sender_size; ++i) + { + sender_buffer[i] = EDMnzvalLocal[sender_index[i]]; + } 
+ + // transfer sender buffer to receiver buffer + MPI_Alltoallv(&sender_buffer[0], + &sender_size_process[0], + &sender_displacement_process[0], + MPI_DOUBLE, + &receiver_buffer[0], + &receiver_size_process[0], + &receiver_displacement_process[0], + MPI_DOUBLE, + COMM_TRANS); + + // transform receiver_buffer to EDM + if (DST_Matrix.get_layout() == 'R' || DST_Matrix.get_layout() == 'r') + { + int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); + for (int i = 0; i < receiver_size; ++i) + { + int ix = receiver_index[2 * i]; + int iy = receiver_index[2 * i + 1]; + int idx = ix * DST_Matrix.get_ncol() + iy; + EDM[idx] = receiver_buffer[i]; + } + } + else + { + int DST_Matrix_elem = DST_Matrix.get_nrow() * DST_Matrix.get_ncol(); + for (int i = 0; i < receiver_size; ++i) + { + int ix = receiver_index[2 * i]; + int iy = receiver_index[2 * i + 1]; + int idx = iy * DST_Matrix.get_nrow() + ix; + EDM[idx] = receiver_buffer[i]; + } + } + + delete[] sender_index; + delete[] sender_buffer; + delete[] dst_index; + delete[] receiver_index; + delete[] receiver_buffer; + + } + deleteGroupCommTrans(GROUP_TRANS, COMM_TRANS); + return 0; +} + +} // namespace pexsi +#endif \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/dist_matrix_transformer.h b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h new file mode 100644 index 0000000000..672b22f4f3 --- /dev/null +++ b/source/module_hsolver/module_pexsi/dist_matrix_transformer.h @@ -0,0 +1,91 @@ +#ifndef DISTMATRIXTRANSFORMER_H +#define DISTMATRIXTRANSFORMER_H + +#include +#include +#include +// transform a sparse matrix from block cyclic distribution (BCD) to Compressed Column Storage (CCS) distribution +// they should have same MPI communicator +// The local matrix of BCD is column-major order +// int transformBCDtoCCS(DistBCDMatrix &SRC_Matrix, double* H_2d, const double ZERO_Limit, +// DistCCSMatrix &DST_Matrix, double*& H_ccs); + +// transform two sparse matrices from block cyclic 
distribution (BCD) to Compressed Column Storage (CCS) distribution +// two destination matrices share the same non-zero elements positions +// if either of two elements in source matrices is non-zeros, the elements in the destination matrices are non-zero, +// even if one of them is acturely zero All matrices must have same MPI communicator +namespace pexsi +{ +class DistBCDMatrix; +class DistCCSMatrix; + +namespace DistMatrixTransformer +{ +int MinimumIndexPosition(const bool isFirst, + const int nprocs, + int* size_process, + int* displacement_process, + const int* index); + +void buildCCSParameter(const int size, + const int nprocs, + std::vector size_process, + std::vector displacement_process, + const int* position_index, + DistCCSMatrix& DST_Matrix, + int* buffer2ccsIndex); + +void buffer2CCSvalue(int nnzLocal, int* buffer2ccsIndex, double* buffer, double* nzvalLocal); + +void countMatrixDistribution(int N, double* A, std::map& P); + +int getNonZeroIndex(char layout, + const int nrow, + const int ncol, + double* H_2d, + double* S_2d, + const double ZERO_Limit, + int& nnz, + std::vector& rowidx, + std::vector& colidx); + +int buildTransformParameter(DistBCDMatrix& SRC_Matrix, + DistCCSMatrix& DST_Matrix, + const int NPROC_TRANS, + MPI_Group& GROUP_TRANS, + MPI_Comm& COMM_TRANS, + const int nnz, + std::vector& rowidx, + std::vector& colidx, + int& sender_size, + std::vector& sender_size_process, + std::vector& sender_displacement_process, + int& receiver_size, + std::vector& receiver_size_process, + std::vector& receiver_displacement_process, + std::vector& buffer2ccsIndex); + +int newGroupCommTrans(DistBCDMatrix& SRC_Matrix, + DistCCSMatrix& DST_Matrix, + MPI_Group& GROUP_TRANS, + MPI_Comm& COMM_TRANS); + +int deleteGroupCommTrans(MPI_Group& GROUP_TRANS, MPI_Comm& COMM_TRANS); + +int transformBCDtoCCS(DistBCDMatrix& SRC_Matrix, + double* H_2d, + double* S_2d, + const double ZERO_Limit, + DistCCSMatrix& DST_Matrix, + double*& H_ccs, + double*& S_ccs); + +int 
transformCCStoBCD(DistCCSMatrix& SRC_Matrix, + double* DMnzvalLocal, + double* ENDnzvalLocal, + DistBCDMatrix& DST_Matrix, + double* DM_2d, + double* ED_2d); +}; // namespace DistMatrixTransformer +} // namespace pexsi +#endif // DISTMATRIXTRANSFORMER_H \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.cpp b/source/module_hsolver/module_pexsi/pexsi_solver.cpp new file mode 100644 index 0000000000..7a71e6ca01 --- /dev/null +++ b/source/module_hsolver/module_pexsi/pexsi_solver.cpp @@ -0,0 +1,121 @@ +#include "module_base/parallel_global.h" +#ifdef __PEXSI +#include "pexsi_solver.h" + +#include +#include +#include + +#include "module_base/global_variable.h" +#include "simple_pexsi.h" + +extern MPI_Comm DIAG_WORLD; +extern MPI_Comm GRID_WORLD; +namespace pexsi +{ + +int PEXSI_Solver::pexsi_npole = 0; +bool PEXSI_Solver::pexsi_inertia = 0; +int PEXSI_Solver::pexsi_nmax = 0; +// int PEXSI_Solver::pexsi_symbolic = 0; +bool PEXSI_Solver::pexsi_comm = 0; +bool PEXSI_Solver::pexsi_storage = 0; +int PEXSI_Solver::pexsi_ordering = 0; +int PEXSI_Solver::pexsi_row_ordering = 0; +int PEXSI_Solver::pexsi_nproc = 0; +bool PEXSI_Solver::pexsi_symm = 0; +bool PEXSI_Solver::pexsi_trans = 0; +int PEXSI_Solver::pexsi_method = 0; +int PEXSI_Solver::pexsi_nproc_pole = 0; +// double PEXSI_Solver::pexsi_spin = 2; +double PEXSI_Solver::pexsi_temp = 0.0; +double PEXSI_Solver::pexsi_gap = 0.0; +double PEXSI_Solver::pexsi_delta_e = 0.0; +double PEXSI_Solver::pexsi_mu_lower = 0.0; +double PEXSI_Solver::pexsi_mu_upper = 0.0; +double PEXSI_Solver::pexsi_mu = 0.0; +double PEXSI_Solver::pexsi_mu_thr = 0.0; +double PEXSI_Solver::pexsi_mu_expand = 0.0; +double PEXSI_Solver::pexsi_mu_guard = 0.0; +double PEXSI_Solver::pexsi_elec_thr = 0.0; +double PEXSI_Solver::pexsi_zero_thr = 0.0; + +void PEXSI_Solver::prepare(const int blacs_text, + const int nb, + const int nrow, + const int ncol, + const double* h, + const double* s, + double*& _DM, + double*& _EDM) +{ + 
this->blacs_text = blacs_text; + this->nb = nb; + this->nrow = nrow; + this->ncol = ncol; + this->h = const_cast(h); + this->s = const_cast(s); + this->DM = _DM; + this->EDM = _EDM; + this->totalEnergyH = 0.0; + this->totalEnergyS = 0.0; + this->totalFreeEnergy = 0.0; +} + +int PEXSI_Solver::solve(double mu0) +{ + MPI_Group grid_group; + int myid, grid_np; + MPI_Group world_group; + MPI_Comm_rank(DIAG_WORLD, &myid); + MPI_Comm_size(DIAG_WORLD, &grid_np); + MPI_Comm_group(DIAG_WORLD, &world_group); + + int grid_proc_range[3]={0, (GlobalV::NPROC/grid_np)*grid_np-1, GlobalV::NPROC/grid_np}; + MPI_Group_range_incl(world_group, 1, &grid_proc_range, &grid_group); + + simplePEXSI(DIAG_WORLD, + DIAG_WORLD, + grid_group, + this->blacs_text, + GlobalV::NLOCAL, + this->nb, + this->nrow, + this->ncol, + 'c', + this->h, + this->s, + GlobalV::nelec, + "PEXSIOPTION", + this->DM, + this->EDM, + this->totalEnergyH, + this->totalEnergyS, + this->totalFreeEnergy, + mu, + mu0); + return 0; +} + +const double PEXSI_Solver::get_totalFreeEnergy() const +{ + return totalFreeEnergy; +} + +const double PEXSI_Solver::get_totalEnergyH() const +{ + return totalEnergyH; +} + +const double PEXSI_Solver::get_totalEnergyS() const +{ + return totalEnergyS; +} + +const double PEXSI_Solver::get_mu() const +{ + return mu; +} + +} // namespace pexsi +#endif \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/pexsi_solver.h b/source/module_hsolver/module_pexsi/pexsi_solver.h new file mode 100644 index 0000000000..b041d13656 --- /dev/null +++ b/source/module_hsolver/module_pexsi/pexsi_solver.h @@ -0,0 +1,143 @@ +#ifndef PEXSI_Solver_H +#define PEXSI_Solver_H + +#include + +namespace pexsi +{ +class PEXSI_Solver +{ + public: + void prepare(const int blacs_text, + const int nb, + const int nrow, + const int ncol, + const double* h, + const double* s, + double*& DM, + double*& EDM); + int solve(double mu0); + const double get_totalFreeEnergy() const; + const double get_totalEnergyH() 
const; + const double get_totalEnergyS() const; + const double get_mu() const; + + //========================================================== + // PEXSI related variables + //========================================================== + /** + * @brief Number of terms in the pole expansion. + */ + static int pexsi_npole; + /** + * @brief Whether inertia counting is used at the very beginning. + */ + static bool pexsi_inertia; + /** + * @brief Maximum number of PEXSI iterations after each inertia counting procedure. + */ + static int pexsi_nmax; + /** + * @brief Whether to construct PSelInv communication pattern. + */ + static bool pexsi_comm; + /** + * @brief Whether to use symmetric storage space used by the Selected Inversion algorithm for symmetric matrices. + */ + static bool pexsi_storage; + /** + * @brief Ordering strategy for factorization and selected inversion. + */ + static int pexsi_ordering; + /** + * @brief row permutation strategy for factorization and selected inversion. + */ + static int pexsi_row_ordering; + /** + * @brief Number of processors for PARMETIS/PT-SCOTCH. Only used if the ordering == 0. + */ + static int pexsi_nproc; + /** + * @brief Matrix structure. + * - = 0 : Unsymmetric matrix + * - = 1 : Symmetric matrix (default). + */ + static bool pexsi_symm; + /** + * @brief Transpose. + * - = 0 : Factor non transposed matrix (default). + * - = 1 : Factor transposed matrix. + */ + static bool pexsi_trans; + /** + * @brief The pole expansion method to be used. + * - = 1 : Cauchy Contour Integral method used. + * - = 2 : Moussa optimized method. + */ + static int pexsi_method; + /** + * @brief The point parallelizaion of PEXSI. + * - = 2 : Recommend two points parallelization + */ + static int pexsi_nproc_pole; + /** + * @brief Temperature, in the same unit as H + */ + static double pexsi_temp; + /** + * @brief Spectral gap. **Note** This can be set to be 0 in most cases. 
+ */ + static double pexsi_gap; + /** + * @brief An upper bound for the spectral radius of \f$S^{-1} H\f$. + */ + static double pexsi_delta_e; + /** + * @brief Initial guess of lower bound for mu. + */ + static double pexsi_mu_lower; + /** + * @brief Initial guess of upper bound for mu. + */ + static double pexsi_mu_upper; + /** + * @brief Initial guess for mu (for the solver) (AG) + */ + static double pexsi_mu; + /** + * @brief Stopping criterion in terms of the chemical potential for the inertia counting procedure. + */ + static double pexsi_mu_thr; + /** + * @brief If the chemical potential is not in the initial interval, the interval is expanded by muInertiaExpansion. + */ + static double pexsi_mu_expand; + /** + * @brief Safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure. + */ + static double pexsi_mu_guard; + /** + * @brief Stopping criterion of the %PEXSI iteration in terms of the number of electrons compared to numElectronExact. + */ + static double pexsi_elec_thr; + /** + * @brief If the absolute value of CCS matrix element is less than this value, it will be considered as zero. 
+ */ + static double pexsi_zero_thr; + + private: + int blacs_text; + int nb; + int nrow; + int ncol; + double* h; + double* s; + double* DM; + double* EDM; + double totalEnergyH; + double totalEnergyS; + double totalFreeEnergy; + double mu; +}; +} // namespace pexsi +#endif // PEXSI_Solver_H \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.cpp b/source/module_hsolver/module_pexsi/simple_pexsi.cpp new file mode 100644 index 0000000000..075c803182 --- /dev/null +++ b/source/module_hsolver/module_pexsi/simple_pexsi.cpp @@ -0,0 +1,365 @@ +// use PEXSI to solve a Kohn-Sham equation +// the H and S matrices are given by 2D block cyclic distribution +// the Density Matrix and Energy Density Matrix calculated by PEXSI are transformed to 2D block cyclic distribution +// #include "mpi.h" +#ifdef __PEXSI +#include + +#include +#include +#include +#include +#include +#include + +#include "c_pexsi_interface.h" +#include "dist_bcd_matrix.h" +#include "dist_ccs_matrix.h" +#include "dist_matrix_transformer.h" +#include "module_base/lapack_connector.h" +#include "module_base/timer.h" +#include "module_base/tool_quit.h" +#include "module_base/global_variable.h" +#include "module_hsolver/diago_pexsi.h" + +namespace pexsi +{ +inline void strtolower(char* sa, char* sb) +{ + char c; + int len = strlen(sa); + for (int i = 0; i < len; i++) + { + c = sa[i]; + sb[i] = tolower(c); + } + sb[len] = '\0'; +} + +inline void setDefaultOption(int* int_para, double* double_para) +{ + double_para[0] = 2; + double_para[2] = 0; + double_para[11] = DBL_MIN; + int_para[3] = 0; + int_para[6] = 1; + int_para[8] = 0; + int_para[9] = 0; + int_para[11] = 0; + int_para[12] = 0; + int_para[14] = 2; + int_para[15] = 1; +} + +int loadPEXSIOption(MPI_Comm comm, + const std::string PexsiOptionFile, + PPEXSIOptions& options, + int& numProcessPerPole, + double& ZERO_Limit) +{ + + // temp variable arrays read from conf file and will be bcast to all processors + + // parameter 
array of type int, + // 0: numPole + // 1: isInertiaCount + // 2: maxPEXSIIter + // 3: matrixType + // 4: isSymbolicFactorize + // 5: isConstructCommPattern + // 6: solver + // 7: symmetricStorage + // 8: ordering + // 9: rowOrdering + // 10: npSymbFact + // 11: symmetric + // 12: transpose + // 13: method + // 14: nPoints + // 15: verbosity + // 16: numProcessPerPole + int int_para[17]; + + // parameter array of type double + // 0: spin + // 1: temperature + // 2: gap + // 3: deltaE + // 4: muMin0 + // 5: muMax0 + // 6: mu0 + // 7: muInertiaTolerance + // 8: muInertiaExpansion + // 9: muPEXSISafeGuard + // 10: numElectronPEXSITolerance + // 11: ZERO_Limit + double double_para[12]; + + // read in PEXSI options from GlobalV + int_para[0] = pexsi::PEXSI_Solver::pexsi_npole; + int_para[1] = pexsi::PEXSI_Solver::pexsi_inertia; + int_para[2] = pexsi::PEXSI_Solver::pexsi_nmax; + int_para[3] = 0; + int_para[4] = 1; // pexsi::PEXSI_Solver::pexsi_symbolic; + int_para[5] = pexsi::PEXSI_Solver::pexsi_comm; + int_para[6] = 0; + int_para[7] = pexsi::PEXSI_Solver::pexsi_storage; + int_para[8] = pexsi::PEXSI_Solver::pexsi_ordering; + int_para[9] = pexsi::PEXSI_Solver::pexsi_row_ordering; + int_para[10] = pexsi::PEXSI_Solver::pexsi_nproc; + int_para[11] = pexsi::PEXSI_Solver::pexsi_symm; + int_para[12] = pexsi::PEXSI_Solver::pexsi_trans; + int_para[13] = pexsi::PEXSI_Solver::pexsi_method; + int_para[14] = 2; + int_para[15] = 0; + int_para[16] = pexsi::PEXSI_Solver::pexsi_nproc_pole; + + double_para[0] = GlobalV::NSPIN; // pexsi::PEXSI_Solver::pexsi_spin; + double_para[1] = pexsi::PEXSI_Solver::pexsi_temp; + double_para[2] = pexsi::PEXSI_Solver::pexsi_gap; + double_para[3] = pexsi::PEXSI_Solver::pexsi_delta_e; + double_para[4] = pexsi::PEXSI_Solver::pexsi_mu_lower; + double_para[5] = pexsi::PEXSI_Solver::pexsi_mu_upper; + double_para[6] = pexsi::PEXSI_Solver::pexsi_mu; + double_para[7] = pexsi::PEXSI_Solver::pexsi_mu_thr; + double_para[8] = pexsi::PEXSI_Solver::pexsi_mu_expand; + 
double_para[9] = pexsi::PEXSI_Solver::pexsi_mu_guard; + double_para[10] = pexsi::PEXSI_Solver::pexsi_elec_thr; + double_para[11] = pexsi::PEXSI_Solver::pexsi_zero_thr; + + options.numPole = int_para[0]; + options.isInertiaCount = int_para[1]; + options.maxPEXSIIter = int_para[2]; + options.matrixType = int_para[3]; + options.isSymbolicFactorize = int_para[4]; + options.isConstructCommPattern = int_para[5]; + options.solver = int_para[6]; + options.symmetricStorage = int_para[7]; + options.ordering = int_para[8]; + options.rowOrdering = int_para[9]; + options.npSymbFact = int_para[10]; + options.symmetric = int_para[11]; + options.transpose = int_para[12]; + options.method = int_para[13]; + options.nPoints = int_para[14]; + options.verbosity = int_para[15]; + numProcessPerPole = int_para[16]; + + options.spin = double_para[0]; + options.temperature = double_para[1]; + options.gap = double_para[2]; + options.deltaE = double_para[3]; + options.muMin0 = double_para[4]; + options.muMax0 = double_para[5]; + options.mu0 = double_para[6]; + options.muInertiaTolerance = double_para[7]; + options.muInertiaExpansion = double_para[8]; + options.muPEXSISafeGuard = double_para[9]; + options.numElectronPEXSITolerance = double_para[10]; + ZERO_Limit = double_para[11]; + + return 0; +} + +void splitNProc2NProwNPcol(const int NPROC, int& nprow, int& npcol) +{ + int integral_part = (int)sqrt(NPROC); + if (NPROC % integral_part == 0) + { + nprow = integral_part; + npcol = NPROC / integral_part; + } + else + { + int flag; + int i; + int low = pow(integral_part, 2); + int high = pow(integral_part + 1, 2); + if ((NPROC - low) >= (high - NPROC)) + { + flag = integral_part + 1; + } + else + { + flag = integral_part; + } + for (i = flag; i > 0; ++i) + { + if (NPROC % i == 0) + break; + } + nprow = i; + npcol = NPROC / i; + } +} + +int simplePEXSI(MPI_Comm comm_PEXSI, + MPI_Comm comm_2D, + MPI_Group group_2D, + const int blacs_ctxt, // communicator parameters + const int size, + const int 
nblk, + const int nrow, + const int ncol, + char layout, // matrix parameters + double* H, + double* S, // input matrices + const double numElectronExact, + const std::string PexsiOptionFile, // pexsi parameters file + double*& DM, + double*& EDM, // output matrices + double& totalEnergyH, + double& totalEnergyS, + double& totalFreeEnergy, // output energy + double& mu, + double mu0) +{ + + if (comm_2D == MPI_COMM_NULL && comm_PEXSI == MPI_COMM_NULL) + return 0; + int myid; + std::ofstream f_log; + if (comm_PEXSI != MPI_COMM_NULL) + { + MPI_Comm_rank(comm_PEXSI, &myid); + } + + // set up PEXSI parameter + PPEXSIOptions options; + PPEXSISetDefaultOptions(&options); + int numProcessPerPole; + double ZERO_Limit; + loadPEXSIOption(comm_PEXSI, PexsiOptionFile, options, numProcessPerPole, ZERO_Limit); + options.mu0 = mu0; + + ModuleBase::timer::tick("Diago_LCAO_Matrix", "setup_PEXSI_plan"); + PPEXSIPlan plan; + int info; + int outputFileIndex; + int pexsi_prow, pexsi_pcol; + ModuleBase::timer::tick("Diago_LCAO_Matrix", "splitNProc2NProwNPcol"); + splitNProc2NProwNPcol(numProcessPerPole, pexsi_prow, pexsi_pcol); + ModuleBase::timer::tick("Diago_LCAO_Matrix", "splitNProc2NProwNPcol"); + + outputFileIndex = -1; + ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIPlanInit"); + if (comm_PEXSI != MPI_COMM_NULL) + { + plan = PPEXSIPlanInitialize(comm_PEXSI, pexsi_prow, pexsi_pcol, outputFileIndex, &info); + } + ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIPlanInit"); + + ModuleBase::timer::tick("Diago_LCAO_Matrix", "setup_PEXSI_plan"); + + // create compressed column storage distribution matrix parameter + // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test + // DONE(ofs_running,"create compressed column storage distribution matrix parameter, begin"); + DistCCSMatrix DST_Matrix(comm_PEXSI, numProcessPerPole, size); + // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test + // DONE(ofs_running,"create compressed column storage distribution matrix 
parameter, finish"); + + + // create block cyclic distribution matrix parameter + DistBCDMatrix SRC_Matrix(comm_2D, group_2D, blacs_ctxt, size, nblk, nrow, ncol, layout); + // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test + // DONE(ofs_running,"create block cyclic distribution matrix parameter, finish"); + double* HnzvalLocal = nullptr; + double* SnzvalLocal = nullptr; + double* DMnzvalLocal = nullptr; + double* EDMnzvalLocal = nullptr; + double* FDMnzvalLocal = nullptr; + // transform H and S from 2D block cyclic distribution to compressed column sparse matrix + // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test + DistMatrixTransformer::transformBCDtoCCS(SRC_Matrix, H, S, ZERO_Limit, DST_Matrix, HnzvalLocal, SnzvalLocal); + // MPI_Barrier(MPI_COMM_WORLD); + // LiuXh modify 2021-03-30, add DONE(ofs_running,"xx") for test + if (comm_PEXSI != MPI_COMM_NULL) + { + + // Load H and S to PEXSI + int isSIdentity = 0; + PPEXSILoadRealHSMatrix(plan, + options, + size, + DST_Matrix.get_nnz(), + DST_Matrix.get_nnzlocal(), + DST_Matrix.get_numcol_local(), + DST_Matrix.get_colptr_local(), + DST_Matrix.get_rowind_local(), + HnzvalLocal, + isSIdentity, + SnzvalLocal, + &info); + + double nelec; + double muMinInertia; + double muMaxInertia; + int numTotalPEXSIIter; + int numTotalInertiaIter; // Number of total inertia[out] + // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test + ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIDFT"); + PPEXSIDFTDriver(plan, // PEXSI plan[in] + options, // PEXSI Options[in] + numElectronExact, // exact electron number[in] + &mu, // chemical potential[out] + &nelec, // number of electrons[out] + &muMinInertia, // Lower bound for mu after the last inertia[out] + &muMaxInertia, // Upper bound for mu after the last inertia[out] + &numTotalInertiaIter, // Number of total inertia[out] + &numTotalPEXSIIter, // number of total pexsi evaluation procedure[out] + &info); // 0: successful; otherwise: unsuccessful + // 
LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test + ModuleBase::timer::tick("Diago_LCAO_Matrix", "PEXSIDFT"); + + // retrieve the results from the plan + if (DMnzvalLocal != nullptr) + delete[] DMnzvalLocal; + if (EDMnzvalLocal != nullptr) + delete[] EDMnzvalLocal; + if (FDMnzvalLocal != nullptr) + delete[] FDMnzvalLocal; + DMnzvalLocal = new double[DST_Matrix.get_nnzlocal()]; + EDMnzvalLocal = new double[DST_Matrix.get_nnzlocal()]; + FDMnzvalLocal = new double[DST_Matrix.get_nnzlocal()]; + if (myid < numProcessPerPole) + { + PPEXSIRetrieveRealDFTMatrix(plan, + DMnzvalLocal, + EDMnzvalLocal, + FDMnzvalLocal, + &totalEnergyH, + &totalEnergyS, + &totalFreeEnergy, + &info); + } + // clean PEXSI + PPEXSIPlanFinalize(plan, &info); + } + + // transform Density Matrix and Energy Density Matrix from compressed column sparse matrix + // back to 2D block cyclic distribution if neccessary + if (comm_2D != MPI_COMM_NULL) + { + // delete[] DM; + // delete[] EDM; + // DM = new double[SRC_Matrix.get_nrow() * SRC_Matrix.get_ncol()]; + // EDM = new double[SRC_Matrix.get_nrow() * SRC_Matrix.get_ncol()]; + } + // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test + ModuleBase::timer::tick("Diago_LCAO_Matrix", "TransMAT22D"); + DistMatrixTransformer::transformCCStoBCD(DST_Matrix, DMnzvalLocal, EDMnzvalLocal, SRC_Matrix, DM, EDM); + ModuleBase::timer::tick("Diago_LCAO_Matrix", "TransMAT22D"); + // LiuXh modify 2021-04-29, add DONE(ofs_running,"xx") for test + + MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(MPI_COMM_WORLD); + delete[] DMnzvalLocal; + delete[] EDMnzvalLocal; + delete[] FDMnzvalLocal; + delete[] HnzvalLocal; + delete[] SnzvalLocal; + MPI_Barrier(MPI_COMM_WORLD); + return 0; +} +} // namespace pexsi +#endif \ No newline at end of file diff --git a/source/module_hsolver/module_pexsi/simple_pexsi.h b/source/module_hsolver/module_pexsi/simple_pexsi.h new file mode 100644 index 0000000000..db8879e5ac --- /dev/null +++ 
b/source/module_hsolver/module_pexsi/simple_pexsi.h @@ -0,0 +1,29 @@ +#ifndef SIMPLE_PEXSI_H +#define SIMPLE_PEXSI_H + +#include +// a simple interface for calling pexsi with 2D block cyclic distributed matrix +namespace pexsi +{ +int simplePEXSI(MPI_Comm comm_PEXSI, + MPI_Comm comm_2D, + MPI_Group group_2D, + const int blacs_ctxt, // communicator parameters + const int size, + const int nblk, + const int nrow, + const int ncol, + char layout, // input matrix parameters + double* H, + double* S, // input matrices + const double nElectronExact, + const std::string PexsiOptionFile, // pexsi parameters file + double*& DM, + double*& EDM, // output matrices + double& totalEnergyH, + double& totalEnergyS, + double& totalFreeEnergy, + double& mu, + double mu0); +} +#endif // SIMPLE_PEXSI_H \ No newline at end of file diff --git a/source/module_hsolver/test/CMakeLists.txt b/source/module_hsolver/test/CMakeLists.txt index 795854fcc9..cb785e83bb 100644 --- a/source/module_hsolver/test/CMakeLists.txt +++ b/source/module_hsolver/test/CMakeLists.txt @@ -93,6 +93,14 @@ if(ENABLE_LCAO) SOURCES diago_lcao_test.cpp ../diago_blas.cpp ) endif() + + if (ENABLE_PEXSI) + AddTest( + TARGET HSolver_LCAO_PEXSI + LIBS ${math_libs} ${PEXSI_LIBRARY} ${SuperLU_DIST_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} MPI::MPI_CXX base psi device pexsi + SOURCES diago_pexsi_test.cpp ../diago_pexsi.cpp ../../module_basis/module_ao/parallel_orbitals.cpp ../../module_basis/module_ao/parallel_2d.cpp + ) + endif() endif() if (USE_CUDA) AddTest( @@ -116,6 +124,11 @@ install(FILES diago_cg_parallel_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) install(FILES diago_david_parallel_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) install(FILES diago_lcao_parallel_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) +install(FILES PEXSI-H-GammaOnly-Si2.dat DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) +install(FILES PEXSI-S-GammaOnly-Si2.dat DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) +install(FILES 
PEXSI-DM-GammaOnly-Si2.dat DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) +install(FILES diago_pexsi_parallel_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) + find_program(BASH bash) add_test(NAME HSolver_cg_parallel COMMAND ${BASH} diago_cg_parallel_test.sh @@ -130,4 +143,10 @@ if(ENABLE_LCAO) COMMAND ${BASH} diago_lcao_parallel_test.sh WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) +if(ENABLE_PEXSI) + add_test(NAME HSolver_LCAO_PEXSI_parallel + COMMAND ${BASH} diago_pexsi_parallel_test.sh + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) +endif() endif() \ No newline at end of file diff --git a/source/module_hsolver/test/PEXSI-DM-GammaOnly-Si2.dat b/source/module_hsolver/test/PEXSI-DM-GammaOnly-Si2.dat new file mode 100644 index 0000000000..1043cc51a1 --- /dev/null +++ b/source/module_hsolver/test/PEXSI-DM-GammaOnly-Si2.dat @@ -0,0 +1,107 @@ + 26 26 + 8 + 0.660474083048563 + 3.884e-01 1.025e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 3.883e-01 1.024e-02 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 + 1.025e-02 2.683e-04 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.024e-02 2.718e-04 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 7.260e-01 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.671e-01 0.000e+00 0.000e+00 -7.169e-01 + 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 1.699e-01 + 0.000e+00 0.000e+00 0.000e+00 7.260e-01 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 + 0.000e+00 0.000e+00 1.671e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -7.169e-01 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 0.000e+00 1.699e-01 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 
0.000e+00 7.260e-01 0.000e+00 0.000e+00 -1.781e-01 + 0.000e+00 1.671e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 -7.169e-01 0.000e+00 0.000e+00 1.773e-01 0.000e+00 1.699e-01 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 4.379e-02 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -4.138e-02 0.000e+00 0.000e+00 1.773e-01 + 0.000e+00 0.000e+00 -4.374e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 -4.160e-02 + 0.000e+00 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 4.379e-02 0.000e+00 + 0.000e+00 0.000e+00 -4.138e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 1.773e-01 0.000e+00 0.000e+00 -4.374e-02 0.000e+00 0.000e+00 0.000e+00 -4.160e-02 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 4.379e-02 + 0.000e+00 -4.138e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 1.773e-01 0.000e+00 0.000e+00 -4.374e-02 0.000e+00 -4.160e-02 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -5.653e-07 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -1.426e-07 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.671e-01 0.000e+00 0.000e+00 -4.138e-02 + 0.000e+00 3.977e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 -1.699e-01 0.000e+00 0.000e+00 4.160e-02 0.000e+00 3.891e-02 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 1.671e-01 0.000e+00 0.000e+00 -4.138e-02 0.000e+00 + 0.000e+00 0.000e+00 3.977e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -1.699e-01 0.000e+00 0.000e+00 4.160e-02 0.000e+00 0.000e+00 0.000e+00 3.891e-02 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 -5.653e-07 0.000e+00 
0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -1.426e-07 0.000e+00 + 0.000e+00 0.000e+00 1.671e-01 0.000e+00 0.000e+00 -4.138e-02 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 3.977e-02 0.000e+00 0.000e+00 -1.699e-01 + 0.000e+00 0.000e+00 4.160e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 3.891e-02 + 3.883e-01 1.024e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 3.884e-01 1.025e-02 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 + 1.024e-02 2.718e-04 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.025e-02 2.683e-04 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 -7.169e-01 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -1.699e-01 0.000e+00 0.000e+00 7.260e-01 + 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 -1.671e-01 + 0.000e+00 0.000e+00 0.000e+00 -7.169e-01 0.000e+00 0.000e+00 1.773e-01 0.000e+00 + 0.000e+00 0.000e+00 -1.699e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 7.260e-01 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 0.000e+00 -1.671e-01 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -7.169e-01 0.000e+00 0.000e+00 1.773e-01 + 0.000e+00 -1.699e-01 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 7.260e-01 0.000e+00 0.000e+00 -1.781e-01 0.000e+00 -1.671e-01 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 -4.374e-02 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 4.160e-02 0.000e+00 0.000e+00 -1.781e-01 + 0.000e+00 0.000e+00 4.379e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 
0.000e+00 + 0.000e+00 4.138e-02 + 0.000e+00 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 -4.374e-02 0.000e+00 + 0.000e+00 0.000e+00 4.160e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -1.781e-01 0.000e+00 0.000e+00 4.379e-02 0.000e+00 0.000e+00 0.000e+00 4.138e-02 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.773e-01 0.000e+00 0.000e+00 -4.374e-02 + 0.000e+00 4.160e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 -1.781e-01 0.000e+00 0.000e+00 4.379e-02 0.000e+00 4.138e-02 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -1.426e-07 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 -5.653e-07 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 1.699e-01 0.000e+00 0.000e+00 -4.160e-02 + 0.000e+00 3.891e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 -1.671e-01 0.000e+00 0.000e+00 4.138e-02 0.000e+00 3.977e-02 0.000e+00 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 1.699e-01 0.000e+00 0.000e+00 -4.160e-02 0.000e+00 + 0.000e+00 0.000e+00 3.891e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -1.671e-01 0.000e+00 0.000e+00 4.138e-02 0.000e+00 0.000e+00 0.000e+00 3.977e-02 + 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 -1.426e-07 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + -5.653e-07 0.000e+00 + 0.000e+00 0.000e+00 1.699e-01 0.000e+00 0.000e+00 -4.160e-02 0.000e+00 0.000e+00 + 0.000e+00 0.000e+00 0.000e+00 0.000e+00 3.891e-02 0.000e+00 0.000e+00 -1.671e-01 + 0.000e+00 0.000e+00 4.138e-02 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 + 0.000e+00 3.977e-02 \ No newline at end of file diff --git 
a/source/module_hsolver/test/PEXSI-H-GammaOnly-Si2.dat b/source/module_hsolver/test/PEXSI-H-GammaOnly-Si2.dat new file mode 100644 index 0000000000..87c76fc184 --- /dev/null +++ b/source/module_hsolver/test/PEXSI-H-GammaOnly-Si2.dat @@ -0,0 +1,26 @@ +26 -3.45008963e-01 7.00777279e-01 2.67435378e-18 -6.48062567e-16 5.69806192e-16 -2.01865721e-15 1.37825425e-15 1.02932021e-15 -1.29549404e-15 9.40872433e-16 -4.37541396e-16 -4.22502120e-15 -9.57567315e-16 -5.60569495e-01 3.19974163e-01 9.47088035e-16 6.99293619e-16 2.66546040e-17 2.24941591e-16 3.49977208e-16 6.64422948e-16 1.19697268e-15 -2.18249784e-16 -1.24808688e-15 2.25204771e-15 3.31560018e-16 + 7.97835961e-01 -3.26671355e-16 9.44414999e-17 1.08101471e-16 2.96820086e-16 -6.21597964e-17 -1.93606525e-17 1.47303571e-15 -5.85952677e-16 -9.93382239e-16 2.85142385e-15 1.26395876e-15 3.19974163e-01 -2.96734690e-02 -9.56683510e-16 2.99393236e-15 1.06817249e-15 2.22266652e-16 -1.05210457e-15 -2.10421437e-15 -2.19285057e-15 8.05560256e-16 1.20571583e-15 -4.19339449e-15 -1.13240719e-15 + 3.80362144e-01 5.95917771e-16 -2.18276481e-16 -3.54010265e-01 -4.19309899e-16 -2.30363123e-16 1.53423809e-15 -2.91444576e-16 -9.56803808e-17 -5.35261856e-17 -6.16823227e-02 -2.90999424e-16 -2.32177556e-15 -3.89575886e-02 2.12205807e-16 -8.22906289e-17 6.81593611e-03 2.10103917e-17 2.24379615e-16 -4.85438948e-16 9.00442486e-17 1.31161204e-16 4.61089846e-17 2.39556639e-01 + 3.80362144e-01 2.55037050e-16 -3.75167990e-16 -3.54010265e-01 -3.00934924e-16 4.18935390e-17 1.46886306e-16 -6.16823227e-02 -7.98028325e-16 -4.57838734e-17 -3.14309773e-16 1.21608455e-15 -4.64848149e-17 -3.89575886e-02 -8.08540395e-17 1.28237973e-16 6.81593611e-03 -8.35066919e-17 -1.00156533e-16 -1.74201705e-16 2.39556639e-01 4.32484922e-16 6.72537317e-17 + 3.80362144e-01 1.49782541e-16 6.27515573e-17 -3.54010265e-01 4.82940748e-17 -6.16823227e-02 6.20938499e-16 4.71589687e-16 -9.18536141e-16 -3.65700951e-16 -2.61249330e-16 -2.01796811e-16 3.17945596e-17 -3.89575886e-02 
2.14594692e-16 -1.51811429e-16 6.81593611e-03 -2.12500255e-17 2.39556639e-01 -5.79371716e-17 -2.59943780e-16 5.30093710e-16 + 6.49510022e-01 -3.99877720e-16 2.33364502e-16 -1.06835200e-15 1.35924369e-16 4.35440159e-16 5.65086482e-17 2.05033710e-02 -2.40594211e-16 -5.69887431e-16 6.81593611e-03 2.06395261e-17 -1.15861484e-16 1.34872389e-01 5.35360292e-17 -5.33661495e-17 3.47762359e-16 -3.36405101e-16 -2.31631258e-16 -2.62331242e-17 -4.85682373e-02 + 6.49510022e-01 1.01473550e-16 -8.91511572e-16 -7.44322866e-16 2.05033710e-02 3.18067759e-16 1.77405265e-16 6.50186302e-16 -2.33952222e-15 3.10478769e-17 6.81593611e-03 -3.09684775e-17 3.48878974e-17 1.34872389e-01 1.38971253e-16 2.83475393e-17 1.26434741e-16 -4.85682373e-02 -2.28940621e-16 -2.75815582e-16 + 6.49510022e-01 -6.05413446e-16 2.05033710e-02 -4.85158818e-16 2.18100282e-16 5.90800199e-16 -4.24041197e-16 -1.76755725e-16 2.48864123e-16 6.96454974e-17 6.81593611e-03 -1.21887704e-16 8.99652487e-17 1.34872389e-01 1.30566455e-16 -4.85682373e-02 4.45112250e-17 2.04467699e-16 -3.02221322e-16 + 1.39709618e+00 3.28775150e-17 3.02403823e-16 1.12276155e-15 4.11283608e-16 -1.68777752e-15 1.99081203e-15 -3.80457763e-16 -2.91238204e-16 -2.93921607e-16 -7.17012563e-20 1.88551508e-16 -4.63408913e-17 -4.73478204e-01 1.77949873e-16 3.47597824e-16 -7.95551748e-17 1.22787443e-15 + 1.36533927e+00 -4.17480353e-16 2.38782046e-16 2.07954517e-16 4.06349249e-16 -9.28946662e-17 -1.76779162e-16 -2.40828408e-16 -2.39556639e-01 -9.25671896e-17 1.12753601e-16 4.85682373e-02 -1.30259315e-17 2.04879371e-01 2.55654809e-16 -8.58762656e-16 1.09717457e-15 + 1.36533927e+00 -2.91149533e-16 4.76951783e-17 4.23208590e-16 -1.68531661e-15 3.22333315e-16 -2.39556639e-01 -2.64951724e-17 -6.87350695e-17 4.85682373e-02 1.57686979e-16 -3.76991794e-16 5.08885431e-16 2.04879371e-01 1.27435694e-15 4.01644429e-16 + 1.39709618e+00 -4.55433608e-17 -2.99630111e-15 3.78928659e-15 6.58110607e-17 3.50160588e-16 -1.80754014e-16 -2.30392781e-17 -3.48739041e-16 
1.55913703e-16 -6.27363150e-17 9.66464560e-16 -1.17769212e-15 -4.73478204e-01 3.90844224e-17 + 1.36533927e+00 -1.30972594e-15 1.71848190e-15 -2.39556639e-01 3.88957496e-17 4.45336175e-17 4.85682373e-02 -1.94106500e-17 -1.20756475e-16 -9.64457501e-16 6.28258136e-16 2.28328203e-16 7.89854971e-17 2.04879371e-01 + -3.45008963e-01 7.00777279e-01 2.73555881e-15 -1.00946454e-15 -1.16246495e-15 -1.18942654e-15 2.34640662e-15 -2.17975499e-17 1.59173761e-15 1.65013351e-16 -5.00521378e-16 3.23944830e-15 2.47573682e-16 + 7.97835961e-01 -2.89307129e-16 4.07830148e-16 3.07829438e-16 1.46389913e-16 -1.66937141e-16 -2.06630218e-17 -1.15365109e-15 4.29729746e-16 6.95094012e-16 -2.40691019e-15 -8.41099617e-16 + 3.80362144e-01 -3.72488558e-16 -1.36186620e-16 -3.54010265e-01 1.65418864e-16 -5.01909637e-17 8.72781487e-16 -3.15475321e-16 -1.75617767e-16 -1.33423172e-16 6.16823227e-02 + 3.80362144e-01 6.95266223e-16 1.24652862e-16 -3.54010265e-01 -2.05726330e-16 2.83810662e-16 2.42210163e-16 6.16823227e-02 -8.82894651e-16 9.45554362e-18 + 3.80362144e-01 -5.04905355e-16 -3.43420006e-16 -3.54010265e-01 1.97895263e-16 6.16823227e-02 5.27704784e-16 4.09823362e-16 -1.55579456e-16 + 6.49510022e-01 3.01665582e-16 1.65533985e-16 -1.52276340e-15 4.86620331e-16 6.36576344e-16 4.94854914e-17 -2.05033710e-02 + 6.49510022e-01 -1.88619393e-17 -1.87155561e-16 -5.78216449e-16 -2.05033710e-02 1.56194250e-15 -3.49237870e-16 + 6.49510022e-01 -3.24872954e-16 -2.05033710e-02 -7.32295395e-16 -8.96538828e-16 9.96070017e-16 + 1.39709618e+00 -1.11050273e-16 -1.69128575e-17 -1.24504759e-15 -8.08763902e-16 + 1.36533927e+00 3.67630294e-16 -7.04593046e-17 8.84496171e-18 + 1.36533927e+00 9.35720969e-16 -1.83878822e-16 + 1.39709618e+00 1.75929309e-16 + 1.36533927e+00 diff --git a/source/module_hsolver/test/PEXSI-S-GammaOnly-Si2.dat b/source/module_hsolver/test/PEXSI-S-GammaOnly-Si2.dat new file mode 100644 index 0000000000..e1bb9a1439 --- /dev/null +++ b/source/module_hsolver/test/PEXSI-S-GammaOnly-Si2.dat @@ -0,0 
+1,26 @@ +26 1.49964801e+00 -1.41677065e+00 -2.71050543e-19 -4.60108297e-17 -3.03983184e-17 7.58941521e-18 7.92145212e-17 1.30781887e-17 -1.58750915e-17 7.77915059e-18 1.07336015e-17 6.22542594e-17 8.78542573e-18 1.24515654e+00 -1.87761557e+00 -4.52345240e-17 -1.21904982e-16 -4.81699167e-17 -3.15892467e-17 -1.29304662e-16 -9.53928505e-18 -6.92703544e-17 -2.51210914e-17 -6.61062629e-17 -5.20340810e-17 -2.29647573e-17 + 3.24615314e+00 1.08420217e-17 4.77048956e-18 -1.56125113e-17 -3.46944695e-18 2.05998413e-17 3.10081821e-17 4.84909422e-17 1.85669622e-18 -1.14518854e-18 -9.97060572e-17 1.05438661e-17 -1.87761557e+00 2.38584105e+00 5.04340357e-17 2.33103467e-17 6.07830843e-18 -2.07895767e-17 3.37674767e-16 -7.05544564e-17 -1.25743735e-16 2.64630033e-17 4.25617115e-17 -2.13442138e-16 2.66781497e-17 + 5.45596765e-01 2.16840434e-18 4.33680869e-18 -5.12717090e-01 -1.38777878e-17 -3.03576608e-18 2.16840434e-19 2.16840434e-19 -4.76710143e-18 8.09763498e-19 0.00000000e+00 4.52345240e-17 -5.04340357e-17 -1.24362116e-01 -3.18772379e-17 -7.50471191e-18 9.98285745e-02 1.08555743e-17 1.01711716e-17 1.79143233e-17 2.27868803e-17 2.29969445e-18 -1.53440018e-18 5.44995645e-01 + 5.45596765e-01 5.42101086e-19 -1.38777878e-17 -5.12717090e-01 -1.81061763e-17 1.57209315e-18 -9.14795583e-19 0.00000000e+00 -1.27054942e-18 -1.26038503e-18 1.21904982e-16 -2.33103467e-17 -3.18772379e-17 -1.24362116e-01 2.03965534e-17 1.08555743e-17 9.98285745e-02 2.64680855e-17 3.72186277e-17 -3.19162015e-18 5.44995645e-01 3.61445899e-17 1.12147162e-18 + 5.45596765e-01 -3.03576608e-18 -1.81061763e-17 -5.12717090e-01 -2.75793928e-18 0.00000000e+00 4.40457133e-18 -2.09217138e-18 -7.86046575e-18 4.81699167e-17 -6.07830843e-18 -7.50471191e-18 2.03965534e-17 -1.24362116e-01 1.01711716e-17 2.64680855e-17 9.98285745e-02 -3.06117707e-18 5.44995645e-01 -4.81792340e-18 1.73573992e-17 2.58853269e-17 + 5.51297780e-01 -2.77555756e-17 1.73472348e-18 1.35525272e-19 -4.60785923e-19 -1.00559751e-17 5.82758668e-19 
-1.35525272e-20 3.15892467e-17 2.07895767e-17 9.98285745e-02 1.08555743e-17 1.01711716e-17 -3.41075882e-02 1.01779479e-17 6.03087458e-17 -5.67003855e-18 -3.71339244e-18 1.81722449e-17 7.94516905e-19 -4.08542832e-01 + 5.51297780e-01 -2.45029691e-17 -5.96311195e-19 -1.35525272e-19 -1.35525272e-20 4.60785923e-19 -3.19839641e-18 1.29304662e-16 -3.37674767e-16 1.08555743e-17 9.98285745e-02 2.64680855e-17 1.01779479e-17 -3.41075882e-02 3.01950305e-17 -1.21972744e-17 2.71050543e-18 -4.08542832e-01 1.97392558e-17 6.03087458e-18 + 5.51297780e-01 2.90024081e-18 -1.35525272e-20 -1.31459513e-18 -4.74338450e-19 2.16840434e-18 9.53928505e-18 7.05544564e-17 1.01711716e-17 2.64680855e-17 9.98285745e-02 6.03087458e-17 3.01950305e-17 -3.41075882e-02 -4.38763067e-18 -4.08542832e-01 3.43793733e-17 2.17179248e-18 -5.34647196e-18 + 9.27713518e-01 2.88668828e-18 -1.21972744e-19 7.70699912e-17 7.72494048e-19 -6.92703544e-17 -1.25743735e-16 -1.79143233e-17 -3.72186277e-17 3.06117707e-18 5.67003855e-18 1.21972744e-17 4.38763067e-18 -6.02927412e-01 -1.92810110e-17 2.84052499e-18 -7.30159341e-17 6.80624854e-17 + 1.16731758e+00 3.47791728e-18 -3.22550146e-18 3.52365706e-19 -2.51210914e-17 2.64630033e-17 -2.27868803e-17 3.19162015e-18 -5.44995645e-01 3.71339244e-18 -2.71050543e-18 4.08542832e-01 -1.92810110e-17 1.40487499e-01 6.89230709e-18 2.04346699e-17 1.79486282e-17 + 1.16731758e+00 1.04896560e-17 -6.09863722e-19 -6.61062629e-17 4.25617115e-17 -2.29969445e-18 -5.44995645e-01 4.81792340e-18 -1.81722449e-17 4.08542832e-01 -3.43793733e-17 2.84052499e-18 6.89230709e-18 1.40487499e-01 -3.05927125e-18 -4.40795946e-18 + 9.27713518e-01 7.03147766e-21 -5.20340810e-17 -2.13442138e-16 1.53440018e-18 -3.61445899e-17 -1.73573992e-17 -7.94516905e-19 -1.97392558e-17 -2.17179248e-18 -7.30159341e-17 2.04346699e-17 -3.05927125e-18 -6.02927412e-01 1.56701095e-18 + 1.16731758e+00 -2.29647573e-17 2.66781497e-17 -5.44995645e-01 -1.12147162e-18 -2.58853269e-17 4.08542832e-01 -6.03087458e-18 5.34647196e-18 
6.80624854e-17 1.79486282e-17 -4.40795946e-18 1.56701095e-18 1.40487499e-01 + 1.49964801e+00 -1.41677065e+00 -2.71050543e-19 -4.60108297e-17 -3.03983184e-17 7.58941521e-18 7.92145212e-17 1.30781887e-17 -1.58750915e-17 7.77915059e-18 1.07336015e-17 6.22542594e-17 8.78542573e-18 + 3.24615314e+00 1.08420217e-17 4.77048956e-18 -1.56125113e-17 -3.46944695e-18 2.05998413e-17 3.10081821e-17 4.84909422e-17 1.85669622e-18 -1.14518854e-18 -9.97060572e-17 1.05438661e-17 + 5.45596765e-01 2.16840434e-18 4.33680869e-18 -5.12717090e-01 -1.38777878e-17 -3.03576608e-18 2.16840434e-19 2.16840434e-19 -4.76710143e-18 8.09763498e-19 0.00000000e+00 + 5.45596765e-01 5.42101086e-19 -1.38777878e-17 -5.12717090e-01 -1.81061763e-17 1.57209315e-18 -9.14795583e-19 0.00000000e+00 -1.27054942e-18 -1.26038503e-18 + 5.45596765e-01 -3.03576608e-18 -1.81061763e-17 -5.12717090e-01 -2.75793928e-18 0.00000000e+00 4.40457133e-18 -2.09217138e-18 -7.86046575e-18 + 5.51297780e-01 -2.77555756e-17 1.73472348e-18 1.35525272e-19 -4.60785923e-19 -1.00559751e-17 5.82758668e-19 -1.35525272e-20 + 5.51297780e-01 -2.45029691e-17 -5.96311195e-19 -1.35525272e-19 -1.35525272e-20 4.60785923e-19 -3.19839641e-18 + 5.51297780e-01 2.90024081e-18 -1.35525272e-20 -1.31459513e-18 -4.74338450e-19 2.16840434e-18 + 9.27713518e-01 2.88668828e-18 -1.21972744e-19 7.70699912e-17 7.72494048e-19 + 1.16731758e+00 3.47791728e-18 -3.22550146e-18 3.52365706e-19 + 1.16731758e+00 1.04896560e-17 -6.09863722e-19 + 9.27713518e-01 7.03147766e-21 + 1.16731758e+00 diff --git a/source/module_hsolver/test/diago_pexsi_parallel_test.sh b/source/module_hsolver/test/diago_pexsi_parallel_test.sh new file mode 100644 index 0000000000..4767d690a2 --- /dev/null +++ b/source/module_hsolver/test/diago_pexsi_parallel_test.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +np=`cat /proc/cpuinfo | grep "cpu cores" | uniq| awk '{print $NF}'` +echo "nprocs in this machine is $np" + +for i in 6 3 2;do + if [[ $i -gt $np ]];then + continue + fi + echo "TEST DIAGO PEXSI in 
parallel, nprocs=$i" + mpirun -np $i ./HSolver_LCAO_PEXSI + if [[ $? != 0 ]];then + echo -e "\e[1;33m [ FAILED ] \e[0m"\ + "execute UT with $i cores error." + exit 1 + fi + break +done diff --git a/source/module_hsolver/test/diago_pexsi_test.cpp b/source/module_hsolver/test/diago_pexsi_test.cpp new file mode 100644 index 0000000000..4b3b8aba99 --- /dev/null +++ b/source/module_hsolver/test/diago_pexsi_test.cpp @@ -0,0 +1,428 @@ +#ifdef __PEXSI +#include +#include +#include +#include +#include +#include +#include + +#include "module_hsolver/diago_pexsi.h" +#include "module_hsolver/module_pexsi/pexsi_solver.h" +#include "module_hsolver/test/diago_elpa_utils.h" +#include "module_basis/module_ao/parallel_orbitals.h" +#include "module_base/parallel_global.h" +#include "module_base/global_variable.h" + +#define PASSTHRESHOLD 5e-4 +#define DETAILINFO false +#define PRINT_HS false +#define REPEATRUN 1 + +template class HamiltTEST : public hamilt::Hamilt +{ + public: + int desc[9]; + int nrow, ncol; + std::vector h_local; + std::vector s_local; + + void matrix(hamilt::MatrixBlock &hk_in, hamilt::MatrixBlock &sk_in) + { + hk_in = hamilt::MatrixBlock{this->h_local.data(), + (size_t)this->nrow, + (size_t)this->ncol, + this->desc}; + sk_in = hamilt::MatrixBlock{this->s_local.data(), + (size_t)this->nrow, + (size_t)this->ncol, + this->desc}; + } + + void constructHamilt(const int iter, const hamilt::MatrixBlock rho) {} + void updateHk(const int ik) {} +}; + + + +template class PexsiPrepare +{ + public: + PexsiPrepare(int nlocal, int nbands, int nb2d, int sparsity, std::string hfname, std::string sfname, std::string dmname) + : nlocal(nlocal), nbands(nbands), nb2d(nb2d), sparsity(sparsity), hfname(hfname), sfname(sfname), dmname(dmname) + { + MPI_Comm_size(MPI_COMM_WORLD, &dsize); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + } + + int dsize, myrank; + int nlocal, nbands, nb2d, sparsity; + int nprows, npcols, myprow, mypcol; + int nrow, ncol; + double hsolver_time = 0.0; + 
std::string sfname, hfname, dmname; + HamiltTEST hmtest; + std::vector h; + std::vector s; + std::vector h_local; + std::vector s_local; + psi::Psi psi; + hsolver::DiagoPexsi* dh = nullptr; + Parallel_Orbitals po; + std::vector abc; + int icontxt; + + double mu; + + // density matrix + std::vector dm_local; + std::vector edm_local; + + std::vector dm; + + bool read_HS() + { + bool readhfile = false; + bool readsfile = false; + if (this->myrank == 0) + { + int hdim, sdim; + readhfile = LCAO_DIAGO_TEST::read_hs>(hfname, this->h); + readsfile = LCAO_DIAGO_TEST::read_hs>(sfname, this->s); + hdim = sqrt(this->h.size()); + sdim = sqrt(this->s.size()); + if (hdim != sdim) + { + printf("Error: dimensions of H and S are not equal, %d, %d\n", hdim, sdim); + readhfile = readsfile = false; + } + nlocal = hdim; + } + MPI_Bcast(&nlocal, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&readhfile, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD); + MPI_Bcast(&readsfile, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD); + nbands = nlocal/2; + if (readhfile && readsfile) + return true; + return false; + } + + bool produce_HS() + { + bool ok = this->read_HS(); + return ok; + } + + void pb2d() + { + LCAO_DIAGO_TEST::process_2d(nprows, npcols, myprow, mypcol, icontxt); + + hmtest.nrow = LCAO_DIAGO_TEST::na_rc(nlocal, nb2d, nprows, myprow); // the number of row of the new_matrix in each process + hmtest.ncol = LCAO_DIAGO_TEST::na_rc(nlocal, nb2d, npcols, mypcol); // the number of column of the new_matrix in each process + + + int ISRC = 0, info; + descinit_(hmtest.desc, &nlocal, &nlocal, &nb2d, &nb2d, &ISRC, &ISRC, &icontxt, &(hmtest.nrow), &info); + if (info != 0) + { + printf("Invalid blacs-distribution. 
Abort!\n"); + exit(1); + } + + + // set po variables + po.ncol = hmtest.ncol; + po.nrow = hmtest.nrow; + po.nb = nb2d; + po.blacs_ctxt = icontxt; + po.comm_2D = MPI_COMM_WORLD; + po.dim0 = nprows; + po.dim1 = npcols; + po.testpb = true; + + if (DETAILINFO && myrank == 0) + { + std::cout << "nrow: " << hmtest.nrow << ", ncol: " << hmtest.ncol << ", nb: " << nb2d << std::endl; + } + + dh = new hsolver::DiagoPexsi(&po); + } + + void distribute_data() + { + + int local_size = hmtest.nrow * hmtest.ncol; + this->h_local.resize(local_size); + this->s_local.resize(local_size); + + LCAO_DIAGO_TEST::distribute_data(this->h.data(),this->h_local.data(),nlocal,nb2d,hmtest.nrow,hmtest.ncol,icontxt); + LCAO_DIAGO_TEST::distribute_data(this->s.data(),this->s_local.data(),nlocal,nb2d,hmtest.nrow,hmtest.ncol,icontxt); + } + + void set_env() + { + GlobalV::NLOCAL = nlocal; + GlobalV::NBANDS = nbands; + GlobalV::DSIZE = dsize; + GlobalV::NSPIN = 1; + DIAG_WORLD = MPI_COMM_WORLD; + GlobalV::NPROC = dsize; + + psi.fix_k(0); + } + + void set_pexsi_vars() + { + // pexsi::PEXSI_Solver::set_pexsi_vars(); + pexsi::PEXSI_Solver::pexsi_npole = 40; + pexsi::PEXSI_Solver::pexsi_inertia = true; + pexsi::PEXSI_Solver::pexsi_nmax = 80; + // pexsi_symbolic = 1; + pexsi::PEXSI_Solver::pexsi_comm = true; + pexsi::PEXSI_Solver::pexsi_storage = true; + pexsi::PEXSI_Solver::pexsi_ordering = 0; + pexsi::PEXSI_Solver::pexsi_row_ordering = 1; + pexsi::PEXSI_Solver::pexsi_nproc = 1; + pexsi::PEXSI_Solver::pexsi_symm = true; + pexsi::PEXSI_Solver::pexsi_trans = false; + pexsi::PEXSI_Solver::pexsi_method = 1; + pexsi::PEXSI_Solver::pexsi_nproc_pole = 1; + // pexsi_spin = 2; + pexsi::PEXSI_Solver::pexsi_temp = 0.015; + pexsi::PEXSI_Solver::pexsi_gap = 0; + pexsi::PEXSI_Solver::pexsi_delta_e = 20.0; + pexsi::PEXSI_Solver::pexsi_mu_lower = -10; + pexsi::PEXSI_Solver::pexsi_mu_upper = 10; + pexsi::PEXSI_Solver::pexsi_mu = 0.0; + pexsi::PEXSI_Solver::pexsi_mu_thr = 0.05; + pexsi::PEXSI_Solver::pexsi_mu_expand = 
0.3; + pexsi::PEXSI_Solver::pexsi_mu_guard = 0.2; + pexsi::PEXSI_Solver::pexsi_elec_thr = 0.001; + pexsi::PEXSI_Solver::pexsi_zero_thr = 1e-10; + pexsi::PEXSI_Solver::pexsi_mu = mu; + } + + void diago() + { + if (DETAILINFO && myrank == 0) + { + std::cout << "Start to solve the KS equation using PEXSI" << std::endl; + } + this->pb2d(); + if (DETAILINFO && myrank == 0) + { + std::cout << "Finish the 2D parallelization" << std::endl; + } + this->distribute_data(); + if (DETAILINFO && myrank == 0) + { + std::cout << "Finish the data distribution" << std::endl; + } + this->set_env(); + if (DETAILINFO && myrank == 0) + { + std::cout << "Finish the environment setting" << std::endl; + } + double starttime = 0.0, endtime = 0.0; + this->set_pexsi_vars(); + if (DETAILINFO && myrank == 0) + { + std::cout << "Finish the PEXSI setting" << std::endl; + } + MPI_Barrier(MPI_COMM_WORLD); + starttime = MPI_Wtime(); + for(int i=0;ih_local; + hmtest.s_local = this->s_local; + dh->diag(&hmtest, psi, nullptr); + + // copy the density matrix to dm_local + dm_local = dh->DM; + edm_local = dh->EDM; + } + MPI_Barrier(MPI_COMM_WORLD); + if (DETAILINFO && myrank == 0) + { + std::cout << "Finish the KS equation solving" << std::endl; + } + endtime = MPI_Wtime(); + hsolver_time = (endtime - starttime)/REPEATRUN; + } + + bool read_ref() + { + auto f_dm = std::ifstream(dmname); + if (!f_dm.is_open()) + { + std::cout << "Error: cannot open the reference file " << dmname << std::endl; + return false; + } + int nread = 0; + f_dm >> nread; + if (nread != nlocal) + { + std::cout << "Error: the number of global orbitals in the reference file is not equal to the current calculation" << std::endl; + return false; + } + f_dm >> nread; + if (nread != nlocal) + { + std::cout << "Error: the number of global orbitals in the reference file is not equal to the current calculation" << std::endl; + return false; + } + + f_dm >> GlobalV::nelec >> mu; + + dm.resize(nread*nread); + // T* edm = new 
T[nglobal*nglobal]; + for (int i = 0; i < nread; i++) + { + for (int j = 0; j < nread; j++) + { + f_dm >> dm[i*nread+j]; + } + } + return true; + } + + + bool compare_ref(std::stringstream &out_info) + { + double maxerror = 0.0; + int iindex = 0; + bool pass = true; + + auto ofs = std::ofstream("dm_local" + std::to_string(myprow) + std::to_string(mypcol) + ".dat"); + + int SENDPROW = 0, SENDPCOL = 0, tag = 0; + + // do iteration for matrix, distribute old_matrix to each process, pass a block each time + for (int row = 0; row < nlocal; row++) + { + int recv_prow = (row / nb2d) % nprows; // the row number of recive process + int nm_row = ((row / nb2d) / nprows) * nb2d + row % nb2d; // row number of block in new_matrix + for (int col = 0; col < nlocal; col += nb2d) + { + int recv_pcol = (col / nb2d) % npcols; // the column number of recive process + int nm_col = ((col / nb2d) / npcols) * nb2d + col % nb2d; + int pass_length = std::min(nlocal - col, nb2d); // nlocal may not be devided by nb2d; + // at global: nlocal * row + col + i + // at local: (nm_col + i) * hmtest.nrow + nm_row + + if (myprow == recv_prow && mypcol == recv_pcol) + { + double diff = 0; + for (int i = 0; i < pass_length; i++) + { + diff = std::abs(dm_local[0][(nm_col + i) * hmtest.nrow + nm_row] - dm[nlocal * row + col + i]); + if (diff > maxerror) + { + maxerror = diff; + } + if (diff > PASSTHRESHOLD) + { + pass = false; + } + } + } + } + } + + bool pass_all = true; + double maxerror_all = 0.0; + MPI_Allreduce(&pass, &pass_all, 1, MPI_C_BOOL, MPI_LAND, MPI_COMM_WORLD); + MPI_Allreduce(&maxerror, &maxerror_all, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + if (myrank == 0) + { + std::cout << "H/S matrix are read from " << hfname << ", " << sfname << std::endl; + std::cout << "Density matrix are read from " << dmname << std::endl; + std::cout << std::endl; + out_info << "Maximum difference between ks_hsolver and ref is " << maxerror_all << ", the pass threshold is " << PASSTHRESHOLD << std::endl; + + if 
(DETAILINFO) + { + std::cout << out_info.str(); + out_info.str(""); + out_info.clear(); + } + } + delete dh; + return pass_all; + } +}; + +class PexsiGammaOnlyTest : public ::testing::TestWithParam> {}; + +TEST_P(PexsiGammaOnlyTest, LCAO) +{ + std::stringstream out_info; + PexsiPrepare dp = GetParam(); + if (DETAILINFO && dp.myrank == 0) + { + std::cout << "nlocal: " << dp.nlocal << ", nbands: " << dp.nbands << ", nb2d: " << dp.nb2d << ", sparsity: " << dp.sparsity << std::endl; + + } + ASSERT_TRUE(dp.produce_HS()); + if (DETAILINFO && dp.myrank == 0) + { + std::cout << "H/S matrix are read from " << dp.hfname << ", " << dp.sfname << std::endl; + } + ASSERT_TRUE(dp.read_ref()); + if (DETAILINFO && dp.myrank == 0) + { + std::cout << "Density matrix are read from " << dp.dmname << std::endl; + } + dp.diago(); + if (DETAILINFO && dp.myrank == 0) + { + std::cout << "Time for hsolver: " << dp.hsolver_time << "s" << std::endl; + } + + bool pass = dp.compare_ref(out_info); + EXPECT_TRUE(pass) << out_info.str(); + + MPI_Barrier(MPI_COMM_WORLD); +} + +INSTANTIATE_TEST_SUITE_P( + DiagoTest, + PexsiGammaOnlyTest, + ::testing::Values( //int nlocal, int nbands, int nb2d, int sparsity, std::string ks_solver_in, std::string hfname, std::string sfname + PexsiPrepare(0, 0, 2, 0, "PEXSI-H-GammaOnly-Si2.dat", "PEXSI-S-GammaOnly-Si2.dat", "PEXSI-DM-GammaOnly-Si2.dat"), + PexsiPrepare(0, 0, 1, 0, "PEXSI-H-GammaOnly-Si2.dat", "PEXSI-S-GammaOnly-Si2.dat", "PEXSI-DM-GammaOnly-Si2.dat") + + )); + + +int main(int argc, char **argv) +{ + MPI_Init(&argc, &argv); + int mypnum, dsize; + MPI_Comm_size(MPI_COMM_WORLD, &dsize); + MPI_Comm_rank(MPI_COMM_WORLD, &mypnum); + + testing::InitGoogleTest(&argc, argv); + //Parallel_Global::split_diag_world(dsize); + ::testing::TestEventListeners &listeners = ::testing::UnitTest::GetInstance()->listeners(); + if (mypnum != 0) + { + delete listeners.Release(listeners.default_result_printer()); + } + int result = RUN_ALL_TESTS(); + + if (mypnum == 0 && result 
!= 0) + { + std::cout << "ERROR:some tests are not passed" << std::endl; + return result; + } + else + { + MPI_Finalize(); + return 0; + } +} + + +#endif // __PEXSI \ No newline at end of file diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp index e7ee360a28..f93e01659b 100644 --- a/source/module_io/input.cpp +++ b/source/module_io/input.cpp @@ -650,6 +650,34 @@ void Input::Default(void) qo_thr = 1e-6; qo_screening_coeff = {}; + //========================================================== + // variables for PEXSI + //========================================================== + pexsi_npole = 40; + pexsi_inertia = true; + pexsi_nmax = 80; + // pexsi_symbolic = 1; + pexsi_comm = true; + pexsi_storage = true; + pexsi_ordering = 0; + pexsi_row_ordering = 1; + pexsi_nproc = 1; + pexsi_symm = true; + pexsi_trans = false; + pexsi_method = 1; + pexsi_nproc_pole = 1; + // pexsi_spin = 2; + pexsi_temp = 0.015; + pexsi_gap = 0; + pexsi_delta_e = 20.0; + pexsi_mu_lower = -10; + pexsi_mu_upper = 10; + pexsi_mu = 0.0; + pexsi_mu_thr = 0.05; + pexsi_mu_expand = 0.3; + pexsi_mu_guard = 0.2; + pexsi_elec_thr = 0.001; + pexsi_zero_thr = 1e-10; return; } @@ -2358,6 +2386,9 @@ bool Input::Read(const std::string& fn) { read_value(ifs, sc_file); } + //---------------------------------------------------------------------------------- + // Quasiatomic orbital + //---------------------------------------------------------------------------------- else if (strcmp("qo_switch", word) == 0){ read_bool(ifs, qo_switch); } @@ -2373,6 +2404,106 @@ bool Input::Read(const std::string& fn) else if (strcmp("qo_screening_coeff", word) == 0){ read_value2stdvector(ifs, qo_screening_coeff); } + //---------------------------------------------------------------------------------- + // PEXSI + //---------------------------------------------------------------------------------- + else if (strcmp("pexsi_npole", word) == 0){ + read_value(ifs, pexsi_npole); + } + else if 
(strcmp("pexsi_inertia", word) == 0){ + read_value(ifs, pexsi_inertia); + } + else if (strcmp("pexsi_nmax", word) == 0) { + read_value(ifs, pexsi_nmax); + } + // else if (strcmp("pexsi_symbolic", word) == 0) + // { + // read_value(ifs, pexsi_symbolic); + // } + else if (strcmp("pexsi_comm", word) == 0) + { + read_value(ifs, pexsi_comm); + } + else if (strcmp("pexsi_storage", word) == 0) + { + read_value(ifs, pexsi_storage); + } + else if (strcmp("pexsi_ordering", word) == 0) + { + read_value(ifs, pexsi_ordering); + } + else if (strcmp("pexsi_row_ordering", word) == 0) + { + read_value(ifs, pexsi_row_ordering); + } + else if (strcmp("pexsi_nproc", word) == 0) + { + read_value(ifs, pexsi_nproc); + } + else if (strcmp("pexsi_symm", word) == 0) + { + read_value(ifs, pexsi_symm); + } + else if (strcmp("pexsi_trans", word) == 0) + { + read_value(ifs, pexsi_trans); + } + else if (strcmp("pexsi_method", word) == 0) + { + read_value(ifs, pexsi_method); + } + else if (strcmp("pexsi_nproc_pole", word) == 0) + { + read_value(ifs, pexsi_nproc_pole); + } + // else if (strcmp("pexsi_spin", word) == 0) + // { + // read_value(ifs, pexsi_spin); + // } + else if (strcmp("pexsi_temp", word) == 0) + { + read_value(ifs, pexsi_temp); + } + else if (strcmp("pexsi_gap", word) == 0) + { + read_value(ifs, pexsi_gap); + } + else if (strcmp("pexsi_delta_e", word) == 0) + { + read_value(ifs, pexsi_delta_e); + } + else if (strcmp("pexsi_mu_lower", word) == 0) + { + read_value(ifs, pexsi_mu_lower); + } + else if (strcmp("pexsi_mu_upper", word) == 0) + { + read_value(ifs, pexsi_mu_upper); + } + else if (strcmp("pexsi_mu", word) == 0) + { + read_value(ifs, pexsi_mu); + } + else if (strcmp("pexsi_mu_thr", word) == 0) + { + read_value(ifs, pexsi_mu_thr); + } + else if (strcmp("pexsi_mu_expand", word) == 0) + { + read_value(ifs, pexsi_mu_expand); + } + else if (strcmp("pexsi_mu_guard", word) == 0) + { + read_value(ifs, pexsi_mu_guard); + } + else if (strcmp("pexsi_elec_thr", word) == 0) + { + 
read_value(ifs, pexsi_elec_thr); + } + else if (strcmp("pexsi_zero_thr", word) == 0) + { + read_value(ifs, pexsi_zero_thr); + } else { // xiaohui add 2015-09-15 @@ -3754,6 +3885,34 @@ void Input::Bcast() Parallel_Common::bcast_bool(qo_switch); Parallel_Common::bcast_string(qo_basis); Parallel_Common::bcast_double(qo_thr); + //========================================================== + // PEXSI + //========================================================== + Parallel_Common::bcast_int(pexsi_npole); + Parallel_Common::bcast_bool(pexsi_inertia); + Parallel_Common::bcast_int(pexsi_nmax); + // Parallel_Common::bcast_int(pexsi_symbolic); + Parallel_Common::bcast_bool(pexsi_comm); + Parallel_Common::bcast_bool(pexsi_storage); + Parallel_Common::bcast_int(pexsi_ordering); + Parallel_Common::bcast_int(pexsi_row_ordering); + Parallel_Common::bcast_int(pexsi_nproc); + Parallel_Common::bcast_bool(pexsi_symm); + Parallel_Common::bcast_bool(pexsi_trans); + Parallel_Common::bcast_int(pexsi_method); + Parallel_Common::bcast_int(pexsi_nproc_pole); + // Parallel_Common::bcast_double(pexsi_spin); + Parallel_Common::bcast_double(pexsi_temp); + Parallel_Common::bcast_double(pexsi_gap); + Parallel_Common::bcast_double(pexsi_delta_e); + Parallel_Common::bcast_double(pexsi_mu_lower); + Parallel_Common::bcast_double(pexsi_mu_upper); + Parallel_Common::bcast_double(pexsi_mu); + Parallel_Common::bcast_double(pexsi_mu_thr); + Parallel_Common::bcast_double(pexsi_mu_expand); + Parallel_Common::bcast_double(pexsi_mu_guard); + Parallel_Common::bcast_double(pexsi_elec_thr); + Parallel_Common::bcast_double(pexsi_zero_thr); /* broadcasting std::vector is sometime a annorying task... 
*/ if (ntype != 0) /* ntype has been broadcasted before */ { @@ -3974,6 +4133,10 @@ void Input::Check(void) { ModuleBase::WARNING_QUIT("Input", "lapack can not be used with plane wave basis."); } + else if (ks_solver == "pexsi") + { + ModuleBase::WARNING_QUIT("Input", "pexsi can not be used with plane wave basis."); + } else if (ks_solver != "default" && ks_solver != "cg" && ks_solver != "dav" && @@ -4046,6 +4209,17 @@ void Input::Check(void) #ifndef __MPI ModuleBase::WARNING_QUIT("Input", "Cusolver can not be used for series version."); #endif + } + else if (ks_solver == "pexsi") + { +#ifdef __PEXSI + GlobalV::ofs_warning << " It's ok to use pexsi." << std::endl; +#else + ModuleBase::WARNING_QUIT("Input", + "Can not use PEXSI if abacus is not compiled with PEXSI. Please change ks_solver to scalapack_gvx."); +#endif + + } else if (ks_solver != "default") { diff --git a/source/module_io/input.h b/source/module_io/input.h index 1f2f81903a..fb1b79f3ef 100644 --- a/source/module_io/input.h +++ b/source/module_io/input.h @@ -612,6 +612,34 @@ class Input double qo_thr = 1e-6; std::vector qo_strategy = {}; std::vector qo_screening_coeff = {}; + //========================================================== + // variables for PEXSI + //========================================================== + int pexsi_npole = 40; + bool pexsi_inertia = true; + int pexsi_nmax = 80; + // int pexsi_symbolic = 1; + bool pexsi_comm = true; + bool pexsi_storage = true; + int pexsi_ordering = 0; + int pexsi_row_ordering = 1; + int pexsi_nproc = 1; + bool pexsi_symm = true; + bool pexsi_trans = false; + int pexsi_method = 1; + int pexsi_nproc_pole = 1; + // double pexsi_spin = 2; + double pexsi_temp = 0.015; + double pexsi_gap = 0; + double pexsi_delta_e = 20.0; + double pexsi_mu_lower = -10; + double pexsi_mu_upper = 10; + double pexsi_mu = 0.0; + double pexsi_mu_thr = 0.05; + double pexsi_mu_expand = 0.3; + double pexsi_mu_guard = 0.2; + double pexsi_elec_thr = 0.001; + double pexsi_zero_thr = 
1e-10; std::time_t get_start_time(void) const { diff --git a/source/module_io/input_conv.cpp b/source/module_io/input_conv.cpp index 755067f29b..54de3d6e1e 100644 --- a/source/module_io/input_conv.cpp +++ b/source/module_io/input_conv.cpp @@ -26,6 +26,9 @@ #include "module_hamilt_lcao/module_dftu/dftu.h" #include "module_hamilt_lcao/module_tddft/evolve_elec.h" #endif +#ifdef __PEXSI +#include "module_hsolver/module_pexsi/pexsi_solver.h" +#endif #include "module_base/timer.h" #include "module_elecstate/elecstate_lcao.h" @@ -827,6 +830,37 @@ void Input_Conv::Convert(void) GlobalV::qo_strategy = INPUT.qo_strategy; GlobalV::qo_thr = INPUT.qo_thr; GlobalV::qo_screening_coeff = INPUT.qo_screening_coeff; + + //----------------------------------------------- + // PEXSI related parameters + //----------------------------------------------- +#ifdef __PEXSI + pexsi::PEXSI_Solver::pexsi_npole = INPUT.pexsi_npole; + pexsi::PEXSI_Solver::pexsi_inertia = INPUT.pexsi_inertia; + pexsi::PEXSI_Solver::pexsi_nmax = INPUT.pexsi_nmax; + // pexsi::PEXSI_Solver::pexsi_symbolic = INPUT.pexsi_symbolic; + pexsi::PEXSI_Solver::pexsi_comm = INPUT.pexsi_comm; + pexsi::PEXSI_Solver::pexsi_storage = INPUT.pexsi_storage; + pexsi::PEXSI_Solver::pexsi_ordering = INPUT.pexsi_ordering; + pexsi::PEXSI_Solver::pexsi_row_ordering = INPUT.pexsi_row_ordering; + pexsi::PEXSI_Solver::pexsi_nproc = INPUT.pexsi_nproc; + pexsi::PEXSI_Solver::pexsi_symm = INPUT.pexsi_symm; + pexsi::PEXSI_Solver::pexsi_trans = INPUT.pexsi_trans; + pexsi::PEXSI_Solver::pexsi_method = INPUT.pexsi_method; + pexsi::PEXSI_Solver::pexsi_nproc_pole = INPUT.pexsi_nproc_pole; + // pexsi::PEXSI_Solver::pexsi_spin = INPUT.pexsi_spin; + pexsi::PEXSI_Solver::pexsi_temp = INPUT.pexsi_temp; + pexsi::PEXSI_Solver::pexsi_gap = INPUT.pexsi_gap; + pexsi::PEXSI_Solver::pexsi_delta_e = INPUT.pexsi_delta_e; + pexsi::PEXSI_Solver::pexsi_mu_lower = INPUT.pexsi_mu_lower; + pexsi::PEXSI_Solver::pexsi_mu_upper = INPUT.pexsi_mu_upper; + 
pexsi::PEXSI_Solver::pexsi_mu = INPUT.pexsi_mu; + pexsi::PEXSI_Solver::pexsi_mu_thr = INPUT.pexsi_mu_thr; + pexsi::PEXSI_Solver::pexsi_mu_expand = INPUT.pexsi_mu_expand; + pexsi::PEXSI_Solver::pexsi_mu_guard = INPUT.pexsi_mu_guard; + pexsi::PEXSI_Solver::pexsi_elec_thr = INPUT.pexsi_elec_thr; + pexsi::PEXSI_Solver::pexsi_zero_thr = INPUT.pexsi_zero_thr; +#endif ModuleBase::timer::tick("Input_Conv", "Convert"); return; } diff --git a/source/module_io/test/for_testing_input_conv.h b/source/module_io/test/for_testing_input_conv.h index 1e25cc32e5..b012dbf183 100644 --- a/source/module_io/test/for_testing_input_conv.h +++ b/source/module_io/test/for_testing_input_conv.h @@ -26,6 +26,9 @@ #include "module_relax/relax_old/ions_move_basic.h" #include "module_relax/relax_old/ions_move_cg.h" #include "module_relax/relax_old/lattice_change_basic.h" +#ifdef __PEXSI +#include "module_hsolver/module_pexsi/pexsi_solver.h" +#endif bool berryphase::berry_phase_flag = false; @@ -355,6 +358,37 @@ pseudopot_cell_vnl ppcell; Charge_Mixing CHR_MIX; } // namespace GlobalC +#ifdef __PEXSI +namespace pexsi +{ +int PEXSI_Solver::pexsi_npole = 0; +bool PEXSI_Solver::pexsi_inertia = 0; +int PEXSI_Solver::pexsi_nmax = 0; +// int PEXSI_Solver::pexsi_symbolic = 0; +bool PEXSI_Solver::pexsi_comm = 0; +bool PEXSI_Solver::pexsi_storage = 0; +int PEXSI_Solver::pexsi_ordering = 0; +int PEXSI_Solver::pexsi_row_ordering = 0; +int PEXSI_Solver::pexsi_nproc = 0; +bool PEXSI_Solver::pexsi_symm = 0; +bool PEXSI_Solver::pexsi_trans = 0; +int PEXSI_Solver::pexsi_method = 0; +int PEXSI_Solver::pexsi_nproc_pole = 0; +// double PEXSI_Solver::pexsi_spin = 2; +double PEXSI_Solver::pexsi_temp = 0.0; +double PEXSI_Solver::pexsi_gap = 0.0; +double PEXSI_Solver::pexsi_delta_e = 0.0; +double PEXSI_Solver::pexsi_mu_lower = 0.0; +double PEXSI_Solver::pexsi_mu_upper = 0.0; +double PEXSI_Solver::pexsi_mu = 0.0; +double PEXSI_Solver::pexsi_mu_thr = 0.0; +double PEXSI_Solver::pexsi_mu_expand = 0.0; +double 
PEXSI_Solver::pexsi_mu_guard = 0.0; +double PEXSI_Solver::pexsi_elec_thr = 0.0; +double PEXSI_Solver::pexsi_zero_thr = 0.0; +} // namespace pexsi +#endif + #undef private #endif diff --git a/source/module_io/test/input_conv_test.cpp b/source/module_io/test/input_conv_test.cpp index bd135d19d5..2f64454291 100644 --- a/source/module_io/test/input_conv_test.cpp +++ b/source/module_io/test/input_conv_test.cpp @@ -523,6 +523,40 @@ TEST_F(InputConvTest, ReadTdEfieldTest) EXPECT_EQ(elecstate::H_TDDFT_pw::heavi_t0[0], 100); EXPECT_NEAR(elecstate::H_TDDFT_pw::heavi_amp[0], 1.00 * ModuleBase::BOHR_TO_A / ModuleBase::Ry_to_eV, 1e-8); } + +#ifdef __PEXSI +TEST_F(InputConvTest, PEXSI) +{ + INPUT.Default(); + std::string input_file = "./support/INPUT"; + INPUT.Read(input_file); + Input_Conv::Convert(); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_npole, 40); + EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_inertia); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_nmax, 80); + EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_comm); + EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_storage); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_ordering, 0); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_row_ordering, 1); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_nproc, 1); + EXPECT_TRUE(pexsi::PEXSI_Solver::pexsi_symm); + EXPECT_FALSE(pexsi::PEXSI_Solver::pexsi_trans); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_method, 1); + EXPECT_EQ(pexsi::PEXSI_Solver::pexsi_nproc_pole, 1); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_temp, 0.015); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_gap, 0); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_delta_e, 20); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu_lower, -10); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu_upper, 10); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu, 0); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu_thr, 0.05); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu_expand, 0.3); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_mu_guard, 0.2); + 
EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_elec_thr, 0.001); + EXPECT_DOUBLE_EQ(pexsi::PEXSI_Solver::pexsi_zero_thr, 1e-10); +} +#endif + #endif #undef private diff --git a/source/module_io/test/input_test_para.cpp b/source/module_io/test/input_test_para.cpp index 28a4562947..5a6490551a 100644 --- a/source/module_io/test/input_test_para.cpp +++ b/source/module_io/test/input_test_para.cpp @@ -397,6 +397,30 @@ TEST_F(InputParaTest, Bcast) EXPECT_EQ(INPUT.qo_screening_coeff.size(), 0); EXPECT_EQ(INPUT.qo_thr, 1e-6); EXPECT_EQ(INPUT.qo_basis, "szv"); + + EXPECT_EQ(INPUT.pexsi_npole, 40); + EXPECT_TRUE(INPUT.pexsi_inertia); + EXPECT_EQ(INPUT.pexsi_nmax, 80); + EXPECT_TRUE(INPUT.pexsi_comm); + EXPECT_TRUE(INPUT.pexsi_storage); + EXPECT_EQ(INPUT.pexsi_ordering, 0); + EXPECT_EQ(INPUT.pexsi_row_ordering, 1); + EXPECT_EQ(INPUT.pexsi_nproc, 1); + EXPECT_TRUE(INPUT.pexsi_symm); + EXPECT_FALSE(INPUT.pexsi_trans); + EXPECT_EQ(INPUT.pexsi_method, 1); + EXPECT_EQ(INPUT.pexsi_nproc_pole, 1); + EXPECT_DOUBLE_EQ(INPUT.pexsi_temp, 0.015); + EXPECT_DOUBLE_EQ(INPUT.pexsi_gap, 0); + EXPECT_DOUBLE_EQ(INPUT.pexsi_delta_e, 20); + EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_lower, -10); + EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_upper, 10); + EXPECT_DOUBLE_EQ(INPUT.pexsi_mu, 0); + EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_thr, 0.05); + EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_expand, 0.3); + EXPECT_DOUBLE_EQ(INPUT.pexsi_mu_guard, 0.2); + EXPECT_DOUBLE_EQ(INPUT.pexsi_elec_thr, 0.001); + EXPECT_DOUBLE_EQ(INPUT.pexsi_zero_thr, 1e-10); } TEST_F(InputParaTest, Init) diff --git a/source/module_io/test/write_input_test.cpp b/source/module_io/test/write_input_test.cpp index ef9c6a3d28..dc304df90b 100644 --- a/source/module_io/test/write_input_test.cpp +++ b/source/module_io/test/write_input_test.cpp @@ -927,4 +927,42 @@ TEST_F(write_input, Deltaspin22) EXPECT_THAT(output, testing::HasSubstr("sccut 3 #Maximal step size for lambda in eV/uB")); remove("write_input_test.log"); } + +TEST_F (write_input, PEXSI24) +{ + INPUT.Default(); + 
INPUT.Read("./support/witestfile"); + std::string output_file = "write_input_test.log"; + INPUT.Print(output_file); + int a = access("write_input_test.log", 00); + EXPECT_EQ(a, 0); + std::ifstream ifs ("write_input_test.log"); + std::string output ((std::istreambuf_iterator(ifs)), std::istreambuf_iterator()); + EXPECT_THAT(output, testing::HasSubstr("#Parameters (24.PEXSI)")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_npole 40 #Number of poles in expansion")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_inertia 1 #Whether inertia counting is used at the very beginning of PEXSI process")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_nmax 80 #Maximum number of PEXSI iterations after each inertia counting procedure.")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_comm 1 #Whether to construct PSelInv communication pattern")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_storage 1 #Storage space used by the Selected Inversion algorithm for symmetric matrices.")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_ordering 0 #Ordering strategy for factorization and selected inversion")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_row_ordering 1 #Row permutation strategy for factorization and selected inversion, 0: NoRowPerm, 1: LargeDiag")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_nproc 1 #Number of processors for parmetis")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_symm 1 #Matrix symmetry")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_trans 0 #Whether to transpose")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_method 1 #pole expansion method, 1: Cauchy Contour Integral, 2: Moussa optimized method")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_nproc_pole 1 #Number of processes used by each pole")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_temp 0.015 #Temperature, in the same unit as H")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_gap 0 #Spectral gap")); + EXPECT_THAT(output, 
testing::HasSubstr("pexsi_delta_e 20 #An upper bound for the spectral radius of \\f$S^{-1} H\\f$")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_lower -10 #Initial guess of lower bound for mu")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_upper 10 #Initial guess of upper bound for mu")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_mu 0 #Initial guess for mu (for the solver)")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_thr 0.05 #Stopping criterion in terms of the chemical potential for the inertia counting procedure")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_expand 0.3 #If the chemical potential is not in the initial interval, the interval is expanded by muInertiaExpansion")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_mu_guard 0.2 #Safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_elec_thr 0.001 #Stopping criterion of the PEXSI iteration in terms of the number of electrons compared to numElectronExact")); + EXPECT_THAT(output, testing::HasSubstr("pexsi_zero_thr 1e-10 #if the absolute value of matrix element is less than ZERO_Limit, it will be considered as 0")); + ifs.close(); + remove("write_input_test.log"); +} #undef private diff --git a/source/module_io/write_input.cpp b/source/module_io/write_input.cpp index a9754d8939..91a04201ab 100644 --- a/source/module_io/write_input.cpp +++ b/source/module_io/write_input.cpp @@ -219,7 +219,7 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou ofs << "\n#Parameters (5.LCAO)" << std::endl; ModuleBase::GlobalFunc::OUTP(ofs, "basis_type", basis_type, "PW; LCAO in pw; LCAO"); - if (ks_solver == "HPSEPS" || ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver") + if (ks_solver == "HPSEPS" || ks_solver == "genelpa" || ks_solver == "scalapack_gvx" || ks_solver == "cusolver" || ks_solver == "pexsi") { 
ModuleBase::GlobalFunc::OUTP(ofs, "nb2d", nb2d, "2d distribution of atoms"); } @@ -511,7 +511,32 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou ModuleBase::GlobalFunc::OUTP(ofs, "qo_switch", qo_switch, "0: no QO analysis; 1: QO analysis"); ModuleBase::GlobalFunc::OUTP(ofs, "qo_basis", qo_basis, "type of QO basis function: hydrogen: hydrogen-like basis, pswfc: read basis from pseudopotential"); ModuleBase::GlobalFunc::OUTP(ofs, "qo_thr", qo_thr, "accuracy for evaluating cutoff radius of QO basis function"); - + + ofs << "\n#Parameters (24.PEXSI)" << std::endl; + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_npole", pexsi_npole, "Number of poles in expansion"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_inertia", pexsi_inertia, "Whether inertia counting is used at the very beginning of PEXSI process"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nmax", pexsi_nmax, "Maximum number of PEXSI iterations after each inertia counting procedure."); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_comm", pexsi_comm, "Whether to construct PSelInv communication pattern"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_storage", pexsi_storage, "Storage space used by the Selected Inversion algorithm for symmetric matrices."); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_ordering", pexsi_ordering, "Ordering strategy for factorization and selected inversion"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_row_ordering", pexsi_row_ordering, "Row permutation strategy for factorization and selected inversion, 0: NoRowPerm, 1: LargeDiag"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nproc", pexsi_nproc, "Number of processors for parmetis"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_symm", pexsi_symm, "Matrix symmetry"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_trans", pexsi_trans, "Whether to transpose"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_method", pexsi_method, "pole expansion method, 1: Cauchy Contour Integral, 2: Moussa optimized method"); + 
ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_nproc_pole", pexsi_nproc_pole, "Number of processes used by each pole"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_temp", pexsi_temp, "Temperature, in the same unit as H"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_gap", pexsi_gap, "Spectral gap"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_delta_e", pexsi_delta_e, "An upper bound for the spectral radius of \\f$S^{-1} H\\f$"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_lower", pexsi_mu_lower, "Initial guess of lower bound for mu"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_upper", pexsi_mu_upper, "Initial guess of upper bound for mu"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu", pexsi_mu, "Initial guess for mu (for the solver)"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_thr", pexsi_mu_thr, "Stopping criterion in terms of the chemical potential for the inertia counting procedure"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_expand", pexsi_mu_expand, "If the chemical potential is not in the initial interval, the interval is expanded by muInertiaExpansion"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_mu_guard", pexsi_mu_guard, "Safe guard criterion in terms of the chemical potential to reinvoke the inertia counting procedure"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_elec_thr", pexsi_elec_thr, "Stopping criterion of the PEXSI iteration in terms of the number of electrons compared to numElectronExact"); + ModuleBase::GlobalFunc::OUTP(ofs, "pexsi_zero_thr", pexsi_zero_thr, "if the absolute value of matrix element is less than ZERO_Limit, it will be considered as 0"); + ofs.close(); return; } \ No newline at end of file