From a671dcb0360246a8fd498cd487cb6da700d4c77c Mon Sep 17 00:00:00 2001 From: Yang Shengxin Date: Sun, 7 Sep 2025 15:18:37 +0800 Subject: [PATCH 01/13] Feature: add DFT-1/2 and shell DFT-1/2, currently only support PW esolver_ks_pw. * Added Sep, Sep_Cell, and VSep to organize the self-energy potential of DFT-1/2 * Added a new effective potential pot_sep for calculating the self-energy potential * Added initialization of the self-energy potential in the esolver_ks_pw control flow * Added the keyword SEP_FILES in the STRU file for reading self-energy potential files * Added the dfthalf_type keyword in INPUT to enable DFT-1/2 and shell DFT-1/2 --- source/source_cell/CMakeLists.txt | 2 + source/source_cell/sep.cpp | 122 ++ source/source_cell/sep.h | 39 + source/source_cell/sep_cell.cpp | 127 ++ source/source_cell/sep_cell.h | 74 ++ source/source_cell/test/CMakeLists.txt | 53 +- .../source_cell/test/bcast_read_sep_test.sh | 18 + .../source_cell/test/bcast_sep_cell_test.sh | 18 + source/source_cell/test/read_sep_test.cpp | 146 +++ source/source_cell/test/sepcell_test.cpp | 286 +++++ source/source_cell/test/support/C_ca_50.sep | 1014 ++++++++++++++++ source/source_cell/test/support/F_pbe_50.sep | 1046 +++++++++++++++++ source/source_cell/test/support/STRU_LiF | 28 + .../test/support/STRU_LiF_Warning1 | 27 + source/source_cell/test_pw/CMakeLists.txt | 6 +- source/source_cell/unitcell.cpp | 74 +- source/source_esolver/esolver_ks_pw.cpp | 13 + source/source_estate/CMakeLists.txt | 3 +- source/source_estate/module_pot/pot_sep.cpp | 25 + source/source_estate/module_pot/pot_sep.h | 46 + .../module_pot/potential_types.cpp | 6 +- .../module_parameter/input_parameter.h | 20 +- .../source_io/read_input_item_elec_stru.cpp | 24 +- source/source_md/test/CMakeLists.txt | 22 +- source/source_pw/module_pwdft/CMakeLists.txt | 1 + source/source_pw/module_pwdft/VSep_in_pw.cpp | 152 +++ source/source_pw/module_pwdft/VSep_in_pw.h | 30 + source/source_pw/module_pwdft/hamilt_pw.cpp | 4 + .../module_pwdft/test/CMakeLists.txt | 6 +- 29 files changed, 3355 insertions(+), 77 deletions(-) create mode 100644 source/source_cell/sep.cpp create mode 100644 source/source_cell/sep.h create mode 100644 source/source_cell/sep_cell.cpp create mode 100644 source/source_cell/sep_cell.h create mode 100644 source/source_cell/test/bcast_read_sep_test.sh create mode 100644 source/source_cell/test/bcast_sep_cell_test.sh create mode 100644 source/source_cell/test/read_sep_test.cpp create mode 100644 source/source_cell/test/sepcell_test.cpp create mode 100644 source/source_cell/test/support/C_ca_50.sep create mode 100644 source/source_cell/test/support/F_pbe_50.sep create mode 100644 source/source_cell/test/support/STRU_LiF create mode 100644 source/source_cell/test/support/STRU_LiF_Warning1 create mode 100644 source/source_estate/module_pot/pot_sep.cpp create mode 100644 source/source_estate/module_pot/pot_sep.h create mode 100644 source/source_pw/module_pwdft/VSep_in_pw.cpp create mode 100644 source/source_pw/module_pwdft/VSep_in_pw.h diff --git a/source/source_cell/CMakeLists.txt b/source/source_cell/CMakeLists.txt index 95d997ecfd..f9d4f0c535 100644 --- a/source/source_cell/CMakeLists.txt +++ b/source/source_cell/CMakeLists.txt @@ -26,6 +26,8 @@ add_library( print_cell.cpp read_atom_species.cpp k_vector_utils.cpp + sep.cpp + sep_cell.cpp ) if(ENABLE_COVERAGE) diff --git a/source/source_cell/sep.cpp b/source/source_cell/sep.cpp new file mode 100644 index 0000000000..9380b44444 --- /dev/null +++ b/source/source_cell/sep.cpp @@ -0,0 +1,122 @@ +#include "sep.h" + +#include "source_base/global_variable.h" +#include "source_base/parallel_common.h" +#include "source_base/tool_title.h" +#include "source_io/output.h" + +#include +#include +#include + +SepPot::SepPot() +{ +} + +SepPot::~SepPot() +{ + delete[] r; + r = nullptr; + delete[] rv; + rv = nullptr; +} + +int SepPot::read_sep(std::ifstream& ifs) +{ + std::string line; + while (std::getline(ifs, line)) + { + std::istringstream iss(line); + std::string key; + iss >> key; + + if (key == "Sep.Element") + { + iss >> label; + } + else if (key == "Sep.XcType") + { + iss >> xc_type; + } + else if (key == "Sep.Orbital") + { + iss >> orbital; + } + else if (key == "Sep.Points") + { + iss >> mesh; + delete[] r; + r = new double[mesh]; + delete[] rv; + rv = new double[mesh]; + } + else if (key == "Sep.StripAmount") + { + iss >> strip_elec; + } + else if (key == "> r_val >> rv_val) + { + r[idx] = r_val; + rv[idx] = rv_val; + idx++; + } + } + break; + } + } + return 0; +} + +void SepPot::print_sep_info(std::ofstream& ofs) +{ + ofs << "\n sep_vl:"; + ofs << "\n sep_info:"; + ofs << "\n label " << label; + ofs << "\n xc " << xc_type; + ofs << "\n orbital " << orbital; + ofs << "\n strip electron" << strip_elec; +} + +void SepPot::print_sep_vsep(std::ofstream& ofs) +{ + ofs << "\n mesh " << mesh; + output::printr1_d(ofs, " r : ", r, mesh); + output::printr1_d(ofs, " vsep : ", rv, mesh); + ofs << "\n -----------------------------"; +} + +#ifdef __MPI + +void SepPot::bcast_sep() +{ + ModuleBase::TITLE("SepPot", "bcast_sep"); + Parallel_Common::bcast_bool(is_enable); + Parallel_Common::bcast_double(r_in); + Parallel_Common::bcast_double(r_out); + Parallel_Common::bcast_double(r_power); + Parallel_Common::bcast_double(enhence_a); + Parallel_Common::bcast_string(label); + Parallel_Common::bcast_string(xc_type); + Parallel_Common::bcast_string(orbital); + Parallel_Common::bcast_int(strip_elec); + Parallel_Common::bcast_int(mesh); + + if (GlobalV::MY_RANK != 0 && mesh > 0) + { + r = new double[mesh]; + rv = new double[mesh]; + } + + Parallel_Common::bcast_double(r, mesh); + Parallel_Common::bcast_double(rv, mesh); + + return; +} +#endif // __MPI diff --git a/source/source_cell/sep.h b/source/source_cell/sep.h new file mode 100644 index 0000000000..20c95b528c --- /dev/null +++ b/source/source_cell/sep.h @@ -0,0 +1,39 @@ +#ifndef SEP_H +#define SEP_H + +#include +#include + +/** + * Sep Potential for DFT-1/2 etc. + * + * Sep Potential + */ +class SepPot +{ + public: + SepPot(); + ~SepPot(); + + bool is_enable = false; + double r_in = 0.0; /**< cut-off radius inner */ + double r_out = 0.0; /**< cut-off radius outter */ + double r_power = 20.0; /**< shell function exp factor */ + double enhence_a = 1.0; /**< scale sep potential */ + std::string label; /**< element nameof sep */ + std::string xc_type; /**< Exch-Corr type */ + std::string orbital; /** atomic angular moment s,p,d,f */ + int mesh = 0; /**< number of points in radial mesh */ + int strip_elec = 0; /**< strip electron amount 1->0.01 50->0.5 */ + double* r = nullptr; /**< ridial mesh */ + double* rv = nullptr; /**< sep potential, but rV, unit: Ry */ + + int read_sep(std::ifstream& is); + void print_sep_info(std::ofstream& ofs); + void print_sep_vsep(std::ofstream& ofs); +#ifdef __MPI + void bcast_sep(); +#endif /* ifdef __MPI */ +}; + +#endif /* ifndef SEP_H */ diff --git a/source/source_cell/sep_cell.cpp b/source/source_cell/sep_cell.cpp new file mode 100644 index 0000000000..814ea15cba --- /dev/null +++ b/source/source_cell/sep_cell.cpp @@ -0,0 +1,127 @@ +#include "sep_cell.h" + +#include "source_base/global_function.h" +#include "source_base/global_variable.h" +#include "source_base/parallel_common.h" +#include "source_base/tool_title.h" +#include "source_cell/unitcell.h" + +#include +#include + +namespace GlobalC +{ +Sep_Cell sep_cell; +} + +Sep_Cell::Sep_Cell() noexcept : ntype(0), omega(0.0), tpiba2(0.0) +{ +} + +Sep_Cell::~Sep_Cell() noexcept = default; + +void Sep_Cell::init(const int ntype_in) +{ + this->ntype = ntype_in; + this->seps.resize(ntype); + this->sep_enable.resize(ntype); + std::fill(this->sep_enable.begin(), this->sep_enable.end(), false); +} + +void Sep_Cell::set_omega(const double omega_in, const double tpiba2_in) +{ + this->omega = omega_in; + this->tpiba2 = tpiba2_in; +} + +/** + * read sep potential files + * + * need to add following lines in STRU file, and order of elements must match ATOMIC_SPECIES. + * SEP_FILES + * symbol is_enable r_in r_out r_power enhence_a + * + * example + * Li 0 + * F 1 F_pbe_50.sep 0.0 2.0 20.0 1.0 + */ +int Sep_Cell::read_sep_potentials(std::ifstream& ifpos, + const std::string& pp_dir, + std::ofstream& ofs_running, + UnitCell& ucell) +{ + ModuleBase::TITLE("Sep_Cell", "read_sep_potentials"); + + if (!ModuleBase::GlobalFunc::SCAN_BEGIN(ifpos, "SEP_FILES")) + { + GlobalV::ofs_running << "Cannot find SEP_FILES section in STRU" << std::endl; + return false; + } + + ifpos.ignore(300, '\n'); + + for (int i = 0; i < this->ntype; ++i) + { + std::string one_line, atom_label; + std::getline(ifpos, one_line); + std::stringstream ss(one_line); + + // read the label of the atom + bool enable_tmp; + ss >> atom_label >> enable_tmp; + + // Validate atom label + if (atom_label != ucell.atom_label[i]) + { + GlobalV::ofs_running << "Sep potential and atom order do not match. " + << "Expected: " << ucell.atoms[i].label << ", Got: " << atom_label << std::endl; + return false; + } + this->sep_enable[i] = enable_tmp; + if (this->sep_enable[i]) + { + this->seps[i].is_enable = this->sep_enable[i]; + std::string sep_filename; + ss >> sep_filename; + ss >> this->seps[i].r_in >> this->seps[i].r_out >> this->seps[i].r_power >> this->seps[i].enhence_a; + std::string sep_addr = pp_dir + sep_filename; + std::ifstream sep_ifs(sep_addr.c_str(), std::ios::in); + if (!sep_ifs) + { + GlobalV::ofs_running << "Cannot find sep potential file: " << sep_addr << std::endl; + return false; + } + this->seps[i].read_sep(sep_ifs); + } + } + + return true; +} + +#ifdef __MPI +void Sep_Cell::bcast_sep_cell() +{ + ModuleBase::TITLE("Sep_Cell", "bcast_sep_cell"); + Parallel_Common::bcast_int(this->ntype); + + if (GlobalV::MY_RANK != 0) + { + this->seps.resize(this->ntype); + this->sep_enable.resize(this->ntype); + } + for (int i = 0; i < this->ntype; ++i) + { + bool tmp = false; + if (GlobalV::MY_RANK == 0) + { + tmp = this->sep_enable[i]; + } + Parallel_Common::bcast_bool(tmp); + if (GlobalV::MY_RANK != 0) + { + this->sep_enable[i] = tmp; + } + this->seps[i].bcast_sep(); + } +} +#endif // __MPI diff --git a/source/source_cell/sep_cell.h b/source/source_cell/sep_cell.h new file mode 100644 index 0000000000..ebea3e3e6c --- /dev/null +++ b/source/source_cell/sep_cell.h @@ -0,0 +1,74 @@ +// The Sep_Cell class is container for Sep potential. + +#ifndef SEP_CELL +#define SEP_CELL + +#include "source_cell/sep.h" +#include "source_cell/unitcell.h" + +#include +#include + +class Sep_Cell +{ + public: + Sep_Cell() noexcept; + ~Sep_Cell() noexcept; + + // Sets the number of atom types and initializes internal vectors + void init(const int ntype_in); + + void set_omega(const double omega_in, const double tpiba2_in); + + // Reads self potentials from STRU file and xx.sep files + // Returns true if successful, false otherwise + int read_sep_potentials(std::ifstream& ifpos, + const std::string& pp_dir, + std::ofstream& ofs_running, + UnitCell& ucell); + +#ifdef __MPI + // Broadcasts the Sep_Cell object to all processes + void bcast_sep_cell(); +#endif // __MPI + + // Getter methods + const std::vector& get_seps() const + { + return seps; + } + int get_ntype() const + { + return ntype; + } + const std::vector& get_sep_enable() const + { + return sep_enable; + } + + double get_omega() const + { + return omega; + } + + double get_tpiba2() const + { + return tpiba2; + } + + private: + std::vector seps; // Self potentials for each atom type + int ntype; // number of atom types + std::vector sep_enable; // Whether self potential is enabled for each atom type + + // unit cell data for VSep + double omega; // unit cell Volume + double tpiba2; // tpiba ^ 2 +}; + +namespace GlobalC +{ +extern Sep_Cell sep_cell; +} + +#endif // SEP_CEll diff --git a/source/source_cell/test/CMakeLists.txt b/source/source_cell/test/CMakeLists.txt index 7d1a2ab8c2..6217e90be4 100644 --- a/source/source_cell/test/CMakeLists.txt +++ b/source/source_cell/test/CMakeLists.txt @@ -10,6 +10,8 @@ install(FILES bcast_atom_spec_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) install(FILES parallel_kpoints_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) install(FILES klist_test_para.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) install(FILES unitcell_test_parallel.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) +install(FILES bcast_read_sep_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) +install(FILES bcast_sep_cell_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) list(APPEND cell_simple_srcs ../unitcell.cpp @@ -33,6 +35,8 @@ list(APPEND cell_simple_srcs ../../source_estate/cal_wfc.cpp ../../source_estate/cal_nelec_nband.cpp ../../source_estate/read_orb.cpp + ../sep.cpp + ../sep_cell.cpp ) add_library(cell_info OBJECT ${cell_simple_srcs}) @@ -40,40 +44,40 @@ add_library(cell_info OBJECT ${cell_simple_srcs}) AddTest( TARGET MODULE_CELL_read_pp LIBS parameter ${math_libs} base device - SOURCES read_pp_test.cpp ../atom_pseudo.cpp ../pseudo.cpp ../read_pp.cpp ../read_pp_complete.cpp ../read_pp_upf201.cpp ../read_pp_upf100.cpp ../read_pp_vwr.cpp ../read_pp_blps.cpp + SOURCES read_pp_test.cpp ../atom_pseudo.cpp ../pseudo.cpp ../read_pp.cpp ../read_pp_complete.cpp ../read_pp_upf201.cpp ../read_pp_upf100.cpp ../read_pp_vwr.cpp ../read_pp_blps.cpp ../../source_io/output.cpp ) AddTest( TARGET MODULE_CELL_pseudo_nc LIBS parameter ${math_libs} base device - SOURCES pseudo_nc_test.cpp ../pseudo.cpp ../atom_pseudo.cpp ../read_pp.cpp ../read_pp_complete.cpp ../read_pp_upf201.cpp ../read_pp_upf100.cpp ../read_pp_vwr.cpp + SOURCES pseudo_nc_test.cpp ../pseudo.cpp ../atom_pseudo.cpp ../read_pp.cpp ../read_pp_complete.cpp ../read_pp_upf201.cpp ../read_pp_upf100.cpp ../read_pp_vwr.cpp ../read_pp_blps.cpp ../../source_io/output.cpp ) AddTest( TARGET MODULE_CELL_atom_pseudo LIBS parameter ${math_libs} base device - SOURCES atom_pseudo_test.cpp ../atom_pseudo.cpp ../pseudo.cpp ../read_pp.cpp ../read_pp_complete.cpp ../read_pp_upf201.cpp ../read_pp_upf100.cpp + SOURCES atom_pseudo_test.cpp ../atom_pseudo.cpp ../pseudo.cpp ../read_pp.cpp ../read_pp_complete.cpp ../read_pp_upf201.cpp ../read_pp_upf100.cpp ../read_pp_vwr.cpp ../read_pp_blps.cpp ../../source_io/output.cpp ) AddTest( TARGET MODULE_CELL_atom_spec - LIBS parameter ${math_libs} base device - SOURCES atom_spec_test.cpp ../atom_spec.cpp ../atom_pseudo.cpp ../pseudo.cpp ../read_pp.cpp ../read_pp_complete.cpp ../read_pp_upf201.cpp + LIBS parameter ${math_libs} base device + SOURCES atom_spec_test.cpp ../atom_spec.cpp ../atom_pseudo.cpp ../pseudo.cpp ../read_pp.cpp ../read_pp_complete.cpp ../read_pp_upf201.cpp ../read_pp_upf100.cpp ../read_pp_vwr.cpp ../read_pp_blps.cpp ../../source_io/output.cpp ) AddTest( TARGET MODULE_CELL_klist_test - LIBS parameter ${math_libs} base device symmetry + LIBS parameter ${math_libs} base device symmetry SOURCES klist_test.cpp ../klist.cpp ../parallel_kpoints.cpp ../../source_io/output.cpp ../k_vector_utils.cpp ) AddTest( TARGET MODULE_CELL_klist_test_para1 - LIBS parameter ${math_libs} base device symmetry + LIBS parameter ${math_libs} base device symmetry SOURCES klist_test_para.cpp ../klist.cpp ../parallel_kpoints.cpp ../../source_io/output.cpp ../k_vector_utils.cpp ) @@ -85,7 +89,7 @@ add_test(NAME MODULE_CELL_klist_test_para4 AddTest( TARGET MODULE_CELL_ParaKpoints LIBS parameter MPI::MPI_CXX - SOURCES parallel_kpoints_test.cpp ../../source_base/global_variable.cpp ../../source_base/parallel_global.cpp + SOURCES parallel_kpoints_test.cpp ../../source_base/global_variable.cpp ../../source_base/parallel_global.cpp ../../source_base/parallel_common.cpp ../../source_base/parallel_comm.cpp ../parallel_kpoints.cpp ) @@ -109,26 +113,26 @@ add_test(NAME MODULE_CELL_parallel_kpoints_test AddTest( TARGET MODULE_CELL_unitcell_test LIBS parameter ${math_libs} base device cell_info symmetry - SOURCES unitcell_test.cpp ../../source_io/output.cpp ../../source_estate/cal_ux.cpp + SOURCES unitcell_test.cpp ../../source_io/output.cpp ../../source_estate/cal_ux.cpp ) AddTest( TARGET MODULE_CELL_unitcell_test_readpp - LIBS parameter ${math_libs} base device cell_info - SOURCES unitcell_test_readpp.cpp ../../source_io/output.cpp + LIBS parameter ${math_libs} base device cell_info + SOURCES unitcell_test_readpp.cpp ../../source_io/output.cpp ) AddTest( TARGET MODULE_CELL_unitcell_test_para - LIBS parameter ${math_libs} base device cell_info + LIBS parameter ${math_libs} base device cell_info SOURCES unitcell_test_para.cpp ../../source_io/output.cpp ) AddTest( TARGET MODULE_CELL_unitcell_test_setupcell - LIBS parameter ${math_libs} base device cell_info - SOURCES unitcell_test_setupcell.cpp ../../source_io/output.cpp + LIBS parameter ${math_libs} base device cell_info + SOURCES unitcell_test_setupcell.cpp ../../source_io/output.cpp ) add_test(NAME MODULE_CELL_unitcell_test_parallel @@ -142,3 +146,24 @@ AddTest( SOURCES cell_index_test.cpp ../cell_index.cpp ) +AddTest( + TARGET MODULE_CELL_SEP_TEST + LIBS parameter ${math_libs} base device + SOURCES read_sep_test.cpp ../sep.cpp +) + +add_test(NAME MODULE_CELL_read_sep_parallel + COMMAND ${BASH} bcast_read_sep_test.sh + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} +) + +AddTest( + TARGET MODULE_CELL_SEP_CELL_TEST + LIBS parameter ${math_libs} base device + SOURCES sepcell_test.cpp ../sep.cpp ../sep_cell.cpp +) + +add_test(NAME MODULE_CELL_sep_cell_parallel + COMMAND ${BASH} bcast_sep_cell_test.sh + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} +) diff --git a/source/source_cell/test/bcast_read_sep_test.sh b/source/source_cell/test/bcast_read_sep_test.sh new file mode 100644 index 0000000000..4a38e4f5e4 --- /dev/null +++ b/source/source_cell/test/bcast_read_sep_test.sh @@ -0,0 +1,18 @@ +#!/bin/bash -e + +np=$(cat /proc/cpuinfo | grep "cpu cores" | uniq | awk '{print $NF}') +echo "nprocs in this machine is $np" + +for i in 4; do + if [[ $i -gt $np ]]; then + continue + fi + echo "TEST in parallel, nprocs=$i" + mpirun -np $i ./MODULE_CELL_SEP_TEST + if [[ $? -ne 0 ]]; then + echo -e "\e[1;33m [ FAILED ] \e[0m" \ + "execute UT with $i cores error." + exit 1 + fi + break +done diff --git a/source/source_cell/test/bcast_sep_cell_test.sh b/source/source_cell/test/bcast_sep_cell_test.sh new file mode 100644 index 0000000000..79689516e3 --- /dev/null +++ b/source/source_cell/test/bcast_sep_cell_test.sh @@ -0,0 +1,18 @@ +#!/bin/bash -e + +np=$(cat /proc/cpuinfo | grep "cpu cores" | uniq | awk '{print $NF}') +echo "nprocs in this machine is $np" + +for i in 4; do + if [[ $i -gt $np ]]; then + continue + fi + echo "TEST in parallel, nprocs=$i" + mpirun -np $i ./MODULE_CELL_SEP_CELL_TEST + if [[ $? -ne 0 ]]; then + echo -e "\e[1;33m [ FAILED ] \e[0m" \ + "execute UT with $i cores error." + exit 1 + fi + break +done diff --git a/source/source_cell/test/read_sep_test.cpp b/source/source_cell/test/read_sep_test.cpp new file mode 100644 index 0000000000..0bfada1a36 --- /dev/null +++ b/source/source_cell/test/read_sep_test.cpp @@ -0,0 +1,146 @@ +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include +#define private public +#include "source_io/module_parameter/parameter.h" +#undef private + +#ifdef __MPI +#include +#endif // __MPI + +#define private public +#include "source_cell/sep.h" +#undef private + +class ReadSepTest : public testing::Test +{ + protected: + std::string output; + std::unique_ptr read_sep{new SepPot}; + + void SetUp() override + { + // Initialization default check + EXPECT_FALSE(read_sep->is_enable); + EXPECT_DOUBLE_EQ(read_sep->r_in, 0.0); + EXPECT_DOUBLE_EQ(read_sep->r_out, 0.0); + EXPECT_DOUBLE_EQ(read_sep->r_power, 20.0); + EXPECT_DOUBLE_EQ(read_sep->enhence_a, 1.0); + EXPECT_EQ(read_sep->mesh, 0); + EXPECT_EQ(read_sep->strip_elec, 0); + EXPECT_EQ(read_sep->r, nullptr); + EXPECT_EQ(read_sep->rv, nullptr); + } + + void TearDown() override + { + // Cleaning is done automatically in the destructor + } +}; + +TEST_F(ReadSepTest, ReadSep) +{ +#ifdef __MPI + if (GlobalV::MY_RANK == 0) + { +#endif // !__MPI + std::ifstream ifs; + ifs.open("./support/F_pbe_50.sep"); + ASSERT_TRUE(ifs.is_open()); + read_sep->read_sep(ifs); + ifs.close(); + EXPECT_EQ(read_sep->label, "F"); + EXPECT_EQ(read_sep->mesh, 1038); + EXPECT_EQ(read_sep->xc_type, "pbe"); + EXPECT_EQ(read_sep->strip_elec, 50); + + EXPECT_EQ(read_sep->r[0], 3.4643182373e-06); + EXPECT_NE(read_sep->r, nullptr); + EXPECT_NE(read_sep->rv, nullptr); +#ifdef __MPI + } +#endif // __MPI +} + +TEST_F(ReadSepTest, PrintSep) +{ +#ifdef __MPI + if (GlobalV::MY_RANK == 0) + { +#endif + // 设置测试数据 + read_sep->label = "F"; + read_sep->xc_type = "pbe"; + read_sep->orbital = "p"; + read_sep->strip_elec = 50; + read_sep->mesh = 2; + read_sep->r = new double[2]{0.1, 0.2}; + read_sep->rv = new double[2]{1.0, 2.0}; + + // 测试打印功能 + std::ofstream ofs("test_sep.out"); + read_sep->print_sep_info(ofs); + read_sep->print_sep_vsep(ofs); + ofs.close(); + + // 验证输出文件 + std::ifstream ifs("test_sep.out"); + std::string line; + std::vector lines; + while (std::getline(ifs, line)) + { + lines.push_back(line); + } + ifs.close(); + + EXPECT_THAT(lines, testing::Contains(" label F")); + EXPECT_THAT(lines, testing::Contains(" xc pbe")); + EXPECT_THAT(lines, testing::Contains(" orbital p")); + EXPECT_THAT(lines, testing::Contains(" strip electron50")); + EXPECT_THAT(lines, testing::Contains(" mesh 2")); + + std::remove("test_sep.out"); +#ifdef __MPI + } +#endif +} + +#ifdef __MPI +TEST_F(ReadSepTest, BcastSep) +{ + if (GlobalV::MY_RANK == 0) + { + std::ifstream ifs; + ifs.open("./support/F_pbe_50.sep"); + ASSERT_TRUE(ifs.is_open()); + read_sep->read_sep(ifs); + ifs.close(); + } + read_sep->bcast_sep(); + if (GlobalV::MY_RANK != 0) + { + EXPECT_EQ(read_sep->label, "F"); + EXPECT_EQ(read_sep->mesh, 1038); + EXPECT_EQ(read_sep->xc_type, "pbe"); + EXPECT_EQ(read_sep->strip_elec, 50); + EXPECT_DOUBLE_EQ(read_sep->r[0], 3.4643182373e-06); + EXPECT_NE(read_sep->r, nullptr); + EXPECT_NE(read_sep->rv, nullptr); + } +} + +int main(int argc, char** argv) +{ + MPI_Init(&argc, &argv); + testing::InitGoogleTest(&argc, argv); + + MPI_Comm_size(MPI_COMM_WORLD, &GlobalV::NPROC); + MPI_Comm_rank(MPI_COMM_WORLD, &GlobalV::MY_RANK); + + int result = RUN_ALL_TESTS(); + + MPI_Finalize(); + return result; +} +#endif // __MPI diff --git a/source/source_cell/test/sepcell_test.cpp b/source/source_cell/test/sepcell_test.cpp new file mode 100644 index 0000000000..d03668c780 --- /dev/null +++ b/source/source_cell/test/sepcell_test.cpp @@ -0,0 +1,286 @@ +#include "gtest/gtest.h" +#include +// #include +#include + +#define private public +#include "source_io/module_parameter/parameter.h" +#undef private + +#ifdef __MPI +#include +#endif + +#define private public +#include "source_cell/sep_cell.h" +#include "source_cell/unitcell.h" +#undef private +pseudo::pseudo() +{ +} +pseudo::~pseudo() +{ +} +Atom_pseudo::Atom_pseudo() +{ +} +Atom_pseudo::~Atom_pseudo() +{ +} +Atom::Atom() +{ +} +Atom::~Atom() +{ +} +InfoNonlocal::InfoNonlocal() +{ +} +InfoNonlocal::~InfoNonlocal() +{ +} +LCAO_Orbitals::LCAO_Orbitals() +{ +} +LCAO_Orbitals::~LCAO_Orbitals() +{ +} +Magnetism::Magnetism() +{ +} +Magnetism::~Magnetism() +{ +} +UnitCell::UnitCell() +{ +} +UnitCell::~UnitCell() +{ +} + +// Test fixture for Sep_Cell tests +class SepCellTest : public ::testing::Test +{ + protected: + Sep_Cell sep_cell; + UnitCell ucell; + + // Names for temporary files used in tests + std::string stru_filename = "STRU_LiF"; + std::string stru_noLi_filename = "STRU_LiF_Warning1"; + std::string f_sep_filename = "F_pbe_50.sep"; + std::string pp_dir = "support/"; // Directory for pseudopotential files + + void SetUp() override + { + // Initialize UnitCell for tests that need it. + // This setup is common for many read_sep_potentials tests. + ucell.ntype = 2; + ucell.atom_label.resize(ucell.ntype); + ucell.atom_label[0] = "Li"; + ucell.atom_label[1] = "F"; + ucell.atoms = new Atom[ucell.ntype]; + ucell.atoms[0].label = "Li"; + ucell.atoms[0].na = 1; // Number of atoms of this type + ucell.atoms[1].label = "F"; + ucell.atoms[1].na = 1; + } + + void TearDown() override + { + delete[] ucell.atoms; + ucell.atoms = nullptr; + } +}; + +TEST_F(SepCellTest, Constructor) +{ + EXPECT_EQ(sep_cell.get_ntype(), 0); + EXPECT_DOUBLE_EQ(sep_cell.get_omega(), 0.0); + EXPECT_DOUBLE_EQ(sep_cell.get_tpiba2(), 0.0); + EXPECT_TRUE(sep_cell.get_seps().empty()); + EXPECT_TRUE(sep_cell.get_sep_enable().empty()); +} + +TEST_F(SepCellTest, Init) +{ + sep_cell.init(2); + EXPECT_EQ(sep_cell.get_ntype(), 2); + ASSERT_EQ(sep_cell.get_seps().size(), 2); + ASSERT_EQ(sep_cell.get_sep_enable().size(), 2); + EXPECT_FALSE(sep_cell.get_sep_enable()[0]); + EXPECT_FALSE(sep_cell.get_sep_enable()[1]); + // Check default values of SepPot within seps + EXPECT_EQ(sep_cell.get_seps()[0].mesh, 0); + EXPECT_FALSE(sep_cell.get_seps()[0].is_enable); +} + +TEST_F(SepCellTest, SetOmega) +{ + sep_cell.set_omega(100.0, 0.25); + EXPECT_DOUBLE_EQ(sep_cell.get_omega(), 100.0); + EXPECT_DOUBLE_EQ(sep_cell.get_tpiba2(), 0.25); +} + +TEST_F(SepCellTest, ReadSepPotentialsSuccess) +{ +#ifdef __MPI + if (GlobalV::MY_RANK == 0) + { +#endif + + std::ifstream ifs(pp_dir + stru_filename); + ASSERT_TRUE(ifs.is_open()); + + sep_cell.init(ucell.ntype); + std::ofstream ofs_running_dummy("dummy_ofs_running.tmp"); + int result = sep_cell.read_sep_potentials(ifs, pp_dir, ofs_running_dummy, ucell); + ifs.close(); + std::remove("dummy_ofs_running.tmp"); + + EXPECT_EQ(result, 1); // Expect success (true) + + // Due to the bug mentioned (this->sep_enable[i] is always false), + // SEP data won't actually be loaded. + ASSERT_EQ(sep_cell.get_sep_enable().size(), 2); + EXPECT_FALSE(sep_cell.get_sep_enable()[0]); // Stays false from init + EXPECT_TRUE(sep_cell.get_sep_enable()[1]); // Stays false from init + + const auto& seps = sep_cell.get_seps(); + ASSERT_EQ(seps.size(), 2); + EXPECT_FALSE(seps[0].is_enable); // Default value, not set from file + EXPECT_EQ(seps[0].mesh, 0); // Default value + EXPECT_EQ(seps[0].label, ""); // Default value + + EXPECT_TRUE(seps[1].is_enable); // Default value + EXPECT_EQ(seps[1].mesh, 1038); // Default value + EXPECT_EQ(seps[1].label, "F"); // Default value + EXPECT_DOUBLE_EQ(seps[1].r_in, 0.0); + EXPECT_DOUBLE_EQ(seps[1].r_out, 2.5); + EXPECT_DOUBLE_EQ(seps[1].r_power, 20.0); + EXPECT_DOUBLE_EQ(seps[1].enhence_a, 1.0); +#ifdef __MPI + } + // If run in MPI, other ranks might need to know the outcome or have sep_cell state consistent. + // For this specific test, only rank 0 performs the read. + // A broadcast test would cover data consistency across ranks. +#endif +} + +TEST_F(SepCellTest, ReadSepPotentialsNoSepFilesSection) +{ +#ifdef __MPI + if (GlobalV::MY_RANK == 0) + { +#endif + + std::ifstream ifs(pp_dir + stru_noLi_filename); + ASSERT_TRUE(ifs.is_open()); + std::ofstream ofs_running_dummy("dummy_ofs_running.tmp"); + + sep_cell.init(ucell.ntype); + int result = sep_cell.read_sep_potentials(ifs, pp_dir, ofs_running_dummy, ucell); + ifs.close(); + std::remove("dummy_ofs_running.tmp"); + + EXPECT_EQ(result, 0); // Expect failure (false) because "SEP_FILES" not found +#ifdef __MPI + } +#endif +} + +#ifdef __MPI +TEST_F(SepCellTest, BcastSepCell) +{ + sep_cell.init(2); // ntype = 2 + // Rank 0 prepares some data (or reads from file) + if (GlobalV::MY_RANK == 0) + { + sep_cell.set_omega(150.0, 0.75); + std::ifstream ifs(pp_dir + stru_filename); + ASSERT_TRUE(ifs.is_open()); + + sep_cell.init(ucell.ntype); + std::ofstream ofs_running_dummy("dummy_ofs_running.tmp"); + int result = sep_cell.read_sep_potentials(ifs, pp_dir, ofs_running_dummy, ucell); + ifs.close(); + std::remove("dummy_ofs_running.tmp"); + + EXPECT_EQ(result, 1); // Expect success (true) + } + + sep_cell.bcast_sep_cell(); + + // All ranks should have the same data + EXPECT_EQ(sep_cell.get_ntype(), 2); + // Omega and tpiba2 are NOT part of Sep_Cell::bcast_sep_cell, so they remain default on non-zero ranks + if (GlobalV::MY_RANK == 0) + { + EXPECT_DOUBLE_EQ(sep_cell.get_omega(), 150.0); + EXPECT_DOUBLE_EQ(sep_cell.get_tpiba2(), 0.75); + } + else + { + EXPECT_DOUBLE_EQ(sep_cell.get_omega(), 0.0); // Default + EXPECT_DOUBLE_EQ(sep_cell.get_tpiba2(), 0.0); // Default + } + + ASSERT_EQ(sep_cell.get_sep_enable().size(), 2); + // sep_enable will be broadcast as false from rank 0 due to read_sep_potentials bug + EXPECT_FALSE(sep_cell.get_sep_enable()[0]); + EXPECT_TRUE(sep_cell.get_sep_enable()[1]); + + const auto& seps = sep_cell.get_seps(); + ASSERT_EQ(seps.size(), 2); + + // Check SepPot data (will be default values due to bug and current test setup) + EXPECT_EQ(seps[0].label, ""); // Default broadcasted + EXPECT_EQ(seps[0].mesh, 0); // Default broadcasted + EXPECT_FALSE(seps[0].is_enable); // Default broadcasted + + EXPECT_EQ(seps[1].label, "F"); // Default broadcasted + EXPECT_EQ(seps[1].mesh, 1038); // Default broadcasted + EXPECT_TRUE(seps[1].is_enable); // Default broadcasted + // Note: SepPot::bcast_sep() allocates memory for r and rv on all ranks + // whenever mesh > 0, regardless of is_enable status + if (seps[0].mesh > 0) + { + EXPECT_NE(seps[0].r, nullptr); + EXPECT_NE(seps[0].rv, nullptr); + } + else + { + EXPECT_EQ(seps[0].r, nullptr); + EXPECT_EQ(seps[0].rv, nullptr); + } + EXPECT_NE(seps[1].r, nullptr); + EXPECT_NE(seps[1].rv, nullptr); + EXPECT_DOUBLE_EQ(seps[1].r[0], 3.4643182373e-06); + EXPECT_DOUBLE_EQ(seps[1].rv[0], -2.0868200000e-05); + EXPECT_DOUBLE_EQ(seps[1].r[7], 2.8965849122e-05); + EXPECT_DOUBLE_EQ(seps[1].rv[7], -1.9723800000e-05); +} +#endif // __MPI + +// Main function for running tests +int main(int argc, char** argv) +{ +#ifdef __MPI + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &GlobalV::NPROC); + MPI_Comm_rank(MPI_COMM_WORLD, &GlobalV::MY_RANK); +#endif + + testing::InitGoogleTest(&argc, argv); + + // Potentially initialize GlobalV::ofs_running here if not handled by test infra + // e.g., if (GlobalV::MY_RANK == 0) GlobalV::ofs_running.open("sep_cell_test.log"); + // For now, assume it's usable or output to console/dev_null is acceptable. + + int result = RUN_ALL_TESTS(); + +#ifdef __MPI + MPI_Finalize(); +#endif + return result; +} diff --git a/source/source_cell/test/support/C_ca_50.sep b/source/source_cell/test/support/C_ca_50.sep new file mode 100644 index 0000000000..325c9aa6e0 --- /dev/null +++ b/source/source_cell/test/support/C_ca_50.sep @@ -0,0 +1,1014 @@ +Sep.Element C +Sep.XcType ca +Sep.Orbital p +Sep.Points 1006 +Sep.StripAmount 50 + + diff --git a/source/source_cell/test/support/F_pbe_50.sep b/source/source_cell/test/support/F_pbe_50.sep new file mode 100644 index 0000000000..c8bc18b301 --- /dev/null +++ b/source/source_cell/test/support/F_pbe_50.sep @@ -0,0 +1,1046 @@ +Sep.Element F +Sep.XcType pbe +Sep.Orbital p +Sep.Points 1038 +Sep.StripAmount 50 + + diff --git a/source/source_cell/test/support/STRU_LiF b/source/source_cell/test/support/STRU_LiF new file mode 100644 index 0000000000..16f61bcc80 --- /dev/null +++ b/source/source_cell/test/support/STRU_LiF @@ -0,0 +1,28 @@ +ATOMIC_SPECIES +Li 14.0000 Li_ONCV_PBE-1.2.upf upf201 +F 0.0000 F_ONCV_PBE-1.2.upf upf201 + +LATTICE_CONSTANT +1.0000000000 + +LATTICE_VECTORS + 0.0000000000 3.8379626543 3.8379626543 + 3.8379626543 0.0000000000 3.8379626543 + 3.8379626543 3.8379626543 0.0000000000 + +SEP_FILES +Li 0 +F 1 F_pbe_50.sep 0.0 2.5 20.0 1.0 + +ATOMIC_POSITIONS +Direct + +Li #label +0.0000 #magnetism +1 #number of atoms + 0.0000000000 0.0000000000 0.0000000000 m 1 1 1 + +F #label +0.0000 #magnetism +1 #number of atoms + 0.5000000000 0.5000000000 0.5000000000 m 1 1 1 diff --git a/source/source_cell/test/support/STRU_LiF_Warning1 b/source/source_cell/test/support/STRU_LiF_Warning1 new file mode 100644 index 0000000000..9e110b24db --- /dev/null +++ b/source/source_cell/test/support/STRU_LiF_Warning1 @@ -0,0 +1,27 @@ +ATOMIC_SPECIES +Li 14.0000 Li_ONCV_PBE-1.2.upf upf201 +F 0.0000 F_ONCV_PBE-1.2.upf upf201 + +LATTICE_CONSTANT +1.0000000000 + +LATTICE_VECTORS + 0.0000000000 3.8379626543 3.8379626543 + 3.8379626543 0.0000000000 3.8379626543 + 3.8379626543 3.8379626543 0.0000000000 + +SEP_FILES +F 1 F_pbe_50.sep 0.0 2.5 20.0 1.0 + +ATOMIC_POSITIONS +Direct + +Li #label +0.0000 #magnetism +1 #number of atoms + 0.0000000000 0.0000000000 0.0000000000 m 1 1 1 + +F #label +0.0000 #magnetism +1 #number of atoms + 0.5000000000 0.5000000000 0.5000000000 m 1 1 1 diff --git a/source/source_cell/test_pw/CMakeLists.txt b/source/source_cell/test_pw/CMakeLists.txt index 1776bf38c2..68818a25ca 100644 --- a/source/source_cell/test_pw/CMakeLists.txt +++ b/source/source_cell/test_pw/CMakeLists.txt @@ -9,14 +9,14 @@ install(FILES unitcell_test_pw_para.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) AddTest( TARGET MODULE_CELL_unitcell_test_pw - LIBS parameter ${math_libs} base device + LIBS parameter ${math_libs} base device SOURCES unitcell_test_pw.cpp ../unitcell.cpp ../read_atoms.cpp ../atom_spec.cpp ../update_cell.cpp ../bcast_cell.cpp ../atom_pseudo.cpp ../pseudo.cpp ../read_pp.cpp ../read_pp_complete.cpp ../read_pp_upf201.cpp ../read_pp_upf100.cpp ../read_stru.cpp ../read_atom_species.cpp - ../read_pp_vwr.cpp ../read_pp_blps.cpp ../../source_io/output.cpp + ../read_pp_vwr.cpp ../read_pp_blps.cpp ../../source_io/output.cpp ../../source_estate/read_pseudo.cpp ../../source_estate/cal_nelec_nband.cpp ../../source_estate/read_orb.cpp ../../source_cell/print_cell.cpp - ../../source_estate/cal_wfc.cpp + ../../source_estate/cal_wfc.cpp ../sep.cpp ../sep_cell.cpp ) find_program(BASH bash) diff --git a/source/source_cell/unitcell.cpp b/source/source_cell/unitcell.cpp index 44b620089c..b607623408 100644 --- a/source/source_cell/unitcell.cpp +++ b/source/source_cell/unitcell.cpp @@ -6,6 +6,7 @@ #include "source_base/global_variable.h" #include "unitcell.h" #include "bcast_cell.h" +#include "source_base/tool_quit.h" #include "source_io/module_parameter/parameter.h" #include "source_cell/read_stru.h" #include "source_base/atom_in.h" @@ -13,6 +14,7 @@ #include "source_base/global_file.h" #include "source_base/parallel_common.h" #include "source_io/module_parameter/parameter.h" +#include "source_cell/sep_cell.h" #ifdef __MPI #include "mpi.h" @@ -23,14 +25,14 @@ #endif #include "update_cell.h" -UnitCell::UnitCell() +UnitCell::UnitCell() { itia2iat.create(1, 1); } -UnitCell::~UnitCell() +UnitCell::~UnitCell() { - if (set_atom_flag) + if (set_atom_flag) { delete[] atoms; } @@ -181,7 +183,7 @@ std::vector> UnitCell::get_constrain() const //============================================================== // Calculate various lattice related quantities for given latvec //============================================================== -void UnitCell::setup_cell(const std::string& fn, std::ofstream& log) +void UnitCell::setup_cell(const std::string& fn, std::ofstream& log) { ModuleBase::TITLE("UnitCell", "setup_cell"); @@ -200,6 +202,8 @@ void UnitCell::setup_cell(const std::string& fn, std::ofstream& log) bool ok = true; bool ok2 = true; + bool ok3 = true; // for sep potential in DFT-1/2 + // (3) read in atom information this->atom_mass.resize(ntype); this->atom_label.resize(ntype); @@ -207,17 +211,17 @@ void UnitCell::setup_cell(const std::string& fn, std::ofstream& log) this->pseudo_type.resize(ntype); this->orbital_fn.resize(ntype); - if (GlobalV::MY_RANK == 0) + if (GlobalV::MY_RANK == 0) { // open "atom_unitcell" file. std::ifstream ifa(fn.c_str(), std::ios::in); - if (!ifa) + if (!ifa) { GlobalV::ofs_warning << fn; ok = false; } - if (ok) + if (ok) { log << "\n\n"; log << " >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << std::endl; @@ -245,6 +249,13 @@ void UnitCell::setup_cell(const std::string& fn, std::ofstream& log) //======================== const bool read_lattice_constant = unitcell::read_lattice_constant(ifa, log ,this->lat); //========================== + // readl sep potential, currently using the pseudopotential folder (pseudo_dir in INPUT) + //========================== + if (PARAM.inp.dfthalf_type > 0) { + GlobalC::sep_cell.init(this->ntype); + ok3 = GlobalC::sep_cell.read_sep_potentials(ifa, PARAM.inp.pseudo_dir, GlobalV::ofs_warning, *this); + } + //========================== // call read_atom_positions //========================== ok2 = unitcell::read_atom_positions(*this, ifa, log, GlobalV::ofs_warning); @@ -253,6 +264,7 @@ void UnitCell::setup_cell(const std::string& fn, std::ofstream& log) #ifdef __MPI Parallel_Common::bcast_bool(ok); Parallel_Common::bcast_bool(ok2); + Parallel_Common::bcast_bool(ok3); #endif if (!ok) { ModuleBase::WARNING_QUIT( @@ -263,9 +275,13 @@ void UnitCell::setup_cell(const std::string& fn, std::ofstream& log) ModuleBase::WARNING_QUIT("UnitCell::setup_cell", "Something wrong during read_atom_positions."); } + if (!ok3) { + ModuleBase::WARNING_QUIT("UnitCell::setup_cell", "Something wrong during read_sep_potentials"); + } #ifdef __MPI unitcell::bcast_unitcell(*this); + GlobalC::sep_cell.bcast_sep_cell(); #endif //======================================================== @@ -282,11 +298,11 @@ void UnitCell::setup_cell(const std::string& fn, std::ofstream& log) std::cout << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; std::cout << " Warning: The lattice vector is left-handed; a right-handed vector is prefered." << std::endl; std::cout << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; - GlobalV::ofs_warning << + GlobalV::ofs_warning << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; - GlobalV::ofs_warning << + GlobalV::ofs_warning << " Warning: The lattice vector is left-handed; a right-handed vector is prefered." << std::endl; - GlobalV::ofs_warning << + GlobalV::ofs_warning << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%" << std::endl; this->omega = std::abs(this->omega); } @@ -329,11 +345,13 @@ void UnitCell::setup_cell(const std::string& fn, std::ofstream& log) //=================================== this->set_iat2itia(); + GlobalC::sep_cell.set_omega(this->omega, this->tpiba2); + return; } -void UnitCell::set_iat2iwt(const int& npol_in) +void UnitCell::set_iat2iwt(const int& npol_in) { #ifdef __DEBUG assert(npol_in == 1 || npol_in == 2); @@ -345,9 +363,9 @@ void UnitCell::set_iat2iwt(const int& npol_in) int iat = 0; int iwt = 0; - for (int it = 0; it < this->ntype; it++) + for (int it = 0; it < this->ntype; it++) { - for (int ia = 0; ia < atoms[it].na; ia++) + for (int ia = 0; ia < atoms[it].na; ia++) { this->iat2iwt[iat] = iwt; iwt += atoms[it].nw * this->npol; @@ -360,14 +378,14 @@ void UnitCell::set_iat2iwt(const int& npol_in) // check if any atom can be moved -bool UnitCell::if_atoms_can_move() const +bool UnitCell::if_atoms_can_move() const { - for (int it = 0; it < this->ntype; it++) + for (int it = 0; it < this->ntype; it++) { Atom* atom = &atoms[it]; - for (int ia = 0; ia < atom->na; ia++) + for (int ia = 0; ia < atom->na; ia++) { - if (atom->mbl[ia].x || atom->mbl[ia].y || atom->mbl[ia].z) + if (atom->mbl[ia].x || atom->mbl[ia].y || atom->mbl[ia].z) { return true; } @@ -377,10 +395,10 @@ bool UnitCell::if_atoms_can_move() const } // check if lattice vector can be changed -bool UnitCell::if_cell_can_change() const +bool UnitCell::if_cell_can_change() const { // need to be fixed next - if (this->lc[0] || this->lc[1] || this->lc[2]) + if (this->lc[0] || this->lc[1] || this->lc[2]) { return true; } @@ -457,7 +475,7 @@ void UnitCell::setup(const std::string& latname_in, } -void UnitCell::compare_atom_labels(const std::string &label1, const std::string &label2) +void UnitCell::compare_atom_labels(const std::string &label1, const std::string &label2) { if (label1!= label2) //'!( "Ag" == "Ag" || "47" == "47" || "Silver" == Silver" )' { @@ -475,26 +493,26 @@ void UnitCell::compare_atom_labels(const std::string &label1, const std::string { std::string stru_label = ""; std::string psuedo_label = ""; - for (int ip = 0; ip < label1.length(); ip++) + for (int ip = 0; ip < label1.length(); ip++) { - if (!(isdigit(label1[ip]) || label1[ip] == '_')) + if (!(isdigit(label1[ip]) || label1[ip] == '_')) { stru_label += label1[ip]; - } - else + } + else { break; } } stru_label[0] = toupper(stru_label[0]); - for (int ip = 0; ip < label2.length(); ip++) + for (int ip = 0; ip < label2.length(); ip++) { - if (!(isdigit(label2[ip]) || label2[ip] == '_')) + if (!(isdigit(label2[ip]) || label2[ip] == '_')) { psuedo_label += label2[ip]; - } - else + } + else { break; } diff --git a/source/source_esolver/esolver_ks_pw.cpp b/source/source_esolver/esolver_ks_pw.cpp index 36435c4645..820cc76c00 100644 --- a/source/source_esolver/esolver_ks_pw.cpp +++ b/source/source_esolver/esolver_ks_pw.cpp @@ -26,6 +26,7 @@ #include "source_io/write_wfc_pw.h" #include "source_lcao/module_deltaspin/spin_constrain.h" #include "source_lcao/module_dftu/dftu.h" +#include "source_pw/module_pwdft/VSep_in_pw.h" #include "source_pw/module_pwdft/elecond.h" #include "source_pw/module_pwdft/forces.h" #include "source_pw/module_pwdft/hamilt_pw.h" @@ -239,6 +240,11 @@ void ESolver_KS_PW::before_all_runners(UnitCell& ucell, const Input_p exx_helper.set_wg(&this->pelec->wg); } } + + // 10) initialize DFT-1/2 + if (PARAM.inp.dfthalf_type > 0) { + GlobalC::vsep_cell.init_vsep(*this->pw_rhod); + } } template @@ -292,6 +298,13 @@ void ESolver_KS_PW::before_scf(UnitCell& ucell, const int istep) } } + //---------------------------------------------------------- + //! 4.5) DFT-1/2 calculations, sep potential need to generate before effective potential calculation + //---------------------------------------------------------- + if (PARAM.inp.dfthalf_type > 0) { + GlobalC::vsep_cell.generate_vsep_r(this->pw_rhod[0], this->sf.strucFac); + } + //---------------------------------------------------------- //! 5) Renew local pseudopotential //---------------------------------------------------------- diff --git a/source/source_estate/CMakeLists.txt b/source/source_estate/CMakeLists.txt index 0bdd120879..651c602822 100644 --- a/source/source_estate/CMakeLists.txt +++ b/source/source_estate/CMakeLists.txt @@ -16,6 +16,7 @@ list(APPEND objects module_pot/pot_local_paw.cpp module_pot/potential_new.cpp module_pot/potential_types.cpp + module_pot/pot_sep.cpp module_charge/charge.cpp module_charge/charge_init.cpp module_charge/charge_mpi.cpp @@ -69,4 +70,4 @@ endif() if(ENABLE_LCAO) add_subdirectory(module_dm) -endif() \ No newline at end of file +endif() diff --git a/source/source_estate/module_pot/pot_sep.cpp b/source/source_estate/module_pot/pot_sep.cpp new file mode 100644 index 0000000000..a51a3a222e --- /dev/null +++ b/source/source_estate/module_pot/pot_sep.cpp @@ -0,0 +1,25 @@ +#include "pot_sep.h" + +#include "source_base/timer.h" +#include "source_base/tool_title.h" + +namespace elecstate +{ +void PotSep::cal_fixed_v(double* vl_pseudo) +{ + ModuleBase::TITLE("PotSep", "cal_fixed_v"); + ModuleBase::timer::tick("PotSep", "cal_fixed_v"); + + // GlobalC::vsep_cell.generate_vsep_r(this->rho_basis_[0], this->sf_[0]); + + // const_cast(this->vsep_)->generate_vsep_r(this->rho_basis_[0], this->sf_[0]); + + for (int ir = 0; ir < this->rho_basis_->nrxx; ++ir) + { + vl_pseudo[ir] += GlobalC::vsep_cell.vsep_r[ir]; + } + + ModuleBase::timer::tick("PotSep", "cal_fixed_v"); + return; +} +} // namespace elecstate diff --git a/source/source_estate/module_pot/pot_sep.h b/source/source_estate/module_pot/pot_sep.h new file mode 100644 index 0000000000..d42fcfe7a9 --- /dev/null +++ b/source/source_estate/module_pot/pot_sep.h @@ -0,0 +1,46 @@ +#ifndef POTSEP_H +#define POTSEP_H + +#include "pot_base.h" +#include "source_base/matrix.h" +#include "source_pw/module_pwdft/VSep_in_pw.h" + +namespace elecstate +{ + +class PotSep : public PotBase +{ + public: + // PotSep(const ModuleBase::matrix* vsep_in, + // const ModuleBase::ComplexMatrix* sf_in, + // const ModulePW::PW_Basis* rho_basis_in, + // const bool* sep_enable_in) + // : vsep_(vsep_in), sf_(sf_in), sep_enable_(sep_enable_in) + // { + // assert(this->vsep_->nr == this->sf_->nr); + // this->rho_basis_ = rho_basis_in; + // this->ntype_ = this->vsep_->nr; + // this->fixed_mode = true; + // this->dynamic_mode = false; + // } + PotSep(const ModuleBase::ComplexMatrix* sf_in, const ModulePW::PW_Basis* rho_basis_in) : sf_(sf_in) + { + assert(GlobalC::vsep_cell.vsep_form.nr == this->sf_->nr); + // assert(this->vsep_->vsep_form.nr == this->sf_->nr); + this->rho_basis_ = rho_basis_in; + // this->ntype_ = this->vsep_->vsep_form.nr; + this->fixed_mode = true; + this->dynamic_mode = false; + } + + void cal_fixed_v(double* vl_pseudo) override; + + // const VSep* vsep_ = nullptr; + const ModuleBase::ComplexMatrix* sf_ = nullptr; + // int ntype_ = 0; + // const bool* sep_enable_; +}; + +} // namespace elecstate + +#endif /* ifndef POTSEP_H */ diff --git a/source/source_estate/module_pot/potential_types.cpp b/source/source_estate/module_pot/potential_types.cpp index 9153dd4203..db647788d2 100644 --- a/source/source_estate/module_pot/potential_types.cpp +++ b/source/source_estate/module_pot/potential_types.cpp @@ -12,6 +12,7 @@ #include "pot_surchem.hpp" #include "pot_xc.h" #include "potential_new.h" +#include "pot_sep.h" #include "pot_local_paw.h" #ifdef __LCAO #include "H_TDDFT_pw.h" @@ -56,6 +57,9 @@ PotBase* Potential::get_pot_type(const std::string& pot_type) return new H_TDDFT_pw(this->rho_basis_, this->ucell_); } #endif + else if (pot_type == "dfthalf") { + return new PotSep(&(this->structure_factors_->strucFac), this->rho_basis_); + } else { ModuleBase::WARNING_QUIT("Potential::get_pot_type", "Please input correct component of potential!"); @@ -63,4 +67,4 @@ PotBase* Potential::get_pot_type(const std::string& pot_type) } } -} // namespace elecstate \ No newline at end of file +} // namespace elecstate diff --git a/source/source_io/module_parameter/input_parameter.h b/source/source_io/module_parameter/input_parameter.h index 65a82ac2ac..831db1b158 100644 --- a/source/source_io/module_parameter/input_parameter.h +++ b/source/source_io/module_parameter/input_parameter.h @@ -124,6 +124,8 @@ struct Input_para bool noncolin = false; ///< using non-collinear-spin double soc_lambda = 1.0; ///< The fraction of SOC based on scalar relativity (SR) of the pseudopotential + int dfthalf_type = 0; ///< DFT-1/2 type, 0:off, 1: shell DFT-1/2 + // ============== #Parameters (3.LCAO) =========================== int nb2d = 0; ///< matrix 2d division. int lmaxmax = 2; ///< maximum of l channels used @@ -659,29 +661,29 @@ struct Input_para * the following two sets of parameters are for the XC parameterization. * The first element should be the LibXC id, to assign the analytical * form of the eXchange and Correlation part of the functional. - * + * * Starting from the second parameter, the parameters are the coefficients * of the functional. For example the M06-L functional, one should refer * to the source file (source code of LibXC) - * + * * src/mgga_x_m06l.c - * + * * the implementation can be found in the file - * + * * src/maple2c/mgga_exc/mgga_x_m06l.c. - * + * * There are 18 parameters for the exchange part, so the whole length of * the xc_exch_ext should be 19. (MGGA_X_M06L, id = 203) - * + * * Likewise, the correlation part can be found in corresponding files. - * + * * PBE functional is used as the default functional for XCPNet. */ // src/gga_x_pbe.c std::vector xc_exch_ext = { - 101, 0.8040, 0.2195149727645171}; + 101, 0.8040, 0.2195149727645171}; // src/gga_c_pbe.c std::vector xc_corr_ext = { - 130, 0.06672455060314922, 0.031090690869654895034, 1.00000}; + 130, 0.06672455060314922, 0.031090690869654895034, 1.00000}; }; #endif diff --git a/source/source_io/read_input_item_elec_stru.cpp b/source/source_io/read_input_item_elec_stru.cpp index 1af097c405..900bc72e9c 100644 --- a/source/source_io/read_input_item_elec_stru.cpp +++ b/source/source_io/read_input_item_elec_stru.cpp @@ -128,7 +128,7 @@ void ReadInput::item_elec_stru() #endif #ifndef __CUDA warningstr = "ks_solver is set to " + ks_solver + " but ABACUS is built with CPU only!\n" - + " Please rebuild ABACUS with GPU support or change the ks_solver."; + + " Please rebuild ABACUS with GPU support or change the ks_solver."; ModuleBase::WARNING_QUIT("ReadInput", warningstr); #endif if( ks_solver == "cusolvermp") @@ -137,7 +137,7 @@ void ReadInput::item_elec_stru() warningstr = "ks_solver is set to cusolvermp, but ABACUS is not built with cusolvermp support\n" " Please rebuild ABACUS with cusolvermp support or change the ks_solver."; ModuleBase::WARNING_QUIT("ReadInput", warningstr); -#endif +#endif } } else if (ks_solver == "pexsi") @@ -274,13 +274,13 @@ void ReadInput::item_elec_stru() const double libxc_id_dbl = para.input.xc_exch_ext[0]; if (std::abs(libxc_id_dbl - std::round(libxc_id_dbl)) > 1.0e-6) { - ModuleBase::WARNING_QUIT("ReadInput", + ModuleBase::WARNING_QUIT("ReadInput", "The first parameter (libxc id) can never be a float number"); } // the first value is a positive integer if (libxc_id_dbl < 0) { - ModuleBase::WARNING_QUIT("ReadInput", + ModuleBase::WARNING_QUIT("ReadInput", "The first parameter (libxc id) should be a positive integer"); } }; @@ -308,13 +308,13 @@ void ReadInput::item_elec_stru() const double libxc_id_dbl = para.input.xc_corr_ext[0]; if (std::abs(libxc_id_dbl - std::round(libxc_id_dbl)) > 1.0e-6) { - ModuleBase::WARNING_QUIT("ReadInput", + ModuleBase::WARNING_QUIT("ReadInput", "The first parameter (libxc id) can never be a float number"); } // the first value is a positive integer if (libxc_id_dbl < 0) { - ModuleBase::WARNING_QUIT("ReadInput", + ModuleBase::WARNING_QUIT("ReadInput", "The first parameter (libxc id) should be a positive integer"); } }; @@ -418,7 +418,7 @@ void ReadInput::item_elec_stru() item.annotation = "type of smearing_method: gauss; fd; fixed; mp; mp2; mv"; read_sync_string(input.smearing_method); item.check_value = [](const Input_Item& item, const Parameter& para) { - const std::vector methods = {"gauss", "gaussian", + const std::vector methods = {"gauss", "gaussian", "fd", "fermi-dirac", "fixed", "mp", "mp2", "mp3" @@ -575,9 +575,9 @@ void ReadInput::item_elec_stru() "set to 1, a fast algorithm is used"; read_sync_bool(input.gamma_only); item.reset_value = [](const Input_Item& item, Parameter& para) { - if (para.input.basis_type == "pw" && para.input.gamma_only) + if (para.input.basis_type == "pw" && para.input.gamma_only) { - para.input.gamma_only = false; + para.input.gamma_only = false; GlobalV::ofs_warning << " WARNING : gamma_only has not been implemented for pw yet" << std::endl; GlobalV::ofs_warning << "gamma_only is not supported in the pw model" << std::endl; GlobalV::ofs_warning << " the INPUT parameter gamma_only has been reset to 0" << std::endl; @@ -892,5 +892,11 @@ void ReadInput::item_elec_stru() read_sync_double(input.bessel_nao_sigma); this->add_item(item); } + { + Input_Item item("dfthalf_type"); + item.annotation = "DFT-1/2 type, 0:off; 1:shell DFT-1/2"; + read_sync_int(input.dfthalf_type); + this->add_item(item); + } } } // namespace ModuleIO diff --git a/source/source_md/test/CMakeLists.txt b/source/source_md/test/CMakeLists.txt index dee30a6ab7..516fc52b0c 100644 --- a/source/source_md/test/CMakeLists.txt +++ b/source/source_md/test/CMakeLists.txt @@ -1,7 +1,7 @@ remove_definitions(-D__MPI -D__LCAO ) add_definitions(-D__NORMAL) -list(APPEND depend_files +list(APPEND depend_files ../md_func.cpp ../../source_cell/unitcell.cpp ../../source_cell/update_cell.cpp @@ -56,25 +56,27 @@ list(APPEND depend_files ../../source_estate/cal_wfc.cpp ../../source_estate/cal_nelec_nband.cpp ../../source_estate/read_orb.cpp + ../../source_cell/sep.cpp + ../../source_cell/sep_cell.cpp ) AddTest( TARGET MODULE_MD_LJ_pot - LIBS parameter ${math_libs} psi device - SOURCES lj_pot_test.cpp + LIBS parameter ${math_libs} psi device + SOURCES lj_pot_test.cpp ${depend_files} ) AddTest( TARGET MODULE_MD_func - LIBS parameter ${math_libs} psi device - SOURCES md_func_test.cpp + LIBS parameter ${math_libs} psi device + SOURCES md_func_test.cpp ${depend_files} ) AddTest( TARGET MODULE_MD_fire - LIBS parameter ${math_libs} psi device + LIBS parameter ${math_libs} psi device SOURCES fire_test.cpp ../md_base.cpp ../fire.cpp @@ -83,7 +85,7 @@ AddTest( AddTest( TARGET MODULE_MD_verlet - LIBS parameter ${math_libs} psi device + LIBS parameter ${math_libs} psi device SOURCES verlet_test.cpp ../md_base.cpp ../verlet.cpp @@ -92,7 +94,7 @@ AddTest( AddTest( TARGET MODULE_MD_nhc - LIBS parameter ${math_libs} psi device + LIBS parameter ${math_libs} psi device SOURCES nhchain_test.cpp ../md_base.cpp ../nhchain.cpp @@ -102,7 +104,7 @@ AddTest( AddTest( TARGET MODULE_MD_msst - LIBS parameter ${math_libs} psi device + LIBS parameter ${math_libs} psi device SOURCES msst_test.cpp ../md_base.cpp ../msst.cpp @@ -113,7 +115,7 @@ AddTest( AddTest( TARGET MODULE_MD_lgv - LIBS parameter ${math_libs} psi device + LIBS parameter ${math_libs} psi device SOURCES langevin_test.cpp ../md_base.cpp ../langevin.cpp diff --git a/source/source_pw/module_pwdft/CMakeLists.txt b/source/source_pw/module_pwdft/CMakeLists.txt index 03e808f6e6..a68d4d1c7c 100644 --- a/source/source_pw/module_pwdft/CMakeLists.txt +++ b/source/source_pw/module_pwdft/CMakeLists.txt @@ -42,6 +42,7 @@ list(APPEND objects radial_proj.cpp onsite_projector.cpp onsite_proj_tools.cpp + VSep_in_pw.cpp ) add_library( diff --git a/source/source_pw/module_pwdft/VSep_in_pw.cpp b/source/source_pw/module_pwdft/VSep_in_pw.cpp new file mode 100644 index 0000000000..8aa9187c38 --- /dev/null +++ b/source/source_pw/module_pwdft/VSep_in_pw.cpp @@ -0,0 +1,152 @@ +#include "VSep_in_pw.h" + +#include "source_base/constants.h" +#include "source_base/libm/libm.h" +#include "source_base/math_integral.h" +#include "source_base/timer.h" +#include "source_base/tool_title.h" +#include "source_cell/sep.h" +#include "source_cell/sep_cell.h" + +#include +#include +#include +#include + +namespace GlobalC +{ +VSep vsep_cell; +} +namespace +{ +double sphere_cut(double r, double r_out, double r_power) +{ + if (r <= 0 || r >= r_out) + { + return 0.0; + } + return std::pow(1 - std::pow(r / r_out, r_power), 3); +} + +double shell_cut(double r, double r_in, double r_out, double r_power) +{ + if (r_in <= 0) + { + return sphere_cut(r, r_out, r_power); + } + if (r_in >= r_out) + { + return 0.0; + } + if (r <= r_in || r >= r_out) + { + return 0.0; + } + return std::pow(1 - std::pow(2 * (r - r_in) / (r_out - r_in) - 1.0, r_power), 3); +} +} // namespace + +VSep::VSep() noexcept = default; + +VSep::~VSep() noexcept = default; + +void VSep::init_vsep(const ModulePW::PW_Basis& rho_basis) +{ + ModuleBase::TITLE("VSep", "init_vsep"); + ModuleBase::timer::tick("VSep", "init_vsep"); + + int ntype = GlobalC::sep_cell.get_ntype(); + + this->vsep_form.create(ntype, rho_basis.ngg, true); + + const double d_fpi_omega = ModuleBase::FOUR_PI / GlobalC::sep_cell.get_omega(); + int igl0 = 0; + for (int it = 0; it < ntype; ++it) + { + if (!GlobalC::sep_cell.get_sep_enable()[it]) + { + continue; + } + const SepPot* sep_pot = &GlobalC::sep_cell.get_seps()[it]; + // Simpson integral requires that the grid points be odd, if it is even, subtract one. + int mesh = sep_pot->mesh; + if ((mesh & 1) == 0) + { + mesh--; + } + + double* r = sep_pot->r; + double* rv = sep_pot->rv; + std::vector shell_rv(sep_pot->mesh); + std::vector rab(sep_pot->mesh); + std::vector aux(sep_pot->mesh); + + // calculate a and b of r [i] = a * (exp(b*i) - 1), i = 1,..., mesh. Note: no 0 + // for rab[i] = (r[i] + a) * b + double b_val = log(r[1] / r[0] - 1); + double a_val = r[0] / (exp(b_val) - 1); + + for (int ir = 0; ir < sep_pot->mesh; ++ir) + { + shell_rv[ir] + = shell_cut(r[ir], sep_pot->r_in, sep_pot->r_out, sep_pot->r_power) * rv[ir] * sep_pot->enhence_a; + rab[ir] = (r[ir] + a_val) * b_val; + } + + igl0 = 0; + // start from |G|=0 or not. + if (rho_basis.gg_uniq[0] < 1.0e-8) + { + for (int ir = 0; ir < sep_pot->mesh; ++ir) + { + aux[ir] = r[ir] * shell_rv[ir]; + } + ModuleBase::Integral::Simpson_Integral(mesh, aux.data(), rab.data(), this->vsep_form(it, 0)); + this->vsep_form(it, 0) *= d_fpi_omega; + igl0 = 1; + } + + for (int ig = igl0; ig < rho_basis.ngg; ++ig) + { + double gx2 = rho_basis.gg_uniq[ig] * GlobalC::sep_cell.get_tpiba2(); + double gx = std::sqrt(gx2); + for (int ir = 0; ir < sep_pot->mesh; ++ir) + { + aux[ir] = shell_rv[ir] * ModuleBase::libm::sin(gx * r[ir]) / gx; + } + ModuleBase::Integral::Simpson_Integral(mesh, aux.data(), rab.data(), this->vsep_form(it, ig)); + this->vsep_form(it, ig) *= d_fpi_omega; + } + } + + ModuleBase::timer::tick("VSep", "init_vsep"); +} + +void VSep::generate_vsep_r(const ModulePW::PW_Basis& rho_basis, const ModuleBase::ComplexMatrix& sf_in) +{ + ModuleBase::TITLE("VSep", "generate_vsep_r"); + ModuleBase::timer::tick("VSep", "generate_vsep_r"); + + this->nrxx = rho_basis.nrxx; + this->vsep_r.assign(rho_basis.nrxx, 0.0); + + std::unique_ptr[]> vg(new std::complex[rho_basis.npw]); + ModuleBase::GlobalFunc::ZEROS(vg.get(), rho_basis.npw); + + for (int it = 0; it < GlobalC::sep_cell.get_ntype(); it++) + { + if (!GlobalC::sep_cell.get_sep_enable()[it]) + { + continue; + } + + for (int ig = 0; ig < rho_basis.npw; ++ig) + { + vg[ig] += this->vsep_form(it, rho_basis.ig2igg[ig]) * sf_in(it, ig); + } + } + + rho_basis.recip2real(vg.get(), this->vsep_r.data()); + + ModuleBase::timer::tick("VSep", "generate_vsep_r"); +} diff --git a/source/source_pw/module_pwdft/VSep_in_pw.h b/source/source_pw/module_pwdft/VSep_in_pw.h new file mode 100644 index 0000000000..5b5be91367 --- /dev/null +++ b/source/source_pw/module_pwdft/VSep_in_pw.h @@ -0,0 +1,30 @@ +#ifndef VSEP_IN_PW +#define VSEP_IN_PW + +#include "source_base/matrix.h" +#include "source_basis/module_pw/pw_basis.h" + +#include + +class VSep +{ + public: + VSep() noexcept; + ~VSep() noexcept; + + void init_vsep(const ModulePW::PW_Basis& rho_basis); + void generate_vsep_r(const ModulePW::PW_Basis& rho_basis, const ModuleBase::ComplexMatrix& sf_in); + + ModuleBase::matrix vsep_form; + std::vector vsep_r; + + private: + int nrxx = 0; +}; + +namespace GlobalC +{ +extern VSep vsep_cell; +} + +#endif /* ifndef VSEP_IN_PW */ diff --git a/source/source_pw/module_pwdft/hamilt_pw.cpp b/source/source_pw/module_pwdft/hamilt_pw.cpp index 86782ed665..a48f230bdc 100644 --- a/source/source_pw/module_pwdft/hamilt_pw.cpp +++ b/source/source_pw/module_pwdft/hamilt_pw.cpp @@ -73,6 +73,10 @@ HamiltPW::HamiltPW(elecstate::Potential* pot_in, { pot_register_in.push_back("gatefield"); } + // DFT-1/2 + if (PARAM.inp.dfthalf_type == 1) { + pot_register_in.push_back("dfthalf"); + } //only Potential is not empty, Veff and Meta are available if(pot_register_in.size()>0) { diff --git a/source/source_pw/module_pwdft/test/CMakeLists.txt b/source/source_pw/module_pwdft/test/CMakeLists.txt index 32009ba9e7..5922b02567 100644 --- a/source/source_pw/module_pwdft/test/CMakeLists.txt +++ b/source/source_pw/module_pwdft/test/CMakeLists.txt @@ -30,7 +30,7 @@ AddTest( AddTest( TARGET structure_factor_test - LIBS parameter ${math_libs} base device planewave + LIBS parameter ${math_libs} base device planewave SOURCES structure_factor_test.cpp ../structure_factor.cpp ../parallel_grid.cpp ../../../source_cell/unitcell.cpp ../../../source_io/output.cpp @@ -49,8 +49,10 @@ AddTest( ../../../source_cell/read_pp_upf201.cpp ../../../source_cell/read_pp_vwr.cpp ../../../source_cell/read_pp_blps.cpp + ../../../source_cell/sep.cpp + ../../../source_cell/sep_cell.cpp ../../../source_estate/read_pseudo.cpp ../../../source_estate/cal_wfc.cpp ../../../source_estate/cal_nelec_nband.cpp ../../../source_estate/read_orb.cpp -) \ No newline at end of file +) From 0b7d5f22327c769b711d1c4dc4503eef22d08cb4 Mon Sep 17 00:00:00 2001 From: Yang Shengxin Date: Wed, 10 Sep 2025 15:35:53 +0800 Subject: [PATCH 02/13] Fix: Compilation error in DeepKS unit tests after adding DFT-1/2 --- source/source_lcao/module_deepks/test/CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/source_lcao/module_deepks/test/CMakeLists.txt b/source/source_lcao/module_deepks/test/CMakeLists.txt index 4ea1cd1625..93574f4106 100644 --- a/source/source_lcao/module_deepks/test/CMakeLists.txt +++ b/source/source_lcao/module_deepks/test/CMakeLists.txt @@ -18,6 +18,8 @@ add_executable( ../../../source_cell/read_pp_upf201.cpp ../../../source_cell/read_pp_vwr.cpp ../../../source_cell/read_pp_blps.cpp + ../../../source_cell/sep.cpp + ../../../source_cell/sep_cell.cpp ../../../source_pw/module_pwdft/soc.cpp ../../../source_io/output.cpp ../../../source_io/sparse_matrix.cpp @@ -27,8 +29,8 @@ add_executable( ../../../source_estate/cal_nelec_nband.cpp ../../../source_estate/module_dm/density_matrix.cpp ../../../source_estate/module_dm/density_matrix_io.cpp - ../../../source_lcao/module_hcontainer/base_matrix.cpp - ../../../source_lcao/module_hcontainer/hcontainer.cpp + ../../../source_lcao/module_hcontainer/base_matrix.cpp + ../../../source_lcao/module_hcontainer/hcontainer.cpp ../../../source_lcao/module_hcontainer/atom_pair.cpp ../../../source_lcao/module_hcontainer/func_transfer.cpp ../../../source_lcao/module_hcontainer/func_folding.cpp From 94912c403a84bef855912173da18edc6867ca1af Mon Sep 17 00:00:00 2001 From: Yang Shengxin Date: Fri, 12 Sep 2025 18:41:36 +0800 Subject: [PATCH 03/13] Fix: Add the additional files to Makefile.Objects --- source/Makefile.Objects | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/source/Makefile.Objects b/source/Makefile.Objects index 764cf0ce51..c907fae608 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -202,6 +202,8 @@ OBJS_CELL=atom_pseudo.o\ bcast_cell.o\ read_stru.o\ read_atom_species.o\ + sep.o\ + sep_cell.o\ OBJS_DEEPKS=LCAO_deepks.o\ deepks_basic.o\ @@ -220,7 +222,7 @@ OBJS_DEEPKS=LCAO_deepks.o\ deepks_phialpha.o\ LCAO_deepks_io.o\ LCAO_deepks_interface.o\ - + OBJS_ELECSTAT=elecstate.o\ elecstate_energy_terms.o\ @@ -246,6 +248,7 @@ OBJS_ELECSTAT=elecstate.o\ cal_nelec_nband.o\ read_pseudo.o\ cal_wfc.o\ + pot_sep.o\ OBJS_ELECSTAT_LCAO=elecstate_lcao.o\ elecstate_lcao_cal_tau.o\ @@ -395,7 +398,7 @@ OBJS_HSOLVER=diago_cg.o\ diag_const_nums.o\ diag_hs_para.o\ diago_pxxxgvx.o\ - + OBJS_HSOLVER_LCAO=hsolver_lcao.o\ diago_scalapack.o\ diago_lapack.o\ @@ -413,7 +416,7 @@ OBJS_HSOLVER_PEXSI=diago_pexsi.o\ dist_bcd_matrix.o\ dist_ccs_matrix.o\ dist_matrix_transformer.o\ - + OBJS_MD=fire.o\ langevin.o\ md_base.o\ @@ -488,7 +491,7 @@ OBJS_RELAXATION=bfgs_basic.o\ lbfgs.o\ matrix_methods.o\ line_search.o\ - + OBJS_SURCHEM=surchem.o\ H_correction_pw.o\ @@ -688,7 +691,7 @@ OBJS_PARALLEL=parallel_common.o\ parallel_kpoints.o\ parallel_reduce.o\ parallel_device.o - + OBJS_SRCPW=H_Ewald_pw.o\ dnrm2.o\ VL_in_pw.o\ @@ -752,7 +755,8 @@ OBJS_SRCPW=H_Ewald_pw.o\ sto_elecond.o\ sto_dos.o\ onsite_projector.o\ - onsite_proj_tools.o + onsite_proj_tools.o\ + VSep_in_pw.o OBJS_VDW=vdw.o\ vdwd2_parameters.o\ From b86478177d9efce8d7b65533e206495b3c16590e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 Sep 2025 20:39:19 +0800 Subject: [PATCH 04/13] Build(deps): Bump actions/setup-python from 5 to 6 (#6492) Bumps [actions/setup-python](https://github.com/actions/setup-python) from 5 to 6. - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v5...v6) --- updated-dependencies: - dependency-name: actions/setup-python dependency-version: '6' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 22f6cf7efe..1e78dfb0a1 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -13,7 +13,7 @@ jobs: - name: Checkout uses: actions/checkout@v5 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: 3.8 - name: Build Pyabacus From fbd4b5b2d15752a8a63ed51f31bddc96ac69c2aa Mon Sep 17 00:00:00 2001 From: Critsium Date: Mon, 8 Sep 2025 20:59:18 +0800 Subject: [PATCH 05/13] [Refactor] Move hardware initializer out from esolver code (#6494) * Move hardware initializer out from esolver * Remove useless codes * Remove finalize code out --- source/source_esolver/esolver_ks_pw.cpp | 30 --------------- source/source_main/driver.h | 4 ++ source/source_main/driver_run.cpp | 50 +++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 30 deletions(-) diff --git a/source/source_esolver/esolver_ks_pw.cpp b/source/source_esolver/esolver_ks_pw.cpp index 820cc76c00..06cea7236f 100644 --- a/source/source_esolver/esolver_ks_pw.cpp +++ b/source/source_esolver/esolver_ks_pw.cpp @@ -57,21 +57,6 @@ ESolver_KS_PW::ESolver_KS_PW() this->classname = "ESolver_KS_PW"; this->basisname = "PW"; this->device = base_device::get_device_type(this->ctx); - -#if ((defined __CUDA) || (defined __ROCM)) - if (this->device == base_device::GpuDevice) - { - ModuleBase::createGpuBlasHandle(); - hsolver::createGpuSolverHandle(); - container::kernels::createGpuBlasHandle(); - container::kernels::createGpuSolverHandle(); - } -#endif - -#ifdef __DSP - std::cout << " ** Initializing DSP Hardware..." << std::endl; - mtfunc::dspInitHandle(GlobalV::MY_RANK); -#endif } template @@ -87,21 +72,6 @@ ESolver_KS_PW::~ESolver_KS_PW() this->pelec = nullptr; } - if (this->device == base_device::GpuDevice) - { -#if defined(__CUDA) || defined(__ROCM) - ModuleBase::destoryBLAShandle(); - hsolver::destroyGpuSolverHandle(); - container::kernels::destroyGpuBlasHandle(); - container::kernels::destroyGpuSolverHandle(); -#endif - } - -#ifdef __DSP - std::cout << " ** Closing DSP Hardware..." << std::endl; - mtfunc::dspDestoryHandle(GlobalV::MY_RANK); -#endif - if (PARAM.inp.device == "gpu" || PARAM.inp.precision == "single") { delete this->kspw_psi; diff --git a/source/source_main/driver.h b/source/source_main/driver.h index d0ef395875..9bdcf1d285 100644 --- a/source/source_main/driver.h +++ b/source/source_main/driver.h @@ -37,6 +37,10 @@ class Driver // the actual calculations void driver_run(); + + // Init harewares according to Input parameters + void init_hardware(); + void finalize_hardware(); }; #endif diff --git a/source/source_main/driver_run.cpp b/source/source_main/driver_run.cpp index 4743b95bf8..579b8fbd08 100644 --- a/source/source_main/driver_run.cpp +++ b/source/source_main/driver_run.cpp @@ -6,6 +6,17 @@ #include "source_io/para_json.h" #include "source_io/print_info.h" #include "source_md/run_md.h" +#include "source_base/module_device/device.h" +#include "source_base/module_device/memory_op.h" +#include "source_base/kernels/math_kernel_op.h" +#include "source_hsolver/kernels/dngvd_op.h" + +#include +#include + +#ifdef __DSP +#include "source_base/kernels/dsp/dsp_connector.h" +#endif /** * @brief This is the driver function which defines the workflow of ABACUS @@ -47,6 +58,8 @@ void Driver::driver_run() unitcell::check_atomic_stru(ucell, PARAM.inp.min_dist_coef); //! 2: initialize the ESolver (depends on a set-up ucell after `setup_cell`) + this->init_hardware(); + ModuleESolver::ESolver* p_esolver = ModuleESolver::init_esolver(PARAM.inp, ucell); //! 3: initialize Esolver and fill json-structure @@ -93,9 +106,46 @@ void Driver::driver_run() p_esolver->after_all_runners(ucell); ModuleESolver::clean_esolver(p_esolver); + this->finalize_hardware(); //! 6: output the json file Json::create_Json(&ucell, PARAM); return; } + +void Driver::init_hardware() +{ +#if ((defined __CUDA) || (defined __ROCM)) + if (PARAM.inp.device == "gpu") + { + ModuleBase::createGpuBlasHandle(); + hsolver::createGpuSolverHandle(); + container::kernels::createGpuBlasHandle(); + container::kernels::createGpuSolverHandle(); + } +#endif + +#ifdef __DSP + std::cout << " ** Initializing DSP Hardware..." << std::endl; + mtfunc::dspInitHandle(GlobalV::MY_RANK); +#endif +} + +void Driver::finalize_hardware() +{ +#if defined(__CUDA) || defined(__ROCM) + if (PARAM.inp.device == "gpu") + { + ModuleBase::destoryBLAShandle(); + hsolver::destroyGpuSolverHandle(); + container::kernels::destroyGpuBlasHandle(); + container::kernels::destroyGpuSolverHandle(); + } +#endif + +#ifdef __DSP + std::cout << " ** Closing DSP Hardware..." << std::endl; + mtfunc::dspDestoryHandle(GlobalV::MY_RANK); +#endif +} \ No newline at end of file From 956ac6ae1f75cb9094291b248801ba6ff0bb1550 Mon Sep 17 00:00:00 2001 From: Tianxiang Wang Date: Tue, 9 Sep 2025 11:52:30 +0800 Subject: [PATCH 06/13] Feature: support NVTX profiling via timer_enable_nvtx flag (#6495) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Feature: support NVTX profiling via timer_enable_nvtx flag Signed-off-by:Tianxiang Wang, Contributed under MetaX Integrated Circuits (Shanghai) Co., Ltd. * Add timer_enable_nvtx section in markdown Signed-off-by:Tianxiang Wang, Contributed under MetaX Integrated Circuits (Shanghai) Co., Ltd. * Fix: Use __USE_NVTX macro to avoid NVTX linking errors in tests. Clarify in docs that timer_enable_nvtx parameter only takes effect on CUDA platforms. Signed-off-by:Tianxiang Wang, Contributed under MetaX Integrated Circuits (Shanghai) Co., Ltd. --- CMakeLists.txt | 7 ++-- docs/advanced/input_files/input-main.md | 10 +++++ source/source_base/timer.cpp | 38 +++++++++++++------ .../module_parameter/input_parameter.h | 3 +- source/source_io/read_input_item_system.cpp | 6 +++ 5 files changed, 49 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 277f1924ec..c0c7b83bf8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -287,7 +287,7 @@ if (USE_SW) set(SW ON) include_directories(${SW_MATH}/include) include_directories(${SW_FFT}/include) - + target_link_libraries(${ABACUS_BIN_NAME} ${SW_FFT}/lib/libfftw3.a) target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswfft.a) target_link_libraries(${ABACUS_BIN_NAME} ${SW_MATH}/libswscalapack.a) @@ -373,6 +373,7 @@ if(USE_CUDA) if(USE_CUDA) add_compile_definitions(__CUDA) add_compile_definitions(__UT_USE_CUDA) + target_compile_definitions(${ABACUS_BIN_NAME} PRIVATE __USE_NVTX) if (CMAKE_BUILD_TYPE STREQUAL "Debug") set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -G" CACHE STRING "CUDA flags for debug build" FORCE) endif() @@ -520,7 +521,7 @@ if(ENABLE_MLALGO) include_directories(${libnpy_INCLUDE_DIR}) endif() include_directories(${libnpy_SOURCE_DIR}/include) - + add_compile_definitions(__MLALGO) endif() @@ -560,7 +561,7 @@ if (ENABLE_CNPY) include_directories(${cnpy_INCLUDE_DIR}) endif() include_directories(${cnpy_SOURCE_DIR}) - + # find ZLIB and link find_package(ZLIB REQUIRED) target_link_libraries(${ABACUS_BIN_NAME} cnpy ZLIB::ZLIB) diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md index db4ff3b4f4..a2024dec66 100644 --- a/docs/advanced/input_files/input-main.md +++ b/docs/advanced/input_files/input-main.md @@ -22,6 +22,7 @@ - [min\_dist\_coef](#min_dist_coef) - [device](#device) - [precision](#precision) + - [timer_enable_nvtx](#timer_enable_nvtx) - [nb2d](#nb2d) - [Input Files](#variables-related-to-input-files) - [stru\_file](#stru_file) @@ -706,6 +707,15 @@ If only one value is set (such as `kspacing 0.5`), then kspacing values of a/b/c - double: double precision - **Default**: double +### timer_enable_nvtx + +- **Type**: Boolean +- **Description**: Controls whether NVTX profiling labels are emitted by the timer. This feature is only effective on CUDA platforms. + + - True: Enable NVTX profiling labels in the timer. + - False: Disable NVTX profiling labels in the timer. +- **Default**: False + ### nb2d - **Type**: Integer diff --git a/source/source_base/timer.cpp b/source/source_base/timer.cpp index f7a4be636d..e7f22df70d 100644 --- a/source/source_base/timer.cpp +++ b/source/source_base/timer.cpp @@ -14,6 +14,11 @@ #include "chrono" #include "source_base/formatter.h" +#if defined(__CUDA) && defined(__USE_NVTX) +#include +#include "source_io/module_parameter/parameter.h" +#endif + namespace ModuleBase { @@ -93,6 +98,12 @@ void timer::tick(const std::string &class_name,const std::string &name) #endif ++timer_one.calls; timer_one.start_flag = false; +#if defined(__CUDA) && defined(__USE_NVTX) + if (PARAM.inp.timer_enable_nvtx){ + std::string label = class_name + ":" + name; + nvtxRangePushA(label.data()); + } +#endif } else { @@ -107,6 +118,11 @@ void timer::tick(const std::string &class_name,const std::string &name) timer_one.cpu_second += (cpu_time() - timer_one.cpu_start); #endif timer_one.start_flag = true; +#if defined(__CUDA) && defined(__USE_NVTX) + if (PARAM.inp.timer_enable_nvtx){ + nvtxRangePop(); + } +#endif } } // end if(!omp_get_thread_num()) } @@ -128,7 +144,7 @@ void timer::write_to_json(std::string file_name) int is_initialized = 0; MPI_Initialized(&is_initialized); if (!is_initialized) { - return; + return; } int my_rank = 0; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); @@ -195,12 +211,12 @@ void timer::write_to_json(std::string file_name) const Timer_One timer_one = timer_pool_B.second; ofs << indent << indent << indent << indent << "{\n"; ofs << indent << indent << indent << indent << "\"name\": \"" << name << "\",\n"; - ofs << indent << indent << indent << indent << "\"cpu_second\": " + ofs << indent << indent << indent << indent << "\"cpu_second\": " << std::setprecision(15) << timer_one.cpu_second << ",\n"; ofs << indent << indent << indent << indent << "\"calls\": " << timer_one.calls << ",\n"; - ofs << indent << indent << indent << indent << "\"cpu_second_per_call\": " + ofs << indent << indent << indent << indent << "\"cpu_second_per_call\": " << double_to_string(timer_one.cpu_second/timer_one.calls) << ",\n"; - ofs << indent << indent << indent << indent << "\"cpu_second_per_total\": " + ofs << indent << indent << indent << indent << "\"cpu_second_per_total\": " << double_to_string(timer_one.cpu_second/timer_pool[""]["total"].cpu_second) << "\n"; if (order_b == timer_pool_A.second.size()) @@ -283,11 +299,11 @@ void timer::print_all(std::ofstream &ofs) // if the total time is too small, we do not calculate the percentage - if (timer_pool_order[0].second.cpu_second < 1e-9) + if (timer_pool_order[0].second.cpu_second < 1e-9) { pers.push_back(0); - } - else + } + else { pers.push_back(percentage); } @@ -300,10 +316,10 @@ void timer::print_all(std::ofstream &ofs) std::vector titles = {"CLASS_NAME", "NAME", "TIME/s", "CALLS", "AVG/s", "PER/%"}; std::vector formats = {"%-10s", "%-10s", "%6.2f", "%8d", "%6.2f", "%6.2f"}; - FmtTable time_statistics(/*titles=*/titles, - /*nrows=*/pers.size(), - /*formats=*/formats, - /*indent=*/0, + FmtTable time_statistics(/*titles=*/titles, + /*nrows=*/pers.size(), + /*formats=*/formats, + /*indent=*/0, /*align=*/{/*value*/FmtTable::Align::LEFT, /*title*/FmtTable::Align::CENTER}); time_statistics << class_names << names << times << calls << avgs << pers; const std::string table = "\nTIME STATISTICS\n" + time_statistics.str(); diff --git a/source/source_io/module_parameter/input_parameter.h b/source/source_io/module_parameter/input_parameter.h index 831db1b158..dd3d3fb6ea 100644 --- a/source/source_io/module_parameter/input_parameter.h +++ b/source/source_io/module_parameter/input_parameter.h @@ -67,6 +67,7 @@ struct Input_para std::string device = "auto"; std::string precision = "double"; + bool timer_enable_nvtx = false; // ============== #Parameters (2.Electronic structure) =========================== std::string ks_solver = "default"; ///< xiaohui add 2013-09-01 @@ -377,7 +378,7 @@ struct Input_para bool out_proj_band = false; ///< projected band structure calculation jiyy add 2022-05-11 std::string out_level = "ie"; ///< control the output information. std::vector out_dmr = {0, 8}; ///< output density matrix in real space DM(R) - std::vector out_dmk = {0, 8}; ///< output density matrix in reciprocal space DM(k) + std::vector out_dmk = {0, 8}; ///< output density matrix in reciprocal space DM(k) bool out_bandgap = false; ///< QO added for bandgap printing std::vector out_mat_hs = {0, 8}; ///< output H matrix and S matrix in local basis. std::vector out_mat_tk = {0, 8}; ///< output T(k) matrix in local basis. diff --git a/source/source_io/read_input_item_system.cpp b/source/source_io/read_input_item_system.cpp index 2eb2234d9f..c9854c9ece 100644 --- a/source/source_io/read_input_item_system.cpp +++ b/source/source_io/read_input_item_system.cpp @@ -830,6 +830,12 @@ void ReadInput::item_system() }; this->add_item(item); } + { + Input_Item item("timer_enable_nvtx"); + item.annotation = "enable NVTX labeling for profiling or not"; + read_sync_bool(input.timer_enable_nvtx); + this->add_item(item); + } } } // namespace ModuleIO From c51f7f7c84da38e24fd809e85820f056bd6b2305 Mon Sep 17 00:00:00 2001 From: Tianxiang Wang Date: Tue, 9 Sep 2025 12:01:03 +0800 Subject: [PATCH 07/13] Perf: Optimize Davidson by fusing operators, offloading CPU computation to GPU, and reducing memory transfers (#6493) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Perf: Optimize Diago_DavSubspace with GPU operators by adding and fusing custom kernels. Signed-off-by:Tianxiang Wang, Contributed under MetaX Integrated Circuits (Shanghai) Co., Ltd. * Perf: reduce memory allocation and copy in Diago_DavSubspace::diag_zhegvx Signed-off-by:Tianxiang Wang, Contributed under MetaX Integrated Circuits (Shanghai) Co., Ltd. * Perf: Replace loop-based 2D copy and memset with memcpy_2d_op, memset_2d_op Signed-off-by:Tianxiang Wang, Contributed under MetaX Integrated Circuits (Shanghai) Co., Ltd. * Perf: use warp reduce instead of shared memory for better efficiency Signed-off-by:Tianxiang Wang, Contributed under MetaX Integrated Circuits (Shanghai) Co., Ltd. * Fix compilation error Signed-off-by:Tianxiang Wang, Contributed under MetaX Integrated Circuits (Shanghai) Co., Ltd. --- .../kernels/cuda/math_kernel_op.cu | 44 ++++- source/source_base/kernels/math_kernel_op.cpp | 25 +++ source/source_base/kernels/math_kernel_op.h | 40 +++- .../kernels/rocm/math_kernel_op.hip.cu | 42 ++++- .../module_device/cuda/memory_op.cu | 68 +++++++ .../source_base/module_device/memory_op.cpp | 110 +++++++++++ source/source_base/module_device/memory_op.h | 113 ++++++++++- source/source_hsolver/diago_dav_subspace.cpp | 128 +++---------- source/source_hsolver/diago_dav_subspace.h | 5 + .../source_hsolver/kernels/bpcg_kernel_op.cpp | 27 +++ .../source_hsolver/kernels/bpcg_kernel_op.h | 55 +++++- .../kernels/cuda/bpcg_kernel_op.cu | 175 +++++++++++++----- .../source_hsolver/kernels/cuda/dngvd_op.cu | 2 +- .../kernels/rocm/bpcg_kernel_op.hip.cu | 112 +++++++++-- .../kernels/rocm/dngvd_op.hip.cu | 12 +- 15 files changed, 768 insertions(+), 190 deletions(-) diff --git a/source/source_base/kernels/cuda/math_kernel_op.cu b/source/source_base/kernels/cuda/math_kernel_op.cu index 415ec9f12a..562760ca7a 100644 --- a/source/source_base/kernels/cuda/math_kernel_op.cu +++ b/source/source_base/kernels/cuda/math_kernel_op.cu @@ -133,6 +133,14 @@ __global__ void matrix_copy_kernel(const int n1, const int n2, const T* A, const } } +template +__global__ void matrix_multiply_vector_kernel(const int m, const int n, T *a, const int lda, const Real *b, const Real alpha, T *c, const int ldc){ + int row = blockIdx.x * blockDim.x + threadIdx.x; + int col = blockIdx.y * blockDim.y + threadIdx.y; + if (col >= n || row >= m) return; + c[col * ldc + row] = a[col * lda + row] * b[col] * alpha; +} + cublasOperation_t judge_trans_op(bool is_complex, const char& trans, const char* name) { if (trans == 'N') @@ -147,7 +155,7 @@ cublasOperation_t judge_trans_op(bool is_complex, const char& trans, const char* { return CUBLAS_OP_C; } - else + else { ModuleBase::WARNING_QUIT(name, std::string("Unknown trans type ") + trans + std::string(" !")); } @@ -438,10 +446,44 @@ void matrixCopy, base_device::DEVICE_GPU>::operator()(const cudaCheckOnDebug(); } +template <> +void matrix_mul_vector_op::operator()(const int &m, const int &n, + double *a, const int &lda, const double *b, const double alpha, double *c, const int &ldc){ + dim3 thread(16, 16, 1); + dim3 block((m + thread.x - 1) / thread.x, (n + thread.y - 1) / thread.y, 1); + matrix_multiply_vector_kernel <<>>(m, n, a, lda, + b, alpha, c, ldc); + cudaCheckOnDebug(); +} + +template <> +void matrix_mul_vector_op, base_device::DEVICE_GPU>::operator()(const int &m, const int &n, + std::complex *a, const int &lda, const float *b, const float alpha, std::complex *c, const int &ldc){ + dim3 thread(16, 16, 1); + dim3 block((m + thread.x - 1) / thread.x, (n + thread.y - 1) / thread.y, 1); + matrix_multiply_vector_kernel, float> <<>>(m, n, reinterpret_cast*>(a), lda, + b, alpha, reinterpret_cast*>(c), ldc); + cudaCheckOnDebug(); +} + +template <> +void matrix_mul_vector_op, base_device::DEVICE_GPU>::operator()(const int &m, const int &n, + std::complex *a, const int &lda, const double *b, const double alpha, std::complex *c, const int &ldc) +{ + dim3 thread(16, 16, 1); + dim3 block((m + thread.x - 1) / thread.x, (n + thread.y - 1) / thread.y, 1); + matrix_multiply_vector_kernel, double> <<>>(m, n, reinterpret_cast*>(a), lda, + b, alpha, reinterpret_cast*>(c), ldc); + cudaCheckOnDebug(); +} // Explicitly instantiate functors for the types of functor registered. template struct matrixCopy, base_device::DEVICE_GPU>; template struct matrixCopy; template struct matrixCopy, base_device::DEVICE_GPU>; + +template struct matrix_mul_vector_op, base_device::DEVICE_GPU>; +template struct matrix_mul_vector_op; +template struct matrix_mul_vector_op, base_device::DEVICE_GPU>; } // namespace ModuleBase diff --git a/source/source_base/kernels/math_kernel_op.cpp b/source/source_base/kernels/math_kernel_op.cpp index dad790e463..343d342e00 100644 --- a/source/source_base/kernels/math_kernel_op.cpp +++ b/source/source_base/kernels/math_kernel_op.cpp @@ -119,12 +119,35 @@ struct matrixCopy } }; +template +struct matrix_mul_vector_op { + using Real = typename GetTypeReal::type; + void operator()(const int& m, const int &n, + T *a, + const int &lda, + const Real *b, + const Real alpha, + T *c, + const int &ldc){ +#ifdef _OPENMP +#pragma omp parallel for collapse(2) schedule(static, 8192 / sizeof(T)) +#endif + for (int j = 0; j < n; j++){ + for (int i = 0; i < m; i++){ + c[j * ldc + i] = a[j * lda + i] * b[j] * alpha; + } + } + + } +}; + template struct gemv_op, base_device::DEVICE_CPU>; template struct gemv_op; template struct gemm_op, base_device::DEVICE_CPU>; template struct gemm_op; template struct matrixTranspose_op, base_device::DEVICE_CPU>; template struct matrixCopy, base_device::DEVICE_CPU>; +template struct matrix_mul_vector_op, base_device::DEVICE_CPU>; template struct gemv_op, base_device::DEVICE_CPU>; template struct gemv_op; @@ -133,6 +156,8 @@ template struct gemm_op; template struct matrixTranspose_op, base_device::DEVICE_CPU>; template struct matrixCopy; template struct matrixCopy, base_device::DEVICE_CPU>; +template struct matrix_mul_vector_op; +template struct matrix_mul_vector_op, base_device::DEVICE_CPU>; #ifdef __LCAO template struct matrixTranspose_op; diff --git a/source/source_base/kernels/math_kernel_op.h b/source/source_base/kernels/math_kernel_op.h index 9476d32fdc..f5c8e218df 100644 --- a/source/source_base/kernels/math_kernel_op.h +++ b/source/source_base/kernels/math_kernel_op.h @@ -104,7 +104,7 @@ template struct vector_div_constant_op { /// /// Input Parameters /// \param dim : array size - /// \param vector : input array + /// \param vector : input array /// \param constant : input constant /// /// Output Parameters @@ -298,6 +298,31 @@ template struct matrixCopy { void operator()(const int& n1, const int& n2, const T* A, const int& LDA, T* B, const int& LDB); }; +template +struct matrix_mul_vector_op { + using Real = typename GetTypeReal::type; + /// @brief a * b * beta by each column + /// + /// Input Parameters + /// \param m : row number + /// \param n : column number + /// \param a : input matrix + /// \param lda : leading dimension of matrix a + /// \param b : input vector + /// \param alpha : factor + /// \param ldc : leading dimension of matrix c + /// + /// Output Parameters + /// \param c : output matrix + void operator()(const int &m, const int &n, + T *a, + const int &lda, + const Real *b, + const Real alpha, + T *c, + const int &ldc); +}; + template struct apply_eigenvalues_op { using Real = typename GetTypeReal::type; @@ -314,7 +339,7 @@ struct precondition_op { T* psi_iter, const int& nbase, const int& notconv, - const Real* precondition, + const Real* precondition, const Real* eigenvalues); }; @@ -393,6 +418,17 @@ template struct matrixCopy { const int& LDB); }; +template struct matrix_mul_vector_op { + using Real = typename GetTypeReal::type; + void operator()(const int &m, const int &n, + T *a, + const int &lda, + const Real *b, + const Real alpha, + T *c, + const int &ldc); +}; + void createGpuBlasHandle(); void destoryBLAShandle(); diff --git a/source/source_base/kernels/rocm/math_kernel_op.hip.cu b/source/source_base/kernels/rocm/math_kernel_op.hip.cu index 8eb87988c8..f60cdfc3b1 100644 --- a/source/source_base/kernels/rocm/math_kernel_op.hip.cu +++ b/source/source_base/kernels/rocm/math_kernel_op.hip.cu @@ -145,6 +145,15 @@ __launch_bounds__(1024) __global__ } } +template +__launch_bounds__(1024) __global__ +void matrix_multiply_vector_kernel(const int m, const int n, T *a, const int lda, const Real *b, const Real alpha, T *c, const int ldc){ + int row = blockIdx.x * blockDim.x + threadIdx.x; + int col = blockIdx.y * blockDim.y + threadIdx.y; + if (col >= n || row >= m) return; + c[col * ldc + row] = a[col * lda + row] * b[col] * alpha; +} + hipblasOperation_t judge_trans_op(bool is_complex, const char& trans, const char* name) { if (trans == 'N') @@ -159,7 +168,7 @@ hipblasOperation_t judge_trans_op(bool is_complex, const char& trans, const char { return HIPBLAS_OP_C; } - else + else { ModuleBase::WARNING_QUIT(name, std::string("Unknown trans type ") + trans + std::string(" !")); } @@ -437,7 +446,38 @@ void matrixCopy, base_device::DEVICE_GPU>::operator()(const hipCheckOnDebug(); } +template <> +void matrix_mul_vector_op::operator()(const int &m, const int &n, + double *a, const int &lda, const double *b, const double alpha, double *c, const int &ldc){ + dim3 thread(16, 16, 1); + dim3 block((m + thread.x - 1) / thread.x, (n + thread.y - 1) / thread.y, 1); + hipLaunchKernelGGL(HIP_KERNEL_NAME(matrix_multiply_vector_kernel), dim3(block, thread), + m, n, a, lda, b, alpha, c, ldc); + hipCheckOnDebug(); +} +template <> +void matrix_mul_vector_op, base_device::DEVICE_GPU>::operator()(const int &m, const int &n, + std::complex *a, const int &lda, const float *b, const float alpha, std::complex *c, const int &ldc){ + dim3 thread(16, 16, 1); + dim3 block((m + thread.x - 1) / thread.x, (n + thread.y - 1) / thread.y, 1); + hipLaunchKernelGGL(HIP_KERNEL_NAME(matrix_multiply_vector_kernel, float>), dim3(block, thread), + m, n, reinterpret_cast*>(a), lda, + b, alpha, reinterpret_cast*>(c), ldc); + hipCheckOnDebug(); +} + +template <> +void matrix_mul_vector_op, base_device::DEVICE_GPU>::operator()(const int &m, const int &n, + std::complex *a, const int &lda, const double *b, const double alpha, std::complex *c, const int &ldc) +{ + dim3 thread(16, 16, 1); + dim3 block((m + thread.x - 1) / thread.x, (n + thread.y - 1) / thread.y, 1); + hipLaunchKernelGGL(HIP_KERNEL_NAME(matrix_multiply_vector_kernel, double>), dim3(block, thread), + m, n, reinterpret_cast*>(a), lda, + b, alpha, reinterpret_cast*>(c), ldc); + hipCheckOnDebug(); +} // Explicitly instantiate functors for the types of functor registered. template struct matrixCopy; diff --git a/source/source_base/module_device/cuda/memory_op.cu b/source/source_base/module_device/cuda/memory_op.cu index 43eae7975c..ecc972c3ba 100644 --- a/source/source_base/module_device/cuda/memory_op.cu +++ b/source/source_base/module_device/cuda/memory_op.cu @@ -85,6 +85,16 @@ void set_memory_op::operator()(FPTYPE* arr, cudaErrcheck(cudaMemset(arr, var, sizeof(FPTYPE) * size)); } +template +void set_memory_2d_op::operator()(FPTYPE* arr, + const size_t pitch, + const int var, + const size_t width, + const size_t height) +{ + cudaErrcheck(cudaMemset2D(arr, sizeof(FPTYPE) * pitch , var, sizeof(FPTYPE) * width, height)); +} + template void synchronize_memory_op::operator()( FPTYPE* arr_out, @@ -112,6 +122,42 @@ void synchronize_memory_op +void synchronize_memory_2d_op::operator()( + FPTYPE* arr_out, + const size_t dpitch, + const FPTYPE* arr_in, + const size_t spitch, + const size_t width, + const size_t height) +{ + cudaErrcheck(cudaMemcpy2D(arr_out, dpitch * sizeof(FPTYPE), arr_in, spitch * sizeof(FPTYPE), width * sizeof(FPTYPE), height, cudaMemcpyDeviceToHost)); +} + +template +void synchronize_memory_2d_op::operator()( + FPTYPE* arr_out, + const size_t dpitch, + const FPTYPE* arr_in, + const size_t spitch, + const size_t width, + const size_t height) +{ + cudaErrcheck(cudaMemcpy2D(arr_out, dpitch * sizeof(FPTYPE), arr_in, spitch * sizeof(FPTYPE), width * sizeof(FPTYPE), height, cudaMemcpyHostToDevice)); +} + +template +void synchronize_memory_2d_op::operator()( + FPTYPE* arr_out, + const size_t dpitch, + const FPTYPE* arr_in, + const size_t spitch, + const size_t width, + const size_t height) +{ + cudaErrcheck(cudaMemcpy2D(arr_out, dpitch * sizeof(FPTYPE), arr_in, spitch * sizeof(FPTYPE), width * sizeof(FPTYPE), height, cudaMemcpyDeviceToDevice)); +} + template struct cast_memory_op { @@ -196,6 +242,12 @@ template struct set_memory_op; template struct set_memory_op, base_device::DEVICE_GPU>; template struct set_memory_op, base_device::DEVICE_GPU>; +template struct set_memory_2d_op; +template struct set_memory_2d_op; +template struct set_memory_2d_op; +template struct set_memory_2d_op, base_device::DEVICE_GPU>; +template struct set_memory_2d_op, base_device::DEVICE_GPU>; + template struct synchronize_memory_op; template struct synchronize_memory_op; template struct synchronize_memory_op; @@ -212,6 +264,22 @@ template struct synchronize_memory_op, base_device::DEVICE_ template struct synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; template struct synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>; +template struct synchronize_memory_2d_op; +template struct synchronize_memory_2d_op; +template struct synchronize_memory_2d_op; +template struct synchronize_memory_2d_op; +template struct synchronize_memory_2d_op; +template struct synchronize_memory_2d_op; +template struct synchronize_memory_2d_op; +template struct synchronize_memory_2d_op; +template struct synchronize_memory_2d_op; +template struct synchronize_memory_2d_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; +template struct synchronize_memory_2d_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; +template struct synchronize_memory_2d_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>; +template struct synchronize_memory_2d_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; +template struct synchronize_memory_2d_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; +template struct synchronize_memory_2d_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>; + template struct cast_memory_op; template struct cast_memory_op; template struct cast_memory_op; diff --git a/source/source_base/module_device/memory_op.cpp b/source/source_base/module_device/memory_op.cpp index 189014c981..da22bc2ce6 100644 --- a/source/source_base/module_device/memory_op.cpp +++ b/source/source_base/module_device/memory_op.cpp @@ -55,6 +55,18 @@ struct set_memory_op } }; +template +struct set_memory_2d_op +{ + void operator()(FPTYPE* arr, const size_t pitch, const int var, const size_t width, const size_t height) + { + for (size_t i = 0; i < height; i++){ + set_memory_op()(arr + i * pitch, var, width); + } + } +}; + + template struct synchronize_memory_op { @@ -70,6 +82,23 @@ struct synchronize_memory_op +struct synchronize_memory_2d_op +{ + void operator()(FPTYPE* arr_out, + const size_t dpitch, + const FPTYPE* arr_in, + const size_t spitch, + const size_t width, + const size_t height) + { + for (int i = 0; i < height; i++){ + synchronize_memory_op()( + arr_out + i * dpitch, arr_in + i * spitch, width); + } + } +}; + template struct cast_memory_op { @@ -108,12 +137,24 @@ template struct set_memory_op; template struct set_memory_op, base_device::DEVICE_CPU>; template struct set_memory_op, base_device::DEVICE_CPU>; +template struct set_memory_2d_op; +template struct set_memory_2d_op; +template struct set_memory_2d_op; +template struct set_memory_2d_op, base_device::DEVICE_CPU>; +template struct set_memory_2d_op, base_device::DEVICE_CPU>; + template struct synchronize_memory_op; template struct synchronize_memory_op; template struct synchronize_memory_op; template struct synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; template struct synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; +template struct synchronize_memory_2d_op; +template struct synchronize_memory_2d_op; +template struct synchronize_memory_2d_op; +template struct synchronize_memory_2d_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; +template struct synchronize_memory_2d_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; + template struct cast_memory_op; template struct cast_memory_op; template struct cast_memory_op; @@ -167,6 +208,14 @@ struct set_memory_op } }; +template +struct set_memory_2d_op +{ + void operator()(FPTYPE* arr, const size_t pitch, const int var, const size_t width, const size_t height) + { + } +}; + template struct synchronize_memory_op { @@ -197,6 +246,45 @@ struct synchronize_memory_op +struct synchronize_memory_2d_op +{ + void operator()(FPTYPE* arr_out, + const size_t dpitch, + const FPTYPE* arr_in, + const size_t spitch, + const size_t width, + const size_t height) + { + } +}; + +template +struct synchronize_memory_2d_op +{ + void operator()(FPTYPE* arr_out, + const size_t dpitch, + const FPTYPE* arr_in, + const size_t spitch, + const size_t width, + const size_t height) + { + } +}; + +template +struct synchronize_memory_2d_op +{ + void operator()(FPTYPE* arr_out, + const size_t dpitch, + const FPTYPE* arr_in, + const size_t spitch, + const size_t width, + const size_t height) + { + } +}; + template struct cast_memory_op { @@ -247,6 +335,12 @@ template struct set_memory_op; template struct set_memory_op, base_device::DEVICE_GPU>; template struct set_memory_op, base_device::DEVICE_GPU>; +template struct set_memory_2d_op; +template struct set_memory_2d_op; +template struct set_memory_2d_op; +template struct set_memory_2d_op, base_device::DEVICE_GPU>; +template struct set_memory_2d_op, base_device::DEVICE_GPU>; + template struct synchronize_memory_op; template struct synchronize_memory_op; template struct synchronize_memory_op; @@ -263,6 +357,22 @@ template struct synchronize_memory_op, base_device::DEVICE_ template struct synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; template struct synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>; +template struct synchronize_memory_2d_op; +template struct synchronize_memory_2d_op; +template struct synchronize_memory_2d_op; +template struct synchronize_memory_2d_op; +template struct synchronize_memory_2d_op; +template struct synchronize_memory_2d_op; +template struct synchronize_memory_2d_op; +template struct synchronize_memory_2d_op; +template struct synchronize_memory_2d_op; +template struct synchronize_memory_2d_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; +template struct synchronize_memory_2d_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; +template struct synchronize_memory_2d_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>; +template struct synchronize_memory_2d_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; +template struct synchronize_memory_2d_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; +template struct synchronize_memory_2d_op, base_device::DEVICE_GPU, base_device::DEVICE_GPU>; + template struct cast_memory_op; template struct cast_memory_op; template struct cast_memory_op; diff --git a/source/source_base/module_device/memory_op.h b/source/source_base/module_device/memory_op.h index 30182649d3..c24acbb024 100644 --- a/source/source_base/module_device/memory_op.h +++ b/source/source_base/module_device/memory_op.h @@ -40,6 +40,22 @@ struct set_memory_op void operator()(FPTYPE* arr, const int var, const size_t size); }; +template +struct set_memory_2d_op +{ + /// @brief memset2D for multi-device + /// + /// Input Parameters + /// \param var : the specified constant value + /// \param pitch : Pitch in elements of 2D device memory + /// \param width : Width of matrix set (columns in elements) + /// \param height : Height of matrix set (rows) + /// + /// Output Parameters + /// \param arr : output array initialized by the input value + void operator()(FPTYPE* arr, const size_t pitch, const int var, const size_t width, const size_t height); +}; + template struct synchronize_memory_op { @@ -56,6 +72,28 @@ struct synchronize_memory_op const size_t size); }; +template +struct synchronize_memory_2d_op +{ + /// @brief memcpy2D for multi-device + /// + /// Input Parameters + /// \param arr_in : input array + /// \param dpitch : Pitch in elements of destination memory + /// \param spitch : Pitch in elements of source memory + /// \param width : Width of matrix transfer (columns in elements) + /// \param height : Height of matrix transfer (rows) + /// + /// Output Parameters + /// \param arr_out : output array initialized by the input array + void operator()(FPTYPE* arr_out, + const size_t dpitch, + const FPTYPE* arr_in, + const size_t spitch, + const size_t width, + const size_t height); +}; + template struct cast_memory_op { @@ -113,6 +151,12 @@ struct set_memory_op void operator()(FPTYPE* arr, const int var, const size_t size); }; +template +struct set_memory_2d_op +{ + void operator()(FPTYPE* arr, const size_t pitch, const int var, const size_t width, const size_t height); +}; + template struct synchronize_memory_op { @@ -133,7 +177,38 @@ struct synchronize_memory_op +struct synchronize_memory_2d_op +{ + void operator()(FPTYPE* arr_out, + const size_t dpitch, + const FPTYPE* arr_in, + const size_t spitch, + const size_t width, + const size_t height); +}; +template +struct synchronize_memory_2d_op +{ + void operator()(FPTYPE* arr_out, + const size_t dpitch, + const FPTYPE* arr_in, + const size_t spitch, + const size_t width, + const size_t height); +}; +template +struct synchronize_memory_2d_op +{ + void operator()(FPTYPE* arr_out, + const size_t dpitch, + const FPTYPE* arr_in, + const size_t spitch, + const size_t width, + const size_t height); }; template @@ -194,6 +269,16 @@ using setmem_dd_op = base_device::memory::set_memory_op, base_device::DEVICE_GPU>; using setmem_zd_op = base_device::memory::set_memory_op, base_device::DEVICE_GPU>; +using setmem_sh_2d_op = base_device::memory::set_memory_2d_op; +using setmem_dh_2d_op = base_device::memory::set_memory_2d_op; +using setmem_ch_2d_op = base_device::memory::set_memory_2d_op, base_device::DEVICE_CPU>; +using setmem_zh_2d_op = base_device::memory::set_memory_2d_op, base_device::DEVICE_CPU>; + +using setmem_sd_2d_op = base_device::memory::set_memory_2d_op; +using setmem_dd_2d_op = base_device::memory::set_memory_2d_op; +using setmem_cd_2d_op = base_device::memory::set_memory_2d_op, base_device::DEVICE_GPU>; +using setmem_zd_2d_op = base_device::memory::set_memory_2d_op, base_device::DEVICE_GPU>; + using delmem_sh_op = base_device::memory::delete_memory_op; using delmem_dh_op = base_device::memory::delete_memory_op; using delmem_ch_op = base_device::memory::delete_memory_op, base_device::DEVICE_CPU>; @@ -230,6 +315,32 @@ using syncmem_z2z_h2d_op using syncmem_z2z_d2h_op = base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; +using syncmem_c2c_h2h_op + = base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; +using syncmem_c2c_h2d_op + = base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; +using syncmem_c2c_d2h_op + = base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; +using syncmem_z2z_h2h_op + = base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; +using syncmem_z2z_h2d_op + = base_device::memory::synchronize_memory_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; +using syncmem_z2z_d2h_op + = base_device::memory::synchronize_memory_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; + +using syncmem_c2c_h2h_2d_op + = base_device::memory::synchronize_memory_2d_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; +using syncmem_c2c_h2d_2d_op + = base_device::memory::synchronize_memory_2d_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; +using syncmem_c2c_d2h_2d_op + = base_device::memory::synchronize_memory_2d_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; +using syncmem_z2z_h2h_2d_op + = base_device::memory::synchronize_memory_2d_op, base_device::DEVICE_CPU, base_device::DEVICE_CPU>; +using syncmem_z2z_h2d_2d_op + = base_device::memory::synchronize_memory_2d_op, base_device::DEVICE_GPU, base_device::DEVICE_CPU>; +using syncmem_z2z_d2h_2d_op + = base_device::memory::synchronize_memory_2d_op, base_device::DEVICE_CPU, base_device::DEVICE_GPU>; + using castmem_s2d_h2h_op = base_device::memory::cast_memory_op; using castmem_s2d_h2d_op diff --git a/source/source_hsolver/diago_dav_subspace.cpp b/source/source_hsolver/diago_dav_subspace.cpp index 4402fad9f8..d427f246aa 100644 --- a/source/source_hsolver/diago_dav_subspace.cpp +++ b/source/source_hsolver/diago_dav_subspace.cpp @@ -76,6 +76,8 @@ Diago_DavSubspace::Diago_DavSubspace(const std::vector& precond { resmem_real_op()(this->d_precondition, nbasis_in); // syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, this->d_precondition, this->precondition.data(), nbasis_in); + base_device::memory::resize_memory_op()(this->d_scc, this->nbase_x * this->nbase_x); + resmem_real_op()(this->d_eigenvalue, this->nbase_x); } #endif } @@ -94,6 +96,8 @@ Diago_DavSubspace::~Diago_DavSubspace() if (this->device == base_device::GpuDevice) { delmem_real_op()(this->d_precondition); + delmem_complex_op()(this->d_scc); + delmem_real_op()(this->d_eigenvalue); } #endif } @@ -125,13 +129,10 @@ int Diago_DavSubspace::diag_once(const HPsiFunc& hpsi_func, ModuleBase::timer::tick("Diago_DavSubspace", "first"); + syncmem_complex_2d_op()(this->psi_in_iter, this->dim, psi_in, psi_in_dmax, this->dim, this->n_band); for (int m = 0; m < this->n_band; m++) { unconv[m] = m; - - syncmem_complex_op()(this->psi_in_iter + m * this->dim, - psi_in + m * psi_in_dmax, - this->dim); } // compute h*psi_in_iter @@ -243,12 +244,7 @@ int Diago_DavSubspace::diag_once(const HPsiFunc& hpsi_func, // estimate of the eigenvectors and set the basis dimension to N; // update this->psi_in_iter according to psi_in - for (size_t i = 0; i < this->n_band; i++) - { - syncmem_complex_op()(this->psi_in_iter + i * this->dim, - psi_in + i * psi_in_dmax, - this->dim); - } + syncmem_complex_2d_op()(this->psi_in_iter, this->dim, psi_in, psi_in_dmax, this->dim, this->n_band); this->refresh(this->dim, this->n_band, @@ -316,30 +312,15 @@ void Diago_DavSubspace::cal_grad(const HPsiFunc& hpsi_func, this->dim); // Eigenvalues operation section - std::vector e_temp_cpu(this->notconv, 0); - Real* e_temp_hd = e_temp_cpu.data(); - if (this->device == base_device::GpuDevice) - { - e_temp_hd = nullptr; - resmem_real_op()(e_temp_hd, nbase); - } - - for (int m = 0; m < this->notconv; m++) - { - e_temp_cpu[m] = -(*eigenvalue_iter)[m]; - } - + Real* e_temp_hd = eigenvalue_iter->data(); if (this->device == base_device::GpuDevice) { - syncmem_var_h2d_op()(e_temp_hd, e_temp_cpu.data(), this->notconv); + syncmem_var_h2d_op()(this->d_eigenvalue, eigenvalue_iter->data(), this->nbase_x); + e_temp_hd = this->d_eigenvalue; } - - apply_eigenvalues_op()(nbase, this->nbase_x, this->notconv, this->vcc, this->vcc, e_temp_hd); - if (this->device == base_device::GpuDevice) - { - delmem_real_op()(e_temp_hd); - } + // vcc = - vcc * eigenvalue + ModuleBase::matrix_mul_vector_op()(nbase, notconv, vcc, this->nbase_x, eigenvalue_iter->data(), -1.0, vcc, this->nbase_x); #ifdef __DSP ModuleBase::gemm_op_mt() @@ -364,17 +345,12 @@ void Diago_DavSubspace::cal_grad(const HPsiFunc& hpsi_func, #if defined(__CUDA) || defined(__ROCM) if (this->device == base_device::GpuDevice) { - Real* eigenvalues_gpu = nullptr; - resmem_real_op()(eigenvalues_gpu, notconv); - syncmem_var_h2d_op()(eigenvalues_gpu, (*eigenvalue_iter).data(), notconv); - precondition_op()(this->dim, psi_iter, nbase, notconv, d_precondition, - eigenvalues_gpu); - delmem_real_op()(eigenvalues_gpu); + this->d_eigenvalue); } else #endif @@ -395,7 +371,7 @@ void Diago_DavSubspace::cal_grad(const HPsiFunc& hpsi_func, resmem_real_op()(psi_norm, notconv); using setmem_real_op = base_device::memory::set_memory_op; setmem_real_op()(psi_norm, 0.0, notconv); - + normalize_op()(this->dim, psi_iter, nbase, @@ -564,34 +540,9 @@ void Diago_DavSubspace::diag_zhegvx(const int& nbase, #if defined(__CUDA) || defined(__ROCM) if (this->diag_comm.rank == 0) { - Real* eigenvalue_gpu = nullptr; - resmem_real_op()(eigenvalue_gpu, this->nbase_x); - - syncmem_var_h2d_op()(eigenvalue_gpu, (*eigenvalue_iter).data(), this->nbase_x); - - T* hcc_gpu = nullptr; - T* scc_gpu = nullptr; - T* vcc_gpu = nullptr; - base_device::memory::resize_memory_op()(hcc_gpu, nbase * nbase); - base_device::memory::resize_memory_op()(scc_gpu, nbase * nbase); - base_device::memory::resize_memory_op()(vcc_gpu, nbase * nbase); - for(int i=0;i()(hcc_gpu + i * nbase, hcc + i * nbase_x, nbase); - base_device::memory::synchronize_memory_op()(scc_gpu + i * nbase, scc + i * nbase_x, nbase); - } - dngvd_op()(this->ctx, nbase, nbase, hcc_gpu, scc_gpu, eigenvalue_gpu, vcc_gpu); - for(int i=0;i()(vcc + i * nbase_x, vcc_gpu + i * nbase, nbase); - } - delmem_complex_op()(hcc_gpu); - delmem_complex_op()(scc_gpu); - delmem_complex_op()(vcc_gpu); - - syncmem_var_d2h_op()((*eigenvalue_iter).data(), eigenvalue_gpu, this->nbase_x); - - delmem_real_op()(eigenvalue_gpu); + base_device::memory::synchronize_memory_op()(this->d_scc, scc, nbase * this->nbase_x); + dngvd_op()(this->ctx, nbase, this->nbase_x, this->hcc, this->d_scc, this->d_eigenvalue, this->vcc); + syncmem_var_d2h_op()((*eigenvalue_iter).data(), this->d_eigenvalue, this->nbase_x); } #endif } @@ -641,7 +592,7 @@ void Diago_DavSubspace::diag_zhegvx(const int& nbase, } else { -#ifdef __MPI +#ifdef __MPI std::vector h_diag; std::vector s_diag; std::vector vcc_tmp; @@ -680,7 +631,7 @@ void Diago_DavSubspace::diag_zhegvx(const int& nbase, } #else std::cout << "Error: parallel diagonalization is not supported in serial mode." << std::endl; - exit(1); + exit(1); #endif } } @@ -763,48 +714,13 @@ void Diago_DavSubspace::refresh(const int& dim, nbase = nband; // set hcc/scc/vcc to 0 - for (size_t i = 0; i < nbase; i++) - { - setmem_complex_op()(&hcc[this->nbase_x * i], 0, nbase); - setmem_complex_op()(&scc[this->nbase_x * i], 0, nbase); - setmem_complex_op()(&vcc[this->nbase_x * i], 0, nbase); - } + setmem_complex_2d_op()(hcc, this->nbase_x, 0, nbase, nbase); + setmem_complex_2d_op()(scc, this->nbase_x, 0, nbase, nbase); + setmem_complex_2d_op()(vcc, this->nbase_x, 0, nbase, nbase); if (this->device == base_device::GpuDevice) { -#if defined(__CUDA) || defined(__ROCM) - T* hcc_cpu = nullptr; - T* scc_cpu = nullptr; - T* vcc_cpu = nullptr; - base_device::memory::resize_memory_op()(hcc_cpu, - this->nbase_x * this->nbase_x, - "DAV::hcc"); - base_device::memory::resize_memory_op()(scc_cpu, - this->nbase_x * this->nbase_x, - "DAV::scc"); - base_device::memory::resize_memory_op()(vcc_cpu, - this->nbase_x * this->nbase_x, - "DAV::vcc"); - - syncmem_d2h_op()(hcc_cpu, hcc, this->nbase_x * this->nbase_x); - syncmem_d2h_op()(scc_cpu, scc, this->nbase_x * this->nbase_x); - syncmem_d2h_op()(vcc_cpu, vcc, this->nbase_x * this->nbase_x); - - for (int i = 0; i < nbase; i++) - { - hcc_cpu[i * this->nbase_x + i] = eigenvalue_in_hsolver[i]; - scc_cpu[i * this->nbase_x + i] = this->one[0]; - vcc_cpu[i * this->nbase_x + i] = this->one[0]; - } - - syncmem_h2d_op()(hcc, hcc_cpu, this->nbase_x * this->nbase_x); - syncmem_h2d_op()(scc, scc_cpu, this->nbase_x * this->nbase_x); - syncmem_h2d_op()(vcc, vcc_cpu, this->nbase_x * this->nbase_x); - - base_device::memory::delete_memory_op()(hcc_cpu); - base_device::memory::delete_memory_op()(scc_cpu); - base_device::memory::delete_memory_op()(vcc_cpu); -#endif + refresh_hcc_scc_vcc_op()(nbase, hcc, scc, vcc, this->nbase_x, this->d_eigenvalue, this->one_); } else { diff --git a/source/source_hsolver/diago_dav_subspace.h b/source/source_hsolver/diago_dav_subspace.h index 3b4c224ec8..a2227949d4 100644 --- a/source/source_hsolver/diago_dav_subspace.h +++ b/source/source_hsolver/diago_dav_subspace.h @@ -94,6 +94,9 @@ class Diago_DavSubspace /// Eigenvectors on the reduced basis T* vcc = nullptr; + T* d_scc = nullptr; + Real* d_eigenvalue = nullptr; + /// device type of psi Device* ctx = {}; base_device::DEVICE_CPU* cpu_ctx = {}; @@ -176,6 +179,7 @@ class Diago_DavSubspace using delmem_real_op = base_device::memory::delete_memory_op; #endif using setmem_real_op = base_device::memory::set_memory_op; + using setmem_complex_2d_op = base_device::memory::set_memory_2d_op; using resmem_real_h_op = base_device::memory::resize_memory_op; using delmem_real_h_op = base_device::memory::delete_memory_op; @@ -184,6 +188,7 @@ class Diago_DavSubspace using syncmem_var_h2d_op = base_device::memory::synchronize_memory_op; using syncmem_var_d2h_op = base_device::memory::synchronize_memory_op; using syncmem_complex_op = base_device::memory::synchronize_memory_op; + using syncmem_complex_2d_op = base_device::memory::synchronize_memory_2d_op; using castmem_complex_op = base_device::memory::cast_memory_op, T, Device, Device>; using syncmem_h2d_op = base_device::memory::synchronize_memory_op; using syncmem_d2h_op = base_device::memory::synchronize_memory_op; diff --git a/source/source_hsolver/kernels/bpcg_kernel_op.cpp b/source/source_hsolver/kernels/bpcg_kernel_op.cpp index 19c51fc398..3d74591783 100644 --- a/source/source_hsolver/kernels/bpcg_kernel_op.cpp +++ b/source/source_hsolver/kernels/bpcg_kernel_op.cpp @@ -183,6 +183,30 @@ struct normalize_op { } }; +template +struct refresh_hcc_scc_vcc_op +{ + using Real = typename GetTypeReal::type; + void operator()(const int &n, + T *hcc, + T *scc, + T *vcc, + const int &ldh, + const Real *eigenvalue, + const T &one) + { +#ifdef _OPENMP +#pragma omp parallel for collapse(1) schedule(static, 8192 / sizeof(T)) +#endif + for (int i = 0; i < n; i++) + { + hcc[i * ldh + i] = eigenvalue[i]; + scc[i * ldh + i] = one; + vcc[i * ldh + i] = one; + } + } +}; + template struct calc_grad_with_block_op, base_device::DEVICE_CPU>; template struct line_minimize_with_block_op, base_device::DEVICE_CPU>; template struct calc_grad_with_block_op, base_device::DEVICE_CPU>; @@ -196,4 +220,7 @@ template struct precondition_op; template struct normalize_op, base_device::DEVICE_CPU>; template struct normalize_op, base_device::DEVICE_CPU>; template struct normalize_op; +template struct refresh_hcc_scc_vcc_op, base_device::DEVICE_CPU>; +template struct refresh_hcc_scc_vcc_op, base_device::DEVICE_CPU>; +template struct refresh_hcc_scc_vcc_op; } // namespace hsolver \ No newline at end of file diff --git a/source/source_hsolver/kernels/bpcg_kernel_op.h b/source/source_hsolver/kernels/bpcg_kernel_op.h index 802ee3c1e4..9ac7c5e2ce 100644 --- a/source/source_hsolver/kernels/bpcg_kernel_op.h +++ b/source/source_hsolver/kernels/bpcg_kernel_op.h @@ -63,11 +63,11 @@ template struct apply_eigenvalues_op { using Real = typename GetTypeReal::type; - void operator()(const int& nbase, - const int& nbase_x, - const int& notconv, - T* result, - const T* vectors, + void operator()(const int& nbase, + const int& nbase_x, + const int& notconv, + T* result, + const T* vectors, const Real* eigenvalues); }; @@ -92,6 +92,30 @@ struct normalize_op { Real* psi_norm = nullptr); }; +template struct refresh_hcc_scc_vcc_op { + using Real = typename GetTypeReal::type; + /// @brief refresh hcc scc vcc + /// + /// Input Parameters + /// \param n : first dimension of matrix + /// \param ldh : leading dimension of hcc, scc, vcc + /// \param nbase : matrix size + /// \param eigenvalue : input eigenvalue + /// \param one : constant one + /// + /// Output Parameters + /// \param hcc : output matrix hcc + /// \param scc : output matrix scc + /// \param vcc : output matrix vcc + void operator()(const int &n, + T *hcc, + T *scc, + T *vcc, + const int &ldh, + const Real *eigenvalue, + const T& one); +}; + #if __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM template @@ -114,11 +138,11 @@ struct calc_grad_with_block_op { template struct apply_eigenvalues_op { using Real = typename GetTypeReal::type; - void operator()(const int& nbase, - const int& nbase_x, - const int& notconv, - T* result, - const T* vectors, + void operator()(const int& nbase, + const int& nbase_x, + const int& notconv, + T* result, + const T* vectors, const Real* eigenvalues); }; @@ -143,6 +167,17 @@ struct normalize_op { Real* psi_norm = nullptr); }; +template +struct refresh_hcc_scc_vcc_op { + using Real = typename GetTypeReal::type; + void operator()(const int &n, + T *hcc, + T *scc, + T *vcc, + const int &ldh, + const Real *eigenvalue, + const T& one); +}; #endif } // namespace hsolver diff --git a/source/source_hsolver/kernels/cuda/bpcg_kernel_op.cu b/source/source_hsolver/kernels/cuda/bpcg_kernel_op.cu index e8af516274..b0a08cc513 100644 --- a/source/source_hsolver/kernels/cuda/bpcg_kernel_op.cu +++ b/source/source_hsolver/kernels/cuda/bpcg_kernel_op.cu @@ -7,6 +7,8 @@ namespace hsolver { const int warp_size = 32; const int thread_per_block = 256; +#define FULL_MASK 0xffffffff +#define WARP_SIZE 32 template __global__ void line_minimize_with_block( @@ -257,7 +259,7 @@ __global__ void apply_eigenvalues_kernel( { int m = blockIdx.x; int idx = threadIdx.x + blockIdx.y * blockDim.x; - + if (m < notconv && idx < nbase) { result[m * nbase_x + idx] = eigenvalues[m] * vectors[m * nbase_x + idx]; } @@ -274,7 +276,7 @@ __global__ void precondition_kernel( { int m = blockIdx.x; int i = threadIdx.x + blockIdx.y * blockDim.x; - + if (m < notconv && i < dim) { Real x = abs(precondition[i] - eigenvalues[m]); Real pre = 0.5 * (1.0 + x + sqrt(1 + (x - 1.0) * (x - 1.0))); @@ -282,6 +284,37 @@ __global__ void precondition_kernel( } } +template +__device__ Real warpReduceSum(Real val) { + for (int offset = WARP_SIZE / 2; offset > 0; offset >>= 1) + val += __shfl_down_sync(FULL_MASK, val, offset); + return val; +} + +template +__device__ Real blockReduceSum(Real val, volatile Real* shared) { + int lane = threadIdx.x % WARP_SIZE; + int wid = threadIdx.x / WARP_SIZE; + + val = warpReduceSum(val); + + if (lane == 0) + shared[wid] = val; + + __syncthreads(); + + Real sum = 0.0; + if (wid == 0) { + sum = (threadIdx.x < blockDim.x / 32) ? shared[lane] : 0.0; + sum = warpReduceSum(sum); + if (lane == 0) shared[0] = sum; + } + + __syncthreads(); + return shared[0]; +} + + template __global__ void normalize_kernel( thrust::complex* psi_iter, @@ -292,50 +325,50 @@ __global__ void normalize_kernel( { int m = blockIdx.x; int tid = threadIdx.x; - __shared__ Real sum[thread_per_block]; - - sum[tid] = 0.0; - + extern __shared__ char s_char[]; + Real* shared = reinterpret_cast(s_char); + + Real local_sum = 0.0; + // Calculate the sum for normalization for (int i = tid; i < dim; i += thread_per_block) { auto val = psi_iter[(nbase + m) * dim + i]; - sum[tid] += (val * thrust::conj(val)).real(); - } - - __syncthreads(); - - // Parallel reduction in shared memory - for (int s = thread_per_block/2; s > warp_size; s >>= 1) { - if (tid < s) { - sum[tid] += sum[tid + s]; - } - __syncthreads(); - } - - if (tid < warp_size) { - sum[tid] += sum[tid + 32]; __syncwarp(); - sum[tid] += sum[tid + 16]; __syncwarp(); - sum[tid] += sum[tid + 8]; __syncwarp(); - sum[tid] += sum[tid + 4]; __syncwarp(); - sum[tid] += sum[tid + 2]; __syncwarp(); - sum[tid] += sum[tid + 1]; __syncwarp(); + local_sum += (val * thrust::conj(val)).real(); } - - __syncthreads(); - - Real norm = sqrt(sum[0]); - + + Real l2_sq = blockReduceSum(local_sum, shared); + Real norm = sqrt(l2_sq); + // Normalize the vector for (int i = tid; i < dim; i += thread_per_block) { psi_iter[(nbase + m) * dim + i] /= norm; } - + // Store the norm if needed if (tid == 0 && psi_norm != nullptr) { psi_norm[m] = norm; } } +template +__global__ void refresh_hcc_scc_vcc_kernel( + const int n, + T *hcc, + T *scc, + T *vcc, + const int ldh, + const Real *eigenvalue, + const T one) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i < n) + { + hcc[i * ldh + i] = eigenvalue[i]; + scc[i * ldh + i] = one; + vcc[i * ldh + i] = one; + } +} + template void line_minimize_with_block_op::operator()(T* grad_out, T* hgrad_out, @@ -392,15 +425,15 @@ void apply_eigenvalues_op::operator()(const int& nba { const int threads_per_block = 256; const int blocks_per_grid_y = (nbase + threads_per_block - 1) / threads_per_block; - + dim3 grid(notconv, blocks_per_grid_y); - + auto vec_complex = reinterpret_cast*>(vectors); auto res_complex = reinterpret_cast*>(result); - + apply_eigenvalues_kernel<<>>( vec_complex, res_complex, eigenvalues, nbase, nbase_x, notconv); - + cudaCheckOnDebug(); } @@ -414,14 +447,14 @@ void precondition_op::operator()(const int& dim, { const int threads_per_block = 256; const int blocks_per_grid_y = (dim + threads_per_block - 1) / threads_per_block; - + dim3 grid(notconv, blocks_per_grid_y); - + auto psi_complex = reinterpret_cast*>(psi_iter); - + precondition_kernel<<>>( psi_complex, precondition, eigenvalues, dim, nbase, notconv); - + cudaCheckOnDebug(); } @@ -433,10 +466,63 @@ void normalize_op::operator()(const int& dim, Real* psi_norm) { auto psi_complex = reinterpret_cast*>(psi_iter); - - normalize_kernel<<>>( + int sharedMemSize = (thread_per_block / WARP_SIZE) * sizeof(Real); + + normalize_kernel<<>>( psi_complex, psi_norm, dim, nbase, notconv); - + + cudaCheckOnDebug(); +} + +template <> +void refresh_hcc_scc_vcc_op::operator()(const int &n, + double *hcc, + double *scc, + double *vcc, + const int &ldh, + const double *eigenvalue, + const double& one) +{ + int thread = 512; + int block = (n + thread - 1) / thread; + refresh_hcc_scc_vcc_kernel <<>> (n, hcc, scc, vcc, ldh, eigenvalue, one); + + cudaCheckOnDebug(); +} + +template <> +void refresh_hcc_scc_vcc_op, base_device::DEVICE_GPU>::operator()(const int &n, + std::complex *hcc, + std::complex *scc, + std::complex *vcc, + const int &ldh, + const float *eigenvalue, + const std::complex& one) +{ + int thread = 512; + int block = (n + thread - 1) / thread; + refresh_hcc_scc_vcc_kernel, float> <<>> (n, reinterpret_cast*>(hcc), + reinterpret_cast*>(scc), reinterpret_cast*>(vcc), ldh, eigenvalue, + thrust::complex(one)); + + cudaCheckOnDebug(); +} + +template <> +void refresh_hcc_scc_vcc_op, base_device::DEVICE_GPU>::operator()(const int &n, + std::complex *hcc, + std::complex *scc, + std::complex *vcc, + const int &ldh, + const double *eigenvalue, + const std::complex& one) +{ + int thread = 512; + int block = (n + thread - 1) / thread; + refresh_hcc_scc_vcc_kernel, double> <<>> (n, reinterpret_cast*>(hcc), + reinterpret_cast*>(scc), reinterpret_cast*>(vcc), ldh, eigenvalue, + thrust::complex(one)); + cudaCheckOnDebug(); } @@ -453,4 +539,7 @@ template struct precondition_op; template struct normalize_op, base_device::DEVICE_GPU>; template struct normalize_op, base_device::DEVICE_GPU>; template struct normalize_op; +template struct refresh_hcc_scc_vcc_op, base_device::DEVICE_GPU>; +template struct refresh_hcc_scc_vcc_op, base_device::DEVICE_GPU>; +template struct refresh_hcc_scc_vcc_op; } \ No newline at end of file diff --git a/source/source_hsolver/kernels/cuda/dngvd_op.cu b/source/source_hsolver/kernels/cuda/dngvd_op.cu index 4ce3d9a1d0..5c1ef8ccab 100644 --- a/source/source_hsolver/kernels/cuda/dngvd_op.cu +++ b/source/source_hsolver/kernels/cuda/dngvd_op.cu @@ -216,7 +216,7 @@ struct dngvd_op Real* W, // eigenvalue T* V) { - assert(nstart == ldh); + // assert(nstart == ldh); // A to V cudaErrcheck(cudaMemcpy(V, A, sizeof(T) * ldh * nstart, cudaMemcpyDeviceToDevice)); xhegvd_wrapper(CUBLAS_FILL_MODE_UPPER, nstart, V, ldh, diff --git a/source/source_hsolver/kernels/rocm/bpcg_kernel_op.hip.cu b/source/source_hsolver/kernels/rocm/bpcg_kernel_op.hip.cu index 095cd4deff..b7bbeb7494 100644 --- a/source/source_hsolver/kernels/rocm/bpcg_kernel_op.hip.cu +++ b/source/source_hsolver/kernels/rocm/bpcg_kernel_op.hip.cu @@ -182,7 +182,7 @@ __global__ void apply_eigenvalues_kernel( { int m = blockIdx.x; int idx = threadIdx.x + blockIdx.y * blockDim.x; - + if (m < notconv && idx < nbase) { result[m * nbase_x + idx] = eigenvalues[m] * vectors[m * nbase_x + idx]; } @@ -199,7 +199,7 @@ __global__ void precondition_kernel( { int m = blockIdx.x; int i = threadIdx.x + blockIdx.y * blockDim.x; - + if (m < notconv && i < dim) { Real x = abs(precondition[i] - eigenvalues[m]); Real pre = 0.5 * (1.0 + x + sqrt(1 + (x - 1.0) * (x - 1.0))); @@ -218,17 +218,17 @@ __global__ void normalize_kernel( int m = blockIdx.x; int tid = threadIdx.x; __shared__ Real sum[THREAD_PER_BLOCK]; - + sum[tid] = 0.0; - + // Calculate the sum for normalization for (int i = tid; i < dim; i += THREAD_PER_BLOCK) { auto val = psi_iter[(nbase + m) * dim + i]; sum[tid] += (val * thrust::conj(val)).real(); } - + __syncthreads(); - + // Parallel reduction in shared memory for (int s = THREAD_PER_BLOCK/2; s > 0; s >>= 1) { if (tid < s) { @@ -236,20 +236,39 @@ __global__ void normalize_kernel( } __syncthreads(); } - + Real norm = sqrt(sum[0]); - + // Normalize the vector for (int i = tid; i < dim; i += THREAD_PER_BLOCK) { psi_iter[(nbase + m) * dim + i] /= norm; } - + // Store the norm if needed if (tid == 0 && psi_norm != nullptr) { psi_norm[m] = norm; } } +template +__global__ void refresh_hcc_scc_vcc_kernel( + const int n, + T *hcc, + T *scc, + T *vcc, + const int ldh, + const Real *eigenvalue, + const T one) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i < n) + { + hcc[i * ldh + i] = eigenvalue[i]; + scc[i * ldh + i] = one; + vcc[i * ldh + i] = one; + } +} + template void line_minimize_with_block_op::operator()(T* grad_out, T* hgrad_out, @@ -306,15 +325,15 @@ void apply_eigenvalues_op::operator()(const int& nba { const int threads_per_block = 256; const int blocks_per_grid_y = (nbase + threads_per_block - 1) / threads_per_block; - + dim3 grid(notconv, blocks_per_grid_y); - + auto vec_complex = reinterpret_cast*>(vectors); auto res_complex = reinterpret_cast*>(result); - + apply_eigenvalues_kernel<<>>( vec_complex, res_complex, eigenvalues, nbase, nbase_x, notconv); - + hipCheckOnDebug(); } @@ -328,14 +347,14 @@ void precondition_op::operator()(const int& dim, { const int threads_per_block = 256; const int blocks_per_grid_y = (dim + threads_per_block - 1) / threads_per_block; - + dim3 grid(notconv, blocks_per_grid_y); - + auto psi_complex = reinterpret_cast*>(psi_iter); - + precondition_kernel<<>>( psi_complex, precondition, eigenvalues, dim, nbase, notconv); - + hipCheckOnDebug(); } @@ -347,10 +366,62 @@ void normalize_op::operator()(const int& dim, Real* psi_norm) { auto psi_complex = reinterpret_cast*>(psi_iter); - + normalize_kernel<<>>( psi_complex, psi_norm, dim, nbase, notconv); - + + hipCheckOnDebug(); +} + +template <> +void refresh_hcc_scc_vcc_op::operator()(const int &n, + double *hcc, + double *scc, + double *vcc, + const int &ldh, + const double *eigenvalue, + const double& one) +{ + int thread = 512; + int block = (n + thread - 1) / thread; + refresh_hcc_scc_vcc_kernel <<>> (n, hcc, scc, vcc, ldh, eigenvalue, one); + + hipCheckOnDebug(); +} + +template <> +void refresh_hcc_scc_vcc_op, base_device::DEVICE_GPU>::operator()(const int &n, + std::complex *hcc, + std::complex *scc, + std::complex *vcc, + const int &ldh, + const float *eigenvalue, + const std::complex& one) +{ + int thread = 512; + int block = (n + thread - 1) / thread; + refresh_hcc_scc_vcc_kernel, float> <<>> (n, reinterpret_cast*>(hcc), + reinterpret_cast*>(scc), reinterpret_cast*>(vcc), ldh, eigenvalue, + thrust::complex(one)); + + hipCheckOnDebug(); +} + +template <> +void refresh_hcc_scc_vcc_op, base_device::DEVICE_GPU>::operator()(const int &n, + std::complex *hcc, + std::complex *scc, + std::complex *vcc, + const int &ldh, + const double *eigenvalue, + const std::complex& one) +{ + int thread = 512; + int block = (n + thread - 1) / thread; + refresh_hcc_scc_vcc_kernel, double> <<>> (n, reinterpret_cast*>(hcc), + reinterpret_cast*>(scc), reinterpret_cast*>(vcc), ldh, eigenvalue, + thrust::complex(one)); + hipCheckOnDebug(); } @@ -367,4 +438,7 @@ template struct precondition_op; template struct normalize_op, base_device::DEVICE_GPU>; template struct normalize_op, base_device::DEVICE_GPU>; template struct normalize_op; +template struct refresh_hcc_scc_vcc_op, base_device::DEVICE_GPU>; +template struct refresh_hcc_scc_vcc_op, base_device::DEVICE_GPU>; +template struct refresh_hcc_scc_vcc_op; } \ No newline at end of file diff --git a/source/source_hsolver/kernels/rocm/dngvd_op.hip.cu b/source/source_hsolver/kernels/rocm/dngvd_op.hip.cu index a359ccda87..6730e0f936 100644 --- a/source/source_hsolver/kernels/rocm/dngvd_op.hip.cu +++ b/source/source_hsolver/kernels/rocm/dngvd_op.hip.cu @@ -8,7 +8,7 @@ namespace hsolver { // NOTE: mimicked from ../cuda/dngvd_op.cu for three dngvd_op static hipsolverHandle_t hipsolver_H = nullptr; -// Test on DCU platform. When nstart is greater than 234, code on DCU performs better. +// Test on DCU platform. When nstart is greater than 234, code on DCU performs better. const int N_DCU = 234; void createGpuSolverHandle() { @@ -97,7 +97,7 @@ void dngvd_op::operator()(const base_device::DE hipErrcheck(hipMemcpy(_eigenvalue, eigenvalue.data(), sizeof(double) * eigenvalue.size(), hipMemcpyHostToDevice)); } - + } #endif // __LCAO @@ -112,7 +112,7 @@ void dngvd_op, base_device::DEVICE_GPU>::operator()(const ba { // copied from ../cuda/dngvd_op.cu, "dngvd_op" assert(nstart == ldh); - + if (nstart > N_DCU){ hipErrcheck(hipMemcpy(_vcc, _hcc, sizeof(std::complex) * ldh * nstart, hipMemcpyDeviceToDevice)); // now vcc contains hcc @@ -170,7 +170,7 @@ void dngvd_op, base_device::DEVICE_GPU>::operator()(const ba hipErrcheck(hipMemcpy(_eigenvalue, eigenvalue.data(), sizeof(float) * eigenvalue.size(), hipMemcpyHostToDevice)); } - + } template <> @@ -184,7 +184,7 @@ void dngvd_op, base_device::DEVICE_GPU>::operator()(const b ) { // copied from ../cuda/dngvd_op.cu, "dngvd_op" - assert(nstart == ldh); + // assert(nstart == ldh); // save a copy of scc in case the diagonalization fails if (nstart > N_DCU){ @@ -253,7 +253,7 @@ void dngvd_op, base_device::DEVICE_GPU>::operator()(const b - + } #ifdef __LCAO From af8fe353b1458cd0cd019abe430aaa45fdb7ec0b Mon Sep 17 00:00:00 2001 From: Tianxiang Wang Date: Tue, 9 Sep 2025 16:03:10 +0800 Subject: [PATCH 08/13] Fix: resolve compile error with USE_ELPA=OFF + BUILD_TESTING=ON and switch to nvtx3 headers when CUDA_VERSION >= 12090 (#6497) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix: switch to nvtx3 headers when CUDA_VERSION >= 12090 Signed-off-by:Tianxiang Wang, Contributed under MetaX Integrated Circuits (Shanghai) Co., Ltd. * Fix: resolve compile error with USE_ELPA=OFF + BUILD_TESTING=ON Signed-off-by:Tianxiang Wang, Contributed under MetaX Integrated Circuits (Shanghai) Co., Ltd. --- source/source_base/timer.cpp | 6 ++- source/source_hsolver/test/CMakeLists.txt | 19 ++++++--- .../test/test_diago_hs_para.cpp | 42 ++++++++++++++----- 3 files changed, 50 insertions(+), 17 deletions(-) diff --git a/source/source_base/timer.cpp b/source/source_base/timer.cpp index e7f22df70d..f55bd6776a 100644 --- a/source/source_base/timer.cpp +++ b/source/source_base/timer.cpp @@ -15,7 +15,11 @@ #include "source_base/formatter.h" #if defined(__CUDA) && defined(__USE_NVTX) -#include +#if CUDA_VERSION < 12090 +#include "nvToolsExt.h" +#else +#include "nvtx3/nvToolsExt.h" +#endif #include "source_io/module_parameter/parameter.h" #endif diff --git a/source/source_hsolver/test/CMakeLists.txt b/source/source_hsolver/test/CMakeLists.txt index e3fa6550fa..95b4362958 100644 --- a/source/source_hsolver/test/CMakeLists.txt +++ b/source/source_hsolver/test/CMakeLists.txt @@ -153,12 +153,19 @@ install(FILES diago_pexsi_parallel_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DI install(FILES parallel_k2d_test.sh DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) - -AddTest( - TARGET MODULE_HSOLVER_diago_hs_parallel - LIBS parameter ${math_libs} ELPA::ELPA base device MPI::MPI_CXX genelpa psi - SOURCES test_diago_hs_para.cpp ../diag_hs_para.cpp ../diago_pxxxgvx.cpp ../diago_elpa.cpp ../diago_scalapack.cpp -) +if (USE_ELPA) + AddTest( + TARGET MODULE_HSOLVER_diago_hs_parallel + LIBS parameter ${math_libs} ELPA::ELPA base device MPI::MPI_CXX genelpa psi + SOURCES test_diago_hs_para.cpp ../diag_hs_para.cpp ../diago_pxxxgvx.cpp ../diago_elpa.cpp ../diago_scalapack.cpp + ) +else() + AddTest( + TARGET MODULE_HSOLVER_diago_hs_parallel + LIBS parameter ${math_libs} base device MPI::MPI_CXX psi + SOURCES test_diago_hs_para.cpp ../diag_hs_para.cpp ../diago_pxxxgvx.cpp ../diago_scalapack.cpp + ) +endif() AddTest( TARGET MODULE_HSOLVER_linear_trans diff --git a/source/source_hsolver/test/test_diago_hs_para.cpp b/source/source_hsolver/test/test_diago_hs_para.cpp index 425fd3b238..ad7d05c716 100644 --- a/source/source_hsolver/test/test_diago_hs_para.cpp +++ b/source/source_hsolver/test/test_diago_hs_para.cpp @@ -160,7 +160,9 @@ void test_performance(int lda, int nb, int nbands, MPI_Comm comm,int case_numb, MPI_Comm_size(comm, &nproc); std::vector h_mat, s_mat, wfc, h_psi, s_psi; +#ifdef __ELPA std::vector::type> ekb_elpa(lda); +#endif std::vector::type> ekb_scalap(lda); std::vector::type> ekb_lapack(lda); @@ -176,32 +178,36 @@ void test_performance(int lda, int nb, int nbands, MPI_Comm comm,int case_numb, } // store all the times in a vector +#ifdef __ELPA std::vector time_elpa(case_numb, 0); +#endif std::vector time_scalap(case_numb, 0); std::vector time_lapack(case_numb, 0); if (my_rank == 0) { std::cout << "Random matrix "; } - for (int randomi = 0; randomi < case_numb; ++randomi) + for (int randomi = 0; randomi < case_numb; ++randomi) { - + if (my_rank == 0) { std::cout << randomi << " "; generate_random_hs(lda, randomi, h_mat, s_mat); } - + auto start = std::chrono::high_resolution_clock::now(); + auto end = std::chrono::high_resolution_clock::now(); +#ifdef __ELPA // ELPA MPI_Barrier(comm); - auto start = std::chrono::high_resolution_clock::now(); + start = std::chrono::high_resolution_clock::now(); for (int j=0;j(h_mat.data(), s_mat.data(), lda, nbands,ekb_elpa.data(), wfc.data(), comm, 1, nb); MPI_Barrier(comm); } MPI_Barrier(comm); - auto end = std::chrono::high_resolution_clock::now(); + end = std::chrono::high_resolution_clock::now(); time_elpa[randomi] = std::chrono::duration_cast(end - start).count(); - +#endif // scalapack start = std::chrono::high_resolution_clock::now(); @@ -215,8 +221,8 @@ void test_performance(int lda, int nb, int nbands, MPI_Comm comm,int case_numb, time_scalap[randomi] = std::chrono::duration_cast(end - start).count(); //LApack - if (my_rank == 0) - { + if (my_rank == 0) + { std::vector h_tmp, s_tmp; start = std::chrono::high_resolution_clock::now(); base_device::DEVICE_CPU* ctx = {}; @@ -239,26 +245,34 @@ void test_performance(int lda, int nb, int nbands, MPI_Comm comm,int case_numb, //COMPARE EKB for (int i = 0; i < nbands; ++i) { - typename GetTypeReal::type diff_elpa_lapack = std::abs(ekb_elpa[i] - ekb_lapack[i]); typename GetTypeReal::type diff_scalap_lapack = std::abs(ekb_scalap[i] - ekb_lapack[i]); +#ifdef __ELPA + typename GetTypeReal::type diff_elpa_lapack = std::abs(ekb_elpa[i] - ekb_lapack[i]); if (diff_elpa_lapack > 1e-6 || diff_scalap_lapack > 1e-6) +#else + if (diff_scalap_lapack > 1e-6) +#endif { +#ifdef __ELPA std::cout << "eigenvalue " << i << " by ELPA: " << ekb_elpa[i] << std::endl; +#endif std::cout << "eigenvalue " << i << " by Scalapack: " << ekb_scalap[i] << std::endl; std::cout << "eigenvalue " << i << " by Lapack: " << ekb_lapack[i] << std::endl; } } } - MPI_Barrier(comm); + MPI_Barrier(comm); } if (my_rank == 0) { +#ifdef __ELPA std::cout << "\nELPA Time : "; for (int i=0; i < case_numb;i++) {std::cout << time_elpa[i] << " ";} std::cout << std::endl; +#endif std::cout << "scalapack Time: "; for (int i=0; i < case_numb;i++) @@ -271,21 +285,29 @@ void test_performance(int lda, int nb, int nbands, MPI_Comm comm,int case_numb, std::cout << std::endl; // print out the average time and speedup +#ifdef __ELPA double avg_time_elpa = 0; +#endif double avg_time_scalap = 0; double avg_time_lapack = 0; for (int i=0; i < case_numb;i++) { +#ifdef __ELPA avg_time_elpa += time_elpa[i]; +#endif avg_time_scalap += time_scalap[i]; avg_time_lapack += time_lapack[i]; } +#ifdef __ELPA avg_time_elpa /= case_numb; +#endif avg_time_scalap /= case_numb; avg_time_lapack /= case_numb; std::cout << "Average Lapack Time : " << avg_time_lapack << " ms" << std::endl; +#ifdef __ELPA std::cout << "Average ELPA Time : " << avg_time_elpa << " ms, Speedup: " << avg_time_lapack / avg_time_elpa << std::endl; +#endif std::cout << "Average Scalapack Time: " << avg_time_scalap << " ms, Speedup: " << avg_time_lapack / avg_time_scalap << std::endl; } } From 2eaa1bbeb4e4d9e9c6bea37bcd0fd22e67bcd786 Mon Sep 17 00:00:00 2001 From: Critsium Date: Wed, 10 Sep 2025 15:39:20 +0800 Subject: [PATCH 09/13] Fix dsp compilation problem (#6499) --- source/source_main/driver_run.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/source/source_main/driver_run.cpp b/source/source_main/driver_run.cpp index 579b8fbd08..78f4700a8c 100644 --- a/source/source_main/driver_run.cpp +++ b/source/source_main/driver_run.cpp @@ -6,6 +6,7 @@ #include "source_io/para_json.h" #include "source_io/print_info.h" #include "source_md/run_md.h" +#include "source_base/global_variable.h" #include "source_base/module_device/device.h" #include "source_base/module_device/memory_op.h" #include "source_base/kernels/math_kernel_op.h" From 5e8382be74b5265053d96e48403defc8dbc027e0 Mon Sep 17 00:00:00 2001 From: Tianxiang Wang Date: Wed, 10 Sep 2025 15:44:32 +0800 Subject: [PATCH 10/13] Fix: Fix crash in Debug build with multi-GPU due to forced cudaSetDevice(0) (#6498) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by:Tianxiang Wang, Contributed under MetaX Integrated Circuits (Shanghai) Co., Ltd. --- source/source_base/module_device/device.h | 12 ++++++++++-- source/source_base/module_device/output_device.cpp | 4 ++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/source/source_base/module_device/device.h b/source/source_base/module_device/device.h index ed560affdf..a073bdab91 100644 --- a/source/source_base/module_device/device.h +++ b/source/source_base/module_device/device.h @@ -65,8 +65,8 @@ std::string get_device_flag(const std::string& device, /** * @brief Get the rank of current node * Note that GPU can only be binded with CPU in the same node - * - * @return int + * + * @return int */ int get_node_rank(); int get_node_rank_with_mpi_shared(const MPI_Comm mpi_comm = MPI_COMM_WORLD); @@ -91,6 +91,14 @@ void record_device_memory(const Device* dev, std::ofstream& ofs_device, std::str return; } +#if defined(__CUDA) || defined(__ROCM) +template <> +void print_device_info(const base_device::DEVICE_GPU *ctx, std::ofstream &ofs_device); + +template <> +void record_device_memory(const base_device::DEVICE_GPU* dev, std::ofstream& ofs_device, std::string str, size_t size); +#endif + } // end of namespace information } // end of namespace base_device diff --git a/source/source_base/module_device/output_device.cpp b/source/source_base/module_device/output_device.cpp index a0cf817844..1d0f018814 100644 --- a/source/source_base/module_device/output_device.cpp +++ b/source/source_base/module_device/output_device.cpp @@ -190,7 +190,7 @@ void print_device_info( ofs_device << "Detected " << deviceCount << " CUDA Capable device(s)\n"; } int dev = 0, driverVersion = 0, runtimeVersion = 0; - cudaErrcheck(cudaSetDevice(dev)); + cudaErrcheck(cudaGetDevice(&dev)); cudaDeviceProp deviceProp; cudaErrcheck(cudaGetDeviceProperties(&deviceProp, dev)); ofs_device << "\nDevice " << dev << ":\t " << deviceProp.name << std::endl; @@ -429,7 +429,7 @@ void print_device_info( ofs_device << "Detected " << deviceCount << " CUDA Capable device(s)\n"; } int dev = 0, driverVersion = 0, runtimeVersion = 0; - hipErrcheck(hipSetDevice(dev)); + hipErrcheck(hipGetDevice(&dev)); hipDeviceProp_t deviceProp; hipErrcheck(hipGetDeviceProperties(&deviceProp, dev)); ofs_device << "\nDevice " << dev << ":\t " << deviceProp.name << std::endl; From a6d529744df37da5e2aba20ba706a78486575138 Mon Sep 17 00:00:00 2001 From: zgn-26714 <3022939753@qq.com> Date: Thu, 11 Sep 2025 14:48:56 +0800 Subject: [PATCH 11/13] Removed the temporary variable DMRGint_full when transitioning from 2D block parallelism to serial in Hcontainer(develop) (#6489) * delete tem Hcontainer to reduce memory usage * simplify the compute code * change DM2D_tmp to dm2d_tmp, use vector instead of new --- source/source_lcao/module_gint/gint.h | 2 +- source/source_lcao/module_gint/gint_old.cpp | 80 +++++++++---------- .../module_gint/temp_gint/gint_common.cpp | 70 ++++++++-------- .../module_gint/temp_gint/gint_info.h | 1 + .../source_lcao/module_lr/utils/gint_move.hpp | 4 +- 5 files changed, 80 insertions(+), 77 deletions(-) diff --git a/source/source_lcao/module_gint/gint.h b/source/source_lcao/module_gint/gint.h index 6ca6f53eab..9810b913f2 100644 --- a/source/source_lcao/module_gint/gint.h +++ b/source/source_lcao/module_gint/gint.h @@ -265,7 +265,7 @@ class Gint { std::vector*> DMRGint; //! tmp tools used in transfer_DM2DtoGrid - hamilt::HContainer* DMRGint_full = nullptr; + hamilt::HContainer* dm2d_tmp = nullptr; std::vector> pvdpRx_reduced; std::vector> pvdpRy_reduced; diff --git a/source/source_lcao/module_gint/gint_old.cpp b/source/source_lcao/module_gint/gint_old.cpp index caaf2f92c7..4c0f11c12a 100644 --- a/source/source_lcao/module_gint/gint_old.cpp +++ b/source/source_lcao/module_gint/gint_old.cpp @@ -33,7 +33,7 @@ Gint::~Gint() { delete this->hRGint_tmp[is]; } #ifdef __MPI - delete this->DMRGint_full; + delete this->dm2d_tmp; #endif } @@ -171,10 +171,9 @@ void Gint::initialize_pvpR(const UnitCell& ucell_in, const Grid_Driver* gd, cons this->hRGint_tmp[is] = new hamilt::HContainer(ucell_in.nat); } #ifdef __MPI - if (this->DMRGint_full != nullptr) { - delete this->DMRGint_full; + if (this->dm2d_tmp != nullptr) { + delete this->dm2d_tmp; } - this->DMRGint_full = new hamilt::HContainer(ucell_in.nat); #endif } @@ -210,12 +209,6 @@ void Gint::initialize_pvpR(const UnitCell& ucell_in, const Grid_Driver* gd, cons ModuleBase::Memory::record("Gint::DMRGint", this->DMRGint[0]->get_memory_size() * this->DMRGint.size()*nspin); -#ifdef __MPI - this->DMRGint_full->insert_ijrs(this->gridt->get_ijr_info(), ucell_in, npol); - this->DMRGint_full->allocate(nullptr, true); - ModuleBase::Memory::record("Gint::DMRGint_full", - this->DMRGint_full->get_memory_size()); -#endif } } @@ -231,9 +224,7 @@ void Gint::reset_DMRGint(const int& nspin) { for (auto& d : this->DMRGint) { d->allocate(nullptr, false); } #ifdef __MPI - delete this->DMRGint_full; - this->DMRGint_full = new hamilt::HContainer(*this->hRGint); - this->DMRGint_full->allocate(nullptr, false); + delete this->dm2d_tmp; #endif } } @@ -262,37 +253,46 @@ void Gint::transfer_DM2DtoGrid(std::vector*> DM2D) { } else // NSPIN=4 case { #ifdef __MPI - hamilt::transferParallels2Serials(*DM2D[0], this->DMRGint_full); -#else - this->DMRGint_full = DM2D[0]; -#endif - std::vector tmp_pointer(4, nullptr); - for (int iap = 0; iap < this->DMRGint_full->size_atom_pairs(); ++iap) { - auto& ap = this->DMRGint_full->get_atom_pair(iap); - int iat1 = ap.get_atom_i(); - int iat2 = ap.get_atom_j(); - for (int ir = 0; ir < ap.get_R_size(); ++ir) { - const ModuleBase::Vector3 r_index = ap.get_R_index(ir); - for (int is = 0; is < 4; is++) { - tmp_pointer[is] = this->DMRGint[is] - ->find_matrix(iat1, iat2, r_index) - ->get_pointer(); - } - double* data_full = ap.get_pointer(ir); - for (int irow = 0; irow < ap.get_row_size(); irow += 2) { - for (int icol = 0; icol < ap.get_col_size(); icol += 2) { - *(tmp_pointer[0])++ = data_full[icol]; - *(tmp_pointer[1])++ = data_full[icol + 1]; - } - data_full += ap.get_col_size(); - for (int icol = 0; icol < ap.get_col_size(); icol += 2) { - *(tmp_pointer[2])++ = data_full[icol]; - *(tmp_pointer[3])++ = data_full[icol + 1]; + // is=0:↑↑, 1:↑↓, 2:↓↑, 3:↓↓ + const int row_set[4] = {0, 0, 1, 1}; + const int col_set[4] = {0, 1, 0, 1}; + int mg = DM2D[0]->get_paraV()->get_global_row_size()/2; + int ng = DM2D[0]->get_paraV()->get_global_col_size()/2; + int nb = DM2D[0]->get_paraV()->get_block_size()/2; + int blacs_ctxt = DM2D[0]->get_paraV()->blacs_ctxt; + std::vector iat2iwt(ucell->nat); + for (int iat = 0; iat < ucell->nat; iat++) { + iat2iwt[iat] = ucell->get_iat2iwt()[iat]/2; + } + Parallel_Orbitals *pv = new Parallel_Orbitals(); + pv->set(mg, ng, nb, blacs_ctxt); + pv->set_atomic_trace(iat2iwt.data(), ucell->nat, mg); + auto ijr_info = DM2D[0]->get_ijr_info(); + this-> dm2d_tmp = new hamilt::HContainer(pv, nullptr, &ijr_info); + ModuleBase::Memory::record("Gint::dm2d_tmp", this->dm2d_tmp->get_memory_size()); + for (int is = 0; is < 4; is++){ + for (int iap = 0; iap < DM2D[0]->size_atom_pairs(); ++iap) { + auto& ap = DM2D[0]->get_atom_pair(iap); + int iat1 = ap.get_atom_i(); + int iat2 = ap.get_atom_j(); + for (int ir = 0; ir < ap.get_R_size(); ++ir) { + const ModuleBase::Vector3 r_index = ap.get_R_index(ir); + double* matrix_out = this -> dm2d_tmp -> find_matrix(iat1, iat2, r_index)->get_pointer(); + double* matrix_in = ap.get_pointer(ir); + for (int irow = 0; irow < ap.get_row_size()/2; irow ++) { + for (int icol = 0; icol < ap.get_col_size()/2; icol++){ + int index_i = irow* ap.get_col_size()/2 + icol; + int index_j = (irow*2+row_set[is]) * ap.get_col_size() + icol*2+col_set[is]; + matrix_out[index_i] = matrix_in[index_j]; + } } - data_full += ap.get_col_size(); } } + hamilt::transferParallels2Serials( *(this->dm2d_tmp), this->DMRGint[is]); } +#else + //this->DMRGint_full = DM2D[0]; +#endif } ModuleBase::timer::tick("Gint", "transfer_DMR"); } \ No newline at end of file diff --git a/source/source_lcao/module_gint/temp_gint/gint_common.cpp b/source/source_lcao/module_gint/temp_gint/gint_common.cpp index fc3248ad93..7c3649641d 100644 --- a/source/source_lcao/module_gint/temp_gint/gint_common.cpp +++ b/source/source_lcao/module_gint/temp_gint/gint_common.cpp @@ -163,44 +163,46 @@ void transfer_dm_2d_to_gint( } else // NSPIN=4 case { #ifdef __MPI - const int npol = 2; - HContainer dm_full = gint_info.get_hr(npol); - hamilt::transferParallels2Serials(*dm[0], &dm_full); -#else - HContainer& dm_full = *(dm[0]); -#endif - std::vector tmp_pointer(4, nullptr); - for (int iap = 0; iap < dm_full.size_atom_pairs(); iap++) - { - auto& ap = dm_full.get_atom_pair(iap); - const int iat1 = ap.get_atom_i(); - const int iat2 = ap.get_atom_j(); - for (int ir = 0; ir < ap.get_R_size(); ir++) - { - const ModuleBase::Vector3 r_index = ap.get_R_index(ir); - for (int is = 0; is < 4; is++) - { - tmp_pointer[is] = - dm_gint[is].find_matrix(iat1, iat2, r_index)->get_pointer(); - } - T* data_full = ap.get_pointer(ir); - for (int irow = 0; irow < ap.get_row_size(); irow += 2) - { - for (int icol = 0; icol < ap.get_col_size(); icol += 2) - { - *(tmp_pointer[0])++ = data_full[icol]; - *(tmp_pointer[1])++ = data_full[icol + 1]; - } - data_full += ap.get_col_size(); - for (int icol = 0; icol < ap.get_col_size(); icol += 2) - { - *(tmp_pointer[2])++ = data_full[icol]; - *(tmp_pointer[3])++ = data_full[icol + 1]; + // is=0:↑↑, 1:↑↓, 2:↓↑, 3:↓↓ + const int row_set[4] = {0, 0, 1, 1}; + const int col_set[4] = {0, 1, 0, 1}; + int mg = dm[0]->get_paraV()->get_global_row_size()/2; + int ng = dm[0]->get_paraV()->get_global_col_size()/2; + int nb = dm[0]->get_paraV()->get_block_size()/2; + int blacs_ctxt = dm[0]->get_paraV()->blacs_ctxt; + const UnitCell* ucell = gint_info.get_ucell(); + std::vector iat2iwt(ucell->nat); + for (int iat = 0; iat < ucell->nat; iat++) { + iat2iwt[iat] = ucell->get_iat2iwt()[iat]/2; + } + Parallel_Orbitals *pv = new Parallel_Orbitals(); + pv->set(mg, ng, nb, blacs_ctxt); + pv->set_atomic_trace(iat2iwt.data(), ucell->nat, mg); + auto ijr_info = dm[0]->get_ijr_info(); + HContainer* dm2d_tmp = new hamilt::HContainer(pv, nullptr, &ijr_info); + for (int is = 0; is < 4; is++){ + for (int iap = 0; iap < dm[0]->size_atom_pairs(); ++iap) { + auto& ap = dm[0]->get_atom_pair(iap); + int iat1 = ap.get_atom_i(); + int iat2 = ap.get_atom_j(); + for (int ir = 0; ir < ap.get_R_size(); ++ir) { + const ModuleBase::Vector3 r_index = ap.get_R_index(ir); + T* matrix_out = dm2d_tmp -> find_matrix(iat1, iat2, r_index)->get_pointer(); + T* matrix_in = ap.get_pointer(ir); + for (int irow = 0; irow < ap.get_row_size()/2; irow ++) { + for (int icol = 0; icol < ap.get_col_size()/2; icol ++) { + int index_i = irow* ap.get_col_size()/2 + icol; + int index_j = (irow*2+row_set[is]) * ap.get_col_size() + icol*2+col_set[is]; + matrix_out[index_i] = matrix_in[index_j]; + } } - data_full += ap.get_col_size(); } } + hamilt::transferParallels2Serials( *dm2d_tmp, &dm_gint[is]); } +#else + //HContainer& dm_full = *(dm[0]); +#endif } ModuleBase::timer::tick("Gint", "transfer_dm_2d_to_gint"); } diff --git a/source/source_lcao/module_gint/temp_gint/gint_info.h b/source/source_lcao/module_gint/temp_gint/gint_info.h index 7cfe476d25..356a62127e 100644 --- a/source/source_lcao/module_gint/temp_gint/gint_info.h +++ b/source/source_lcao/module_gint/temp_gint/gint_info.h @@ -38,6 +38,7 @@ class GintInfo const std::vector& get_trace_lo() const{ return trace_lo_; } int get_lgd() const { return lgd_; } int get_nat() const { return ucell_->nat; } // return the number of atoms in the unitcell + const UnitCell* get_ucell() const { return ucell_; } int get_local_mgrid_num() const { return localcell_info_->get_mgrids_num(); } double get_mgrid_volume() const { return meshgrid_info_->get_volume(); } diff --git a/source/source_lcao/module_lr/utils/gint_move.hpp b/source/source_lcao/module_lr/utils/gint_move.hpp index 0faa68f39d..6b4a8c82e6 100644 --- a/source/source_lcao/module_lr/utils/gint_move.hpp +++ b/source/source_lcao/module_lr/utils/gint_move.hpp @@ -60,8 +60,8 @@ Gint& Gint::operator=(Gint&& rhs) this->pvdpRz_reduced = std::move(rhs.pvdpRz_reduced); this->DMRGint = std::move(rhs.DMRGint); this->hRGint_tmp = std::move(rhs.hRGint_tmp); - this->DMRGint_full = rhs.DMRGint_full; - rhs.DMRGint_full = nullptr; + this->dm2d_tmp = rhs.dm2d_tmp; + rhs.dm2d_tmp = nullptr; return *this; } From 94cebd023df541105328425f79919a92366ea00e Mon Sep 17 00:00:00 2001 From: Erjie Wu <110683255+ErjieWu@users.noreply.github.com> Date: Fri, 12 Sep 2025 16:55:10 +0800 Subject: [PATCH 12/13] Update version to 3.9.0.14 (#6504) --- source/source_main/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/source_main/version.h b/source/source_main/version.h index fcb85ca8cf..d8d70ffa97 100644 --- a/source/source_main/version.h +++ b/source/source_main/version.h @@ -1,3 +1,3 @@ #ifndef VERSION -#define VERSION "v3.9.0.13" +#define VERSION "v3.9.0.14" #endif From 3ead988d66aca0651e8806bb2a11a2ebe1859052 Mon Sep 17 00:00:00 2001 From: Yang Shengxin Date: Thu, 18 Sep 2025 16:48:58 +0800 Subject: [PATCH 13/13] Refactor: Remove the GlobalC from sep_cell and vsep_cell * Removed GlobalC::sep_cell and GlobalC::vsep_cell from GlobalC * Integrated sep_cell into UnitCell * Integrated vsep_cell into esolver_ks_pw * Added empty constructors and destructors for Sep_Pot and Sep_Cell to facilitate unit testing compilation --- .../test/symmetry_test_analysis.cpp | 26 ++++---- .../test/symmetry_test_symtrz.cpp | 6 +- source/source_cell/sep_cell.cpp | 16 ++--- source/source_cell/sep_cell.h | 12 ++-- source/source_cell/test/klist_test.cpp | 4 ++ source/source_cell/test/klist_test_para.cpp | 4 ++ source/source_cell/test/sepcell_test.cpp | 12 ++-- .../test/support/mock_unitcell.cpp | 4 ++ source/source_cell/unitcell.cpp | 13 ++-- source/source_cell/unitcell.h | 2 + source/source_esolver/esolver_ks_pw.cpp | 26 +++++--- source/source_esolver/esolver_ks_pw.h | 9 ++- source/source_esolver/test/for_test.h | 6 +- .../module_dm/test/tmp_mocks.cpp | 6 +- source/source_estate/module_pot/pot_sep.cpp | 7 ++- source/source_estate/module_pot/pot_sep.h | 7 ++- .../module_pot/potential_new.cpp | 9 +-- .../source_estate/module_pot/potential_new.h | 9 ++- .../module_pot/potential_types.cpp | 2 +- .../test/elecstate_base_test.cpp | 4 ++ .../test/elecstate_print_test.cpp | 14 +++-- .../source_estate/test/elecstate_pw_test.cpp | 6 +- .../source_estate/test/potential_new_test.cpp | 4 ++ .../module_surchem/test/setcell.h | 6 +- .../module_vdw/test/vdw_test.cpp | 62 ++++++++++--------- .../source_hamilt/module_xc/test/xc3_mock.h | 5 ++ source/source_io/test/bessel_basis_test.cpp | 46 +++++++------- source/source_io/test/for_testing_klist.h | 4 ++ source/source_io/test/outputlog_test.cpp | 4 ++ source/source_io/test/read_wf2rho_pw_test.cpp | 16 +++-- .../module_hcontainer/test/tmp_mocks.cpp | 8 ++- .../ri_benchmark/test/ri_benchmark_test.cpp | 4 ++ .../module_operator_lcao/test/tmp_mocks.cpp | 4 ++ .../test/symmetry_rotation_test.cpp | 4 ++ source/source_pw/module_pwdft/VSep_in_pw.cpp | 28 +++++---- source/source_pw/module_pwdft/VSep_in_pw.h | 15 ++--- source/source_relax/test/for_test.h | 6 +- source/source_relax/test/relax_test.h | 6 +- 38 files changed, 277 insertions(+), 149 deletions(-) diff --git a/source/source_cell/module_symmetry/test/symmetry_test_analysis.cpp b/source/source_cell/module_symmetry/test/symmetry_test_analysis.cpp index 5d76f74957..c2947eb3be 100644 --- a/source/source_cell/module_symmetry/test/symmetry_test_analysis.cpp +++ b/source/source_cell/module_symmetry/test/symmetry_test_analysis.cpp @@ -6,7 +6,7 @@ * 1-1. test if the bravis lattice analysis is right; * 1-2. check if matrix-type and vector3-type; * input and optimized lattice vectors are right; - * 1-3. double-check for if `gtrans_convert` + * 1-3. double-check for if `gtrans_convert` * gives the same results as `veccon`; * 1-4. check `invmap` function gives the right result; * 1-5 test if `gmatrix_convert` and `gmatrix_convert_int` @@ -14,7 +14,7 @@ * 2. function: `atom_ordering_new: * test the new atom-sort algorithm gives the right result; *3. function: `pricell`: - * test if the number of primitive cells are right, + * test if the number of primitive cells are right, * using cases whose space group * is different from its point group. ***********************************************/ @@ -34,6 +34,10 @@ UnitCell::UnitCell(){} UnitCell::~UnitCell(){} Magnetism::Magnetism(){} Magnetism::~Magnetism() {} +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} TEST_F(SymmetryTest, AnalySys) { @@ -50,8 +54,8 @@ TEST_F(SymmetryTest, AnalySys) int cal_ibrav = symm.real_brav; EXPECT_EQ(cal_ibrav, ref_ibrav); EXPECT_EQ(cal_point_group, ref_point_group) << "ibrav=" << stru_lib[stru].ibrav; - - //2. input and optimized lattice, gtrans_convert and veccon + + //2. input and optimized lattice, gtrans_convert and veccon //input lattice EXPECT_EQ(symm.s1, ucell.a1); EXPECT_EQ(symm.s2, ucell.a2); @@ -73,7 +77,7 @@ TEST_F(SymmetryTest, AnalySys) symm.gtrans_convert(symm.gtrans, gtrans_optconf.data(), symm.nrotk, ucell.latvec, symm.optlat); symm.veccon(gtrans_veccon, gtrans_optconf_veccon, symm.nrotk, symm.s1, symm.s2, symm.s3, symm.a1, symm.a2, symm.a3); for(int i=0;i(gtrans_optconf_veccon[i*3], + EXPECT_EQ(gtrans_optconf[i], ModuleBase::Vector3(gtrans_optconf_veccon[i*3], gtrans_optconf_veccon[i*3+1], gtrans_optconf_veccon[i*3+2])); delete[] gtrans_veccon; delete[] gtrans_optconf_veccon; @@ -108,7 +112,7 @@ TEST_F(SymmetryTest, AnalySys) symm.gmatrix_convert_int(symm.gmatrix, gmatrix_opt, symm.nrotk, ucell.latvec, symm.optlat); //1->2 symm.gmatrix_convert_int(gmatrix_opt, gmatrix_input_back, symm.nrotk, symm.optlat, ucell.latvec); //2->3 symm.gmatrix_convert_int(gmatrix_input_back, gmatrix_opt_back, symm.nrotk, ucell.latvec, symm.optlat); //3->4 - + symm.gmatrix_convert(symm.gmatrix, kgmatrix_nonint, symm.nrotk, ucell.latvec, ucell.G); for (int i=0;i allocate_pos(ModuleSymmetry::Symmetry& symm, UnitCell& ucell) { @@ -46,7 +50,7 @@ TEST_F(SymmetryTest, ForceSymmetry) { auto check_force = [](stru_& conf, ModuleBase::matrix& force) { - // 1. check zeros + // 1. check zeros for (auto iat : conf.force_zero_iat) for (int j = 0; j < 3; ++j) EXPECT_NEAR(force(iat, j), 0.0, DOUBLETHRESHOLD); diff --git a/source/source_cell/sep_cell.cpp b/source/source_cell/sep_cell.cpp index 814ea15cba..e7ec5f1baf 100644 --- a/source/source_cell/sep_cell.cpp +++ b/source/source_cell/sep_cell.cpp @@ -4,15 +4,15 @@ #include "source_base/global_variable.h" #include "source_base/parallel_common.h" #include "source_base/tool_title.h" -#include "source_cell/unitcell.h" #include #include +#include -namespace GlobalC -{ -Sep_Cell sep_cell; -} +// namespace GlobalC +// { +// Sep_Cell sep_cell; +// } Sep_Cell::Sep_Cell() noexcept : ntype(0), omega(0.0), tpiba2(0.0) { @@ -48,7 +48,7 @@ void Sep_Cell::set_omega(const double omega_in, const double tpiba2_in) int Sep_Cell::read_sep_potentials(std::ifstream& ifpos, const std::string& pp_dir, std::ofstream& ofs_running, - UnitCell& ucell) + std::vector& ucell_atom_label) { ModuleBase::TITLE("Sep_Cell", "read_sep_potentials"); @@ -71,10 +71,10 @@ int Sep_Cell::read_sep_potentials(std::ifstream& ifpos, ss >> atom_label >> enable_tmp; // Validate atom label - if (atom_label != ucell.atom_label[i]) + if (atom_label != ucell_atom_label[i]) { GlobalV::ofs_running << "Sep potential and atom order do not match. " - << "Expected: " << ucell.atoms[i].label << ", Got: " << atom_label << std::endl; + << "Expected: " << ucell_atom_label[i] << ", Got: " << atom_label << std::endl; return false; } this->sep_enable[i] = enable_tmp; diff --git a/source/source_cell/sep_cell.h b/source/source_cell/sep_cell.h index ebea3e3e6c..03cddca2ea 100644 --- a/source/source_cell/sep_cell.h +++ b/source/source_cell/sep_cell.h @@ -4,9 +4,9 @@ #define SEP_CELL #include "source_cell/sep.h" -#include "source_cell/unitcell.h" #include +#include #include class Sep_Cell @@ -25,7 +25,7 @@ class Sep_Cell int read_sep_potentials(std::ifstream& ifpos, const std::string& pp_dir, std::ofstream& ofs_running, - UnitCell& ucell); + std::vector& ucell_atom_label); #ifdef __MPI // Broadcasts the Sep_Cell object to all processes @@ -66,9 +66,9 @@ class Sep_Cell double tpiba2; // tpiba ^ 2 }; -namespace GlobalC -{ -extern Sep_Cell sep_cell; -} +// namespace GlobalC +// { +// extern Sep_Cell sep_cell; +// } #endif // SEP_CEll diff --git a/source/source_cell/test/klist_test.cpp b/source/source_cell/test/klist_test.cpp index ee9c320409..5d9c75ff18 100644 --- a/source/source_cell/test/klist_test.cpp +++ b/source/source_cell/test/klist_test.cpp @@ -83,6 +83,10 @@ Soc::~Soc() Fcoef::~Fcoef() { } +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} /************************************************ diff --git a/source/source_cell/test/klist_test_para.cpp b/source/source_cell/test/klist_test_para.cpp index 2b03a169ab..789e58e8e5 100644 --- a/source/source_cell/test/klist_test_para.cpp +++ b/source/source_cell/test/klist_test_para.cpp @@ -86,6 +86,10 @@ Soc::~Soc() Fcoef::~Fcoef() { } +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} /************************************************ diff --git a/source/source_cell/test/sepcell_test.cpp b/source/source_cell/test/sepcell_test.cpp index d03668c780..11316355e1 100644 --- a/source/source_cell/test/sepcell_test.cpp +++ b/source/source_cell/test/sepcell_test.cpp @@ -134,7 +134,7 @@ TEST_F(SepCellTest, ReadSepPotentialsSuccess) sep_cell.init(ucell.ntype); std::ofstream ofs_running_dummy("dummy_ofs_running.tmp"); - int result = sep_cell.read_sep_potentials(ifs, pp_dir, ofs_running_dummy, ucell); + int result = sep_cell.read_sep_potentials(ifs, pp_dir, ofs_running_dummy, ucell.atom_label); ifs.close(); std::remove("dummy_ofs_running.tmp"); @@ -153,8 +153,8 @@ TEST_F(SepCellTest, ReadSepPotentialsSuccess) EXPECT_EQ(seps[0].label, ""); // Default value EXPECT_TRUE(seps[1].is_enable); // Default value - EXPECT_EQ(seps[1].mesh, 1038); // Default value - EXPECT_EQ(seps[1].label, "F"); // Default value + EXPECT_EQ(seps[1].mesh, 1038); // Default value + EXPECT_EQ(seps[1].label, "F"); // Default value EXPECT_DOUBLE_EQ(seps[1].r_in, 0.0); EXPECT_DOUBLE_EQ(seps[1].r_out, 2.5); EXPECT_DOUBLE_EQ(seps[1].r_power, 20.0); @@ -179,7 +179,7 @@ TEST_F(SepCellTest, ReadSepPotentialsNoSepFilesSection) std::ofstream ofs_running_dummy("dummy_ofs_running.tmp"); sep_cell.init(ucell.ntype); - int result = sep_cell.read_sep_potentials(ifs, pp_dir, ofs_running_dummy, ucell); + int result = sep_cell.read_sep_potentials(ifs, pp_dir, ofs_running_dummy, ucell.atom_label); ifs.close(); std::remove("dummy_ofs_running.tmp"); @@ -202,7 +202,7 @@ TEST_F(SepCellTest, BcastSepCell) sep_cell.init(ucell.ntype); std::ofstream ofs_running_dummy("dummy_ofs_running.tmp"); - int result = sep_cell.read_sep_potentials(ifs, pp_dir, ofs_running_dummy, ucell); + int result = sep_cell.read_sep_potentials(ifs, pp_dir, ofs_running_dummy, ucell.atom_label); ifs.close(); std::remove("dummy_ofs_running.tmp"); @@ -258,7 +258,7 @@ TEST_F(SepCellTest, BcastSepCell) EXPECT_DOUBLE_EQ(seps[1].r[0], 3.4643182373e-06); EXPECT_DOUBLE_EQ(seps[1].rv[0], -2.0868200000e-05); EXPECT_DOUBLE_EQ(seps[1].r[7], 2.8965849122e-05); - EXPECT_DOUBLE_EQ(seps[1].rv[7], -1.9723800000e-05); + EXPECT_DOUBLE_EQ(seps[1].rv[7], -1.9723800000e-05); } #endif // __MPI diff --git a/source/source_cell/test/support/mock_unitcell.cpp b/source/source_cell/test/support/mock_unitcell.cpp index 46d7303f56..ce8f7460f3 100644 --- a/source/source_cell/test/support/mock_unitcell.cpp +++ b/source/source_cell/test/support/mock_unitcell.cpp @@ -17,6 +17,10 @@ UnitCell::~UnitCell() { delete[] atoms; } } +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} void UnitCell::print_cell(std::ofstream& ofs) const {} diff --git a/source/source_cell/unitcell.cpp b/source/source_cell/unitcell.cpp index b607623408..e79a4206e8 100644 --- a/source/source_cell/unitcell.cpp +++ b/source/source_cell/unitcell.cpp @@ -252,8 +252,11 @@ void UnitCell::setup_cell(const std::string& fn, std::ofstream& log) // readl sep potential, currently using the pseudopotential folder (pseudo_dir in INPUT) //========================== if (PARAM.inp.dfthalf_type > 0) { - GlobalC::sep_cell.init(this->ntype); - ok3 = GlobalC::sep_cell.read_sep_potentials(ifa, PARAM.inp.pseudo_dir, GlobalV::ofs_warning, *this); + // GlobalC::sep_cell.init(this->ntype); + // ok3 = GlobalC::sep_cell.read_sep_potentials(ifa, PARAM.inp.pseudo_dir, GlobalV::ofs_warning, this->atom_label); + + sep_cell.init(this->ntype); + ok3 = sep_cell.read_sep_potentials(ifa, PARAM.inp.pseudo_dir, GlobalV::ofs_warning, this->atom_label); } //========================== // call read_atom_positions @@ -281,7 +284,8 @@ void UnitCell::setup_cell(const std::string& fn, std::ofstream& log) #ifdef __MPI unitcell::bcast_unitcell(*this); - GlobalC::sep_cell.bcast_sep_cell(); + // GlobalC::sep_cell.bcast_sep_cell(); + sep_cell.bcast_sep_cell(); #endif //======================================================== @@ -345,7 +349,8 @@ void UnitCell::setup_cell(const std::string& fn, std::ofstream& log) //=================================== this->set_iat2itia(); - GlobalC::sep_cell.set_omega(this->omega, this->tpiba2); + // GlobalC::sep_cell.set_omega(this->omega, this->tpiba2); + sep_cell.set_omega(this->omega, this->tpiba2); return; } diff --git a/source/source_cell/unitcell.h b/source/source_cell/unitcell.h index 7e1b3ecb1e..fb58adf6f2 100644 --- a/source/source_cell/unitcell.h +++ b/source/source_cell/unitcell.h @@ -3,6 +3,7 @@ #include "source_base/global_function.h" #include "source_base/global_variable.h" +#include "source_cell/sep_cell.h" #include "source_estate/magnetism.h" #include "source_io/output.h" #include "module_symmetry/symmetry.h" @@ -16,6 +17,7 @@ class UnitCell { public: Atom* atoms = nullptr; + Sep_Cell sep_cell; bool set_atom_flag = false; // added on 2009-3-8 by mohan Magnetism magnet; // magnetism Yu Liu 2021-07-03 diff --git a/source/source_esolver/esolver_ks_pw.cpp b/source/source_esolver/esolver_ks_pw.cpp index 06cea7236f..e33ab28734 100644 --- a/source/source_esolver/esolver_ks_pw.cpp +++ b/source/source_esolver/esolver_ks_pw.cpp @@ -66,6 +66,11 @@ ESolver_KS_PW::~ESolver_KS_PW() // delete Hamilt this->deallocate_hamilt(); + if (this->vsep_cell != nullptr) + { + delete this->vsep_cell; + } + if (this->pelec != nullptr) { delete reinterpret_cast*>(this->pelec); @@ -141,6 +146,14 @@ void ESolver_KS_PW::before_all_runners(UnitCell& ucell, const Input_p //! 3) inititlize the charge density. this->chr.allocate(inp.nspin); + // 3.5) initialize DFT-1/2 + if (PARAM.inp.dfthalf_type > 0) + { + this->vsep_cell = new VSep; + this->vsep_cell->init_vsep(*this->pw_rhod, ucell.sep_cell); + } + + //! 4) initialize the potential. if (this->pelec->pot == nullptr) { @@ -151,7 +164,8 @@ void ESolver_KS_PW::before_all_runners(UnitCell& ucell, const Input_p &(this->sf), &(this->solvent), &(this->pelec->f_en.etxc), - &(this->pelec->f_en.vtxc)); + &(this->pelec->f_en.vtxc), + this->vsep_cell); } //! 5) Initalize local pseudopotential @@ -210,11 +224,6 @@ void ESolver_KS_PW::before_all_runners(UnitCell& ucell, const Input_p exx_helper.set_wg(&this->pelec->wg); } } - - // 10) initialize DFT-1/2 - if (PARAM.inp.dfthalf_type > 0) { - GlobalC::vsep_cell.init_vsep(*this->pw_rhod); - } } template @@ -271,8 +280,9 @@ void ESolver_KS_PW::before_scf(UnitCell& ucell, const int istep) //---------------------------------------------------------- //! 4.5) DFT-1/2 calculations, sep potential need to generate before effective potential calculation //---------------------------------------------------------- - if (PARAM.inp.dfthalf_type > 0) { - GlobalC::vsep_cell.generate_vsep_r(this->pw_rhod[0], this->sf.strucFac); + if (PARAM.inp.dfthalf_type > 0) + { + this->vsep_cell->generate_vsep_r(this->pw_rhod[0], this->sf.strucFac, ucell.sep_cell); } //---------------------------------------------------------- diff --git a/source/source_esolver/esolver_ks_pw.h b/source/source_esolver/esolver_ks_pw.h index d2efb640bc..523ae91939 100644 --- a/source/source_esolver/esolver_ks_pw.h +++ b/source/source_esolver/esolver_ks_pw.h @@ -1,10 +1,11 @@ #ifndef ESOLVER_KS_PW_H #define ESOLVER_KS_PW_H #include "./esolver_ks.h" -#include "source_pw/module_pwdft/operator_pw/velocity_pw.h" #include "source_psi/psi_init.h" -#include "source_pw/module_pwdft/module_exx_helper/exx_helper.h" +#include "source_pw/module_pwdft/VSep_in_pw.h" #include "source_pw/module_pwdft/global.h" +#include "source_pw/module_pwdft/module_exx_helper/exx_helper.h" +#include "source_pw/module_pwdft/operator_pw/velocity_pw.h" #include #include @@ -59,6 +60,9 @@ class ESolver_KS_PW : public ESolver_KS // psi_initializer controller psi::PSIInit* p_psi_init = nullptr; + // DFT-1/2 method + VSep* vsep_cell = nullptr; + Device* ctx = {}; base_device::AbacusDevice_t device = {}; @@ -71,7 +75,6 @@ class ESolver_KS_PW : public ESolver_KS using castmem_2d_d2h_op = base_device::memory::cast_memory_op, T, base_device::DEVICE_CPU, Device>; - }; } // namespace ModuleESolver #endif diff --git a/source/source_esolver/test/for_test.h b/source/source_esolver/test/for_test.h index c8defbb8e7..ba8c9bb415 100644 --- a/source/source_esolver/test/for_test.h +++ b/source/source_esolver/test/for_test.h @@ -71,4 +71,8 @@ pseudo::pseudo() pseudo::~pseudo() { } -#endif \ No newline at end of file +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} +#endif diff --git a/source/source_estate/module_dm/test/tmp_mocks.cpp b/source/source_estate/module_dm/test/tmp_mocks.cpp index d890a90beb..dcf803ca5f 100644 --- a/source/source_estate/module_dm/test/tmp_mocks.cpp +++ b/source/source_estate/module_dm/test/tmp_mocks.cpp @@ -45,6 +45,10 @@ pseudo::pseudo() pseudo::~pseudo() { } +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} // constructor of UnitCell UnitCell::UnitCell() @@ -113,4 +117,4 @@ Record_adj::Record_adj() } Record_adj::~Record_adj() { -} \ No newline at end of file +} diff --git a/source/source_estate/module_pot/pot_sep.cpp b/source/source_estate/module_pot/pot_sep.cpp index a51a3a222e..e3c4bb0696 100644 --- a/source/source_estate/module_pot/pot_sep.cpp +++ b/source/source_estate/module_pot/pot_sep.cpp @@ -14,9 +14,12 @@ void PotSep::cal_fixed_v(double* vl_pseudo) // const_cast(this->vsep_)->generate_vsep_r(this->rho_basis_[0], this->sf_[0]); - for (int ir = 0; ir < this->rho_basis_->nrxx; ++ir) + if (vsep_cell != nullptr) { - vl_pseudo[ir] += GlobalC::vsep_cell.vsep_r[ir]; + for (int ir = 0; ir < this->rho_basis_->nrxx; ++ir) + { + vl_pseudo[ir] += vsep_cell->vsep_r[ir]; + } } ModuleBase::timer::tick("PotSep", "cal_fixed_v"); diff --git a/source/source_estate/module_pot/pot_sep.h b/source/source_estate/module_pot/pot_sep.h index d42fcfe7a9..fce110cb40 100644 --- a/source/source_estate/module_pot/pot_sep.h +++ b/source/source_estate/module_pot/pot_sep.h @@ -23,9 +23,10 @@ class PotSep : public PotBase // this->fixed_mode = true; // this->dynamic_mode = false; // } - PotSep(const ModuleBase::ComplexMatrix* sf_in, const ModulePW::PW_Basis* rho_basis_in) : sf_(sf_in) + PotSep(const ModuleBase::ComplexMatrix* sf_in, const ModulePW::PW_Basis* rho_basis_in, const VSep* vsep_cell_in) + : sf_(sf_in), vsep_cell(vsep_cell_in) { - assert(GlobalC::vsep_cell.vsep_form.nr == this->sf_->nr); + assert(vsep_cell->vsep_form.nr == this->sf_->nr); // assert(this->vsep_->vsep_form.nr == this->sf_->nr); this->rho_basis_ = rho_basis_in; // this->ntype_ = this->vsep_->vsep_form.nr; @@ -35,7 +36,7 @@ class PotSep : public PotBase void cal_fixed_v(double* vl_pseudo) override; - // const VSep* vsep_ = nullptr; + const VSep* vsep_cell = nullptr; const ModuleBase::ComplexMatrix* sf_ = nullptr; // int ntype_ = 0; // const bool* sep_enable_; diff --git a/source/source_estate/module_pot/potential_new.cpp b/source/source_estate/module_pot/potential_new.cpp index 3b34e0d026..3d958bc87e 100644 --- a/source/source_estate/module_pot/potential_new.cpp +++ b/source/source_estate/module_pot/potential_new.cpp @@ -21,8 +21,9 @@ Potential::Potential(const ModulePW::PW_Basis* rho_basis_in, Structure_Factor* structure_factors_in, surchem* solvent_in, double* etxc_in, - double* vtxc_in) - : ucell_(ucell_in), vloc_(vloc_in), structure_factors_(structure_factors_in), solvent_(solvent_in), etxc_(etxc_in), + double* vtxc_in, + VSep* vsep_cell_in) + : ucell_(ucell_in), vloc_(vloc_in), structure_factors_(structure_factors_in), solvent_(solvent_in), vsep_cell(vsep_cell_in), etxc_(etxc_in), vtxc_(vtxc_in) { this->rho_basis_ = rho_basis_in; @@ -94,11 +95,11 @@ void Potential::allocate() ModuleBase::TITLE("Potential", "allocate"); int nrxx = this->rho_basis_->nrxx; int nrxx_smooth = this->rho_basis_smooth_->nrxx; - if (nrxx == 0) + if (nrxx == 0) { return; } - if (nrxx_smooth == 0) + if (nrxx_smooth == 0) { return; } diff --git a/source/source_estate/module_pot/potential_new.h b/source/source_estate/module_pot/potential_new.h index ec72daaae5..ec2688ed9b 100644 --- a/source/source_estate/module_pot/potential_new.h +++ b/source/source_estate/module_pot/potential_new.h @@ -4,6 +4,7 @@ #include "source_base/complexmatrix.h" #include "source_hamilt/module_surchem/surchem.h" #include "source_pw/module_pwdft/VNL_in_pw.h" +#include "source_pw/module_pwdft/VSep_in_pw.h" #include "source_pw/module_pwdft/structure_factor.h" #include "pot_base.h" @@ -61,7 +62,8 @@ class Potential : public PotBase Structure_Factor* structure_factors_in, surchem* solvent_in, double* etxc_in, - double* vtxc_in); + double* vtxc_in, + VSep* vsep_cell_in = nullptr); ~Potential(); // initialize potential when SCF begin @@ -174,7 +176,7 @@ class Potential : public PotBase { return this->rho_basis_; } - // What about adding a function to get the wfc? + // What about adding a function to get the wfc? // This is useful for the calculation of the exx energy @@ -220,9 +222,10 @@ class Potential : public PotBase const ModuleBase::matrix* vloc_ = nullptr; Structure_Factor* structure_factors_ = nullptr; surchem* solvent_ = nullptr; + VSep* vsep_cell = nullptr; bool use_gpu_ = false; }; } // namespace elecstate -#endif \ No newline at end of file +#endif diff --git a/source/source_estate/module_pot/potential_types.cpp b/source/source_estate/module_pot/potential_types.cpp index db647788d2..f62a34aa75 100644 --- a/source/source_estate/module_pot/potential_types.cpp +++ b/source/source_estate/module_pot/potential_types.cpp @@ -58,7 +58,7 @@ PotBase* Potential::get_pot_type(const std::string& pot_type) } #endif else if (pot_type == "dfthalf") { - return new PotSep(&(this->structure_factors_->strucFac), this->rho_basis_); + return new PotSep(&(this->structure_factors_->strucFac), this->rho_basis_, this->vsep_cell); } else { diff --git a/source/source_estate/test/elecstate_base_test.cpp b/source/source_estate/test/elecstate_base_test.cpp index 4e9aa2307d..da791d4f38 100644 --- a/source/source_estate/test/elecstate_base_test.cpp +++ b/source/source_estate/test/elecstate_base_test.cpp @@ -52,6 +52,10 @@ InfoNonlocal::InfoNonlocal() InfoNonlocal::~InfoNonlocal() { } +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} #include "source_cell/klist.h" ModulePW::PW_Basis::PW_Basis() diff --git a/source/source_estate/test/elecstate_print_test.cpp b/source/source_estate/test/elecstate_print_test.cpp index 0003ef2475..3a016d5ab6 100644 --- a/source/source_estate/test/elecstate_print_test.cpp +++ b/source/source_estate/test/elecstate_print_test.cpp @@ -11,7 +11,7 @@ #include "source_hamilt/module_xc/xc_functional.h" #include "source_io/module_parameter/parameter.h" #include "source_estate/elecstate_print.h" -#undef private +#undef private /*************************************************************** * mock functions ****************************************************************/ @@ -37,6 +37,10 @@ Charge::Charge() Charge::~Charge() { } +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} int XC_Functional::func_type = 0; bool XC_Functional::ked_flag = false; @@ -141,7 +145,7 @@ TEST_F(ElecStatePrintTest, PrintEtot) for (int i = 0; i < vdw_methods.size(); i++) { PARAM.input.vdw_method = vdw_methods[i]; - elecstate::print_etot(ucell.magnet,elecstate, converged, iter, scf_thr, + elecstate::print_etot(ucell.magnet,elecstate, converged, iter, scf_thr, scf_thr_kin, duration, pw_diag_thr, avg_iter, false); } @@ -152,7 +156,7 @@ TEST_F(ElecStatePrintTest, PrintEtot) PARAM.input.ks_solver = ks_solvers[i]; testing::internal::CaptureStdout(); - elecstate::print_etot(ucell.magnet,elecstate,converged, iter, scf_thr, + elecstate::print_etot(ucell.magnet,elecstate,converged, iter, scf_thr, scf_thr_kin, duration, pw_diag_thr, avg_iter, print); output = testing::internal::GetCapturedStdout(); @@ -221,7 +225,7 @@ TEST_F(ElecStatePrintTest, PrintEtotColorS2) PARAM.input.nspin = 2; GlobalV::MY_RANK = 0; - elecstate::print_etot(ucell.magnet,elecstate,converged, iter, scf_thr, + elecstate::print_etot(ucell.magnet,elecstate,converged, iter, scf_thr, scf_thr_kin, duration, pw_diag_thr, avg_iter, print); delete elecstate.charge; @@ -252,7 +256,7 @@ TEST_F(ElecStatePrintTest, PrintEtotColorS4) PARAM.input.noncolin = true; GlobalV::MY_RANK = 0; - elecstate::print_etot(ucell.magnet,elecstate, converged, iter, scf_thr, scf_thr_kin, + elecstate::print_etot(ucell.magnet,elecstate, converged, iter, scf_thr, scf_thr_kin, duration, pw_diag_thr, avg_iter, print); delete elecstate.charge; diff --git a/source/source_estate/test/elecstate_pw_test.cpp b/source/source_estate/test/elecstate_pw_test.cpp index 865b4f0049..4f211ad923 100644 --- a/source/source_estate/test/elecstate_pw_test.cpp +++ b/source/source_estate/test/elecstate_pw_test.cpp @@ -43,6 +43,10 @@ Magnetism::Magnetism() Magnetism::~Magnetism() { } +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} #ifdef __LCAO InfoNonlocal::InfoNonlocal() { @@ -331,4 +335,4 @@ TEST_F(ElecStatePWTest, ParallelKSingle) EXPECT_NO_THROW(elecstate_pw_s->parallelK()); } -#undef protected \ No newline at end of file +#undef protected diff --git a/source/source_estate/test/potential_new_test.cpp b/source/source_estate/test/potential_new_test.cpp index 82a10bf17a..b874be7b39 100644 --- a/source/source_estate/test/potential_new_test.cpp +++ b/source/source_estate/test/potential_new_test.cpp @@ -24,6 +24,10 @@ Magnetism::Magnetism() Magnetism::~Magnetism() { } +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} #ifdef __LCAO InfoNonlocal::InfoNonlocal() { diff --git a/source/source_hamilt/module_surchem/test/setcell.h b/source/source_hamilt/module_surchem/test/setcell.h index 308a557319..6e132f7dd1 100644 --- a/source/source_hamilt/module_surchem/test/setcell.h +++ b/source/source_hamilt/module_surchem/test/setcell.h @@ -27,6 +27,10 @@ Atom_pseudo::Atom_pseudo(){}; Atom_pseudo::~Atom_pseudo(){}; pseudo::pseudo(){}; pseudo::~pseudo(){}; +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} /* Structure_Factor::Structure_Factor(){}; Structure_Factor::~Structure_Factor(){}; @@ -85,4 +89,4 @@ class Setcell }; }; -#endif \ No newline at end of file +#endif diff --git a/source/source_hamilt/module_vdw/test/vdw_test.cpp b/source/source_hamilt/module_vdw/test/vdw_test.cpp index 06bfa00571..5d66cb53a5 100644 --- a/source/source_hamilt/module_vdw/test/vdw_test.cpp +++ b/source/source_hamilt/module_vdw/test/vdw_test.cpp @@ -20,10 +20,10 @@ /** * - Tested functions: * - vdw::make_vdw(): -* Based on the value of INPUT.vdw_method, construct +* Based on the value of INPUT.vdw_method, construct * Vdwd2 or Vdwd3 class, and do the initialization. * - vdw::get_energy()/vdw::get_force()/vdw::get_stress(): -* Calculate the VDW (d2, d3_0 and d3_bj types) enerygy, force, stress. +* Calculate the VDW (d2, d3_0 and d3_bj types) enerygy, force, stress. * - Vdwd2Parameters::initial_parameters() * - Vdwd3Parameters::initial_parameters() */ @@ -60,6 +60,10 @@ Magnetism::~Magnetism() } InfoNonlocal::InfoNonlocal(){} InfoNonlocal::~InfoNonlocal(){} +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} struct atomtype_ { @@ -130,7 +134,7 @@ void construct_ucell(stru_ &stru, UnitCell &ucell) { ucell.itia2iat(it, ia) = iat; ++iat; - } + } } } @@ -211,7 +215,7 @@ TEST_F(vdwd2Test, OneAtomWarning) GlobalV::ofs_warning.open("warning.log"); std::ifstream ifs; std::string output; - + std::unique_ptr vdw_test = vdw::make_vdw(ucell1, input); GlobalV::ofs_warning.close(); @@ -230,7 +234,7 @@ TEST_F(vdwd2Test, D2ReadFile) input.vdw_C6_file = "c6.txt"; input.vdw_R0_file = "r0.txt"; vdw::Vdwd2 vdwd2_test(ucell); - + vdwd2_test.parameter().initial_parameters(input); double Si_C6 = 9.13*1e6 / (ModuleBase::ELECTRONVOLT_SI * ModuleBase::NA) / pow(ModuleBase::BOHR_TO_A, 6)/ ModuleBase::Ry_to_eV; EXPECT_NEAR(vdwd2_test.parameter().C6_["Si"], Si_C6,1e-13); @@ -242,7 +246,7 @@ TEST_F(vdwd2Test, D2ReadFileError) input.vdw_C6_file = "c6_wrong.txt"; input.vdw_R0_file = "r0_wrong.txt"; vdw::Vdwd2 vdwd2_test(ucell); - + testing::internal::CaptureStdout(); EXPECT_EXIT(vdwd2_test.parameter().C6_input(input.vdw_C6_file, input.vdw_C6_unit), ::testing::ExitedWithCode(1), ""); EXPECT_EXIT(vdwd2_test.parameter().R0_input(input.vdw_R0_file, input.vdw_R0_unit), ::testing::ExitedWithCode(1), ""); @@ -284,7 +288,7 @@ TEST_F(vdwd2Test, D2RadiusUnitAngstrom) { input.vdw_cutoff_radius = "56.6918"; input.vdw_radius_unit = "Angstrom"; - + vdw::Vdwd2 vdwd2_test(ucell); vdwd2_test.parameter().initial_parameters(input); EXPECT_EQ(vdwd2_test.parameter().radius_, 56.6918/ModuleBase::BOHR_TO_A); @@ -294,32 +298,32 @@ TEST_F(vdwd2Test, D2CutoffTypePeriod) { input.vdw_cutoff_type = "period"; input.vdw_cutoff_period = {3,3,3}; - + vdw::Vdwd2 vdwd2_test(ucell); vdwd2_test.parameter().initial_parameters(input); EXPECT_EQ(vdwd2_test.parameter().period(), input.vdw_cutoff_period); } TEST_F(vdwd2Test, D2R0ZeroQuit) -{ +{ vdw::Vdwd2 vdwd2_test(ucell); vdwd2_test.parameter().initial_parameters(input); vdwd2_test.parameter().R0_["Si"] = 0.0; - + testing::internal::CaptureStdout(); EXPECT_EXIT(vdwd2_test.get_energy(), ::testing::ExitedWithCode(1), ""); std::string output = testing::internal::GetCapturedStdout(); } TEST_F(vdwd2Test, D2GetEnergy) -{ +{ auto vdw_solver = vdw::make_vdw(ucell, input); double ene = vdw_solver->get_energy(); EXPECT_NEAR(ene,-0.034526673470525196,1E-10); } TEST_F(vdwd2Test, D2GetForce) -{ +{ auto vdw_solver = vdw::make_vdw(ucell, input); std::vector> force = vdw_solver->get_force(); EXPECT_NEAR(force[0].x, -0.00078824525563651242,1e-12); @@ -331,7 +335,7 @@ TEST_F(vdwd2Test, D2GetForce) } TEST_F(vdwd2Test, D2GetStress) -{ +{ auto vdw_solver = vdw::make_vdw(ucell, input); ModuleBase::Matrix3 stress = vdw_solver->get_stress(); EXPECT_NEAR(stress.e11, -0.00020532319044269705,1e-12); @@ -394,7 +398,7 @@ TEST_F(vdwd3Test, D30Default) EXPECT_EQ(vdwd3_test.parameter().version(), "d3_0"); EXPECT_EQ(vdwd3_test.parameter().model(), "radius"); EXPECT_EQ(vdwd3_test.parameter().rthr2(), std::pow(95, 2)); - EXPECT_EQ(vdwd3_test.parameter().cn_thr2(), std::pow(40, 2)); + EXPECT_EQ(vdwd3_test.parameter().cn_thr2(), std::pow(40, 2)); } TEST_F(vdwd3Test, D30UnitA) @@ -407,7 +411,7 @@ TEST_F(vdwd3Test, D30UnitA) vdwd3_test.parameter().initial_parameters(xc, input); EXPECT_EQ(vdwd3_test.parameter().rthr2(), std::pow(95/ModuleBase::BOHR_TO_A, 2)); - EXPECT_EQ(vdwd3_test.parameter().cn_thr2(), std::pow(40/ModuleBase::BOHR_TO_A, 2)); + EXPECT_EQ(vdwd3_test.parameter().cn_thr2(), std::pow(40/ModuleBase::BOHR_TO_A, 2)); } TEST_F(vdwd3Test, D30Period) @@ -421,18 +425,18 @@ TEST_F(vdwd3Test, D30Period) std::vector rep_vdw_ref = {input.vdw_cutoff_period.x, input.vdw_cutoff_period.y, input.vdw_cutoff_period.z}; EXPECT_EQ(vdwd3_test.parameter().period(), input.vdw_cutoff_period); - EXPECT_EQ(vdwd3_test.rep_vdw_, rep_vdw_ref); + EXPECT_EQ(vdwd3_test.rep_vdw_, rep_vdw_ref); } TEST_F(vdwd3Test, D30GetEnergy) -{ +{ auto vdw_solver = vdw::make_vdw(ucell, input); double ene = vdw_solver->get_energy(); EXPECT_NEAR(ene,-0.20932367230529664,1E-10); } TEST_F(vdwd3Test, D30GetForce) -{ +{ auto vdw_solver = vdw::make_vdw(ucell, input); std::vector> force = vdw_solver->get_force(); EXPECT_NEAR(force[0].x, -0.032450975169023302,1e-12); @@ -444,7 +448,7 @@ TEST_F(vdwd3Test, D30GetForce) } TEST_F(vdwd3Test, D30GetStress) -{ +{ auto vdw_solver = vdw::make_vdw(ucell, input); ModuleBase::Matrix3 stress = vdw_solver->get_stress(); EXPECT_NEAR(stress.e11, -0.0011141545452036336,1e-12); @@ -459,15 +463,15 @@ TEST_F(vdwd3Test, D30GetStress) } TEST_F(vdwd3Test, D3bjGetEnergy) -{ - input.vdw_method = "d3_bj"; +{ + input.vdw_method = "d3_bj"; auto vdw_solver = vdw::make_vdw(ucell, input); double ene = vdw_solver->get_energy(); EXPECT_NEAR(ene,-0.047458675421836918,1E-10); } TEST_F(vdwd3Test, D3bjGetForce) -{ +{ input.vdw_method = "d3_bj"; auto vdw_solver = vdw::make_vdw(ucell, input); std::vector> force = vdw_solver->get_force(); @@ -480,7 +484,7 @@ TEST_F(vdwd3Test, D3bjGetForce) } TEST_F(vdwd3Test, D3bjGetStress) -{ +{ input.vdw_method = "d3_bj"; auto vdw_solver = vdw::make_vdw(ucell, input); ModuleBase::Matrix3 stress = vdw_solver->get_stress(); @@ -530,14 +534,14 @@ class vdwd3abcTest: public testing::Test TEST_F(vdwd3abcTest, D30GetEnergy) -{ +{ auto vdw_solver = vdw::make_vdw(ucell, input); double ene = vdw_solver->get_energy(); EXPECT_NEAR(ene,-0.11487062308916372,1E-10); } TEST_F(vdwd3abcTest, D30GetForce) -{ +{ auto vdw_solver = vdw::make_vdw(ucell, input); std::vector> force = vdw_solver->get_force(); EXPECT_NEAR(force[0].x, 0.030320738678429094,1e-12); @@ -549,7 +553,7 @@ TEST_F(vdwd3abcTest, D30GetForce) } TEST_F(vdwd3abcTest, D30GetStress) -{ +{ auto vdw_solver = vdw::make_vdw(ucell, input); ModuleBase::Matrix3 stress = vdw_solver->get_stress(); EXPECT_NEAR(stress.e11, -0.00023421562840819491,1e-12); @@ -564,7 +568,7 @@ TEST_F(vdwd3abcTest, D30GetStress) } TEST_F(vdwd3abcTest, D3bjGetEnergy) -{ +{ input.vdw_method = "d3_bj"; auto vdw_solver = vdw::make_vdw(ucell, input); double ene = vdw_solver->get_energy(); @@ -572,7 +576,7 @@ TEST_F(vdwd3abcTest, D3bjGetEnergy) } TEST_F(vdwd3abcTest, D3bjGetForce) -{ +{ input.vdw_method = "d3_bj"; auto vdw_solver = vdw::make_vdw(ucell, input); std::vector> force = vdw_solver->get_force(); @@ -585,7 +589,7 @@ TEST_F(vdwd3abcTest, D3bjGetForce) } TEST_F(vdwd3abcTest, D3bjGetStress) -{ +{ input.vdw_method = "d3_bj"; auto vdw_solver = vdw::make_vdw(ucell, input); ModuleBase::Matrix3 stress = vdw_solver->get_stress(); diff --git a/source/source_hamilt/module_xc/test/xc3_mock.h b/source/source_hamilt/module_xc/test/xc3_mock.h index 8200bb5eaa..131067c3e9 100644 --- a/source/source_hamilt/module_xc/test/xc3_mock.h +++ b/source/source_hamilt/module_xc/test/xc3_mock.h @@ -187,6 +187,11 @@ Charge::~Charge(){}; Magnetism::Magnetism(){}; Magnetism::~Magnetism(){}; +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} + namespace elecstate { void cal_ux(UnitCell& ucell) diff --git a/source/source_io/test/bessel_basis_test.cpp b/source/source_io/test/bessel_basis_test.cpp index b7de398d8f..82728e5af4 100644 --- a/source/source_io/test/bessel_basis_test.cpp +++ b/source/source_io/test/bessel_basis_test.cpp @@ -95,44 +95,44 @@ std::vector CalculateSphericalBessel(int l, double q, const std::vector< /// @return a vector of q, the q is from j_l(qr) std::vector GetSphericalBesselZeros(int order, int number) { std::map, double> zeros; - + zeros[{0, 1}] = 3.14159; zeros[{0, 2}] = 6.28318; zeros[{0, 3}] = 9.42477; zeros[{0, 4}] = 12.5664; zeros[{0, 5}] = 15.708; zeros[{0, 6}] = 18.8495; zeros[{0, 7}] = 21.9911; zeros[{0, 8}] = 25.1327; zeros[{0, 9}] = 28.2743; zeros[{0, 10}] = 31.4159; - + zeros[{1, 1}] = 4.49341; zeros[{1, 2}] = 7.72525; zeros[{1, 3}] = 10.9041; zeros[{1, 4}] = 14.0662; zeros[{1, 5}] = 17.2208; zeros[{1, 6}] = 20.3713; zeros[{1, 7}] = 23.5181; zeros[{1, 8}] = 26.6617; zeros[{1, 9}] = 29.8029; zeros[{1, 10}] = 32.9425; - + zeros[{2, 1}] = 5.76346; zeros[{2, 2}] = 9.09501; zeros[{2, 3}] = 12.3229; zeros[{2, 4}] = 15.5146; zeros[{2, 5}] = 18.6861; zeros[{2, 6}] = 21.8457; zeros[{2, 7}] = 24.9989; zeros[{2, 8}] = 28.1498; zeros[{2, 9}] = 31.2997; zeros[{2, 10}] = 34.4491; - + zeros[{3, 1}] = 7.01559; zeros[{3, 2}] = 10.4013; zeros[{3, 3}] = 13.5821; zeros[{3, 4}] = 16.7496; zeros[{3, 5}] = 19.9023; zeros[{3, 6}] = 23.0446; zeros[{3, 7}] = 26.1799; zeros[{3, 8}] = 29.3105; zeros[{3, 9}] = 32.4377; zeros[{3, 10}] = 35.5629; - + zeros[{4, 1}] = 8.26356; zeros[{4, 2}] = 11.6209; zeros[{4, 3}] = 14.7965; zeros[{4, 4}] = 17.9598; zeros[{4, 5}] = 21.113; zeros[{4, 6}] = 24.2583; zeros[{4, 7}] = 27.3979; zeros[{4, 8}] = 30.5325; zeros[{4, 9}] = 33.6635; zeros[{4, 10}] = 36.7914; - + zeros[{5, 1}] = 9.51045; zeros[{5, 2}] = 12.8377; zeros[{5, 3}] = 16.0106; zeros[{5, 4}] = 19.1714; zeros[{5, 5}] = 22.3224; zeros[{5, 6}] = 25.4666; zeros[{5, 7}] = 28.6055; zeros[{5, 8}] = 31.7408; zeros[{5, 9}] = 34.873; zeros[{5, 10}] = 38.0025; - + std::vector result; for (int i = 1; i <= number; ++i) { result.push_back(zeros[{order, i}]); } return result; } -/// @brief Get mod of q vector of Spherical Bessel functions, all q satisfy when r=`rcut`, j_l(qr)=0. +/// @brief Get mod of q vector of Spherical Bessel functions, all q satisfy when r=`rcut`, j_l(qr)=0. /// @details first solve the equation j_l(x) = 0, therefore get the table (l, k) -> x, where l is the order of SBF and k is the k-th zero of j_l(x). Then let x = q*rcut, therefore q = x/rcut, return it. /// @attention this function itself is a COMPLETE version, while the function it called GetSphericalBesselZeros may be INCOMPLETE, due to limited support of numerical table. /// @param order the angular momentum of Spherical Bessel functions @@ -268,12 +268,12 @@ std::unordered_map ReadinC4(const std::string &FileName, co else{ for (int indexchi = 0; indexchi < TotalNumChi; indexchi++){ - C4File >> word; - C4File >> word; + C4File >> word; + C4File >> word; C4File >> word; /* skip title1, 2 and 3 */ C4File >> word; std::string key = word; key += " "; - C4File >> word; key += word; key += " "; + C4File >> word; key += word; key += " "; C4File >> word; key += word; key += " "; for (int indexNumBesselFunction = 0; indexNumBesselFunction < NumBesselFunction; indexNumBesselFunction++) @@ -359,6 +359,10 @@ Magnetism::Magnetism() Magnetism::~Magnetism() { } +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} #ifdef __LCAO InfoNonlocal::InfoNonlocal() @@ -433,8 +437,8 @@ class TestBesselBasis : public ::testing::Test { int i_Nq = static_cast(sqrt(d_EnergyCutoff)*d_CutoffRadius/M_PI); /* number of SBF is expected to be 1 */ besselBasis.init( - b_TestFaln, d_EnergyCutoff, i_Ntype, i_Lmax, b_Smooth, - d_SmoothSigma, d_CutoffRadius, d_Tolerance, + b_TestFaln, d_EnergyCutoff, i_Ntype, i_Lmax, b_Smooth, + d_SmoothSigma, d_CutoffRadius, d_Tolerance, ucell, d_dk, d_dr ); EXPECT_EQ(besselBasis.get_ecut_number(), i_Nq); @@ -468,8 +472,8 @@ TEST_F(TestBesselBasis, PolynomialInterpolation2Test) { /* therefore the expected dimension of TableOne is 1*1*6 */ besselBasis.init( - b_TestFaln, d_EnergyCutoff, i_Ntype, i_Lmax, b_Smooth, - d_SmoothSigma, d_CutoffRadius, d_Tolerance, + b_TestFaln, d_EnergyCutoff, i_Ntype, i_Lmax, b_Smooth, + d_SmoothSigma, d_CutoffRadius, d_Tolerance, ucell, d_dk, d_dr ); /* gnorm for interpolation */ @@ -483,7 +487,7 @@ TEST_F(TestBesselBasis, PolynomialInterpolation2Test) { double d_x3 = 3.0 - d_x0; std::vector>> vvv_d_TableOne = GenerateTableOne( - b_Smooth, d_SmoothSigma, d_EnergyCutoff, d_CutoffRadius, + b_Smooth, d_SmoothSigma, d_EnergyCutoff, d_CutoffRadius, i_Lmax, d_dr, d_dk ); double d_yExpected = vvv_d_TableOne[0][0][i_position]*d_x1*d_x2*d_x3/6.0+ @@ -517,14 +521,14 @@ TEST_F(TestBesselBasis, PolynomialInterpolationTest) { int i_Nq = static_cast(sqrt(d_EnergyCutoff)*d_CutoffRadius/M_PI); int i_kMesh = static_cast(sqrt(d_EnergyCutoff)/d_dk) + 4 + 1; /* - manipulate Bessel_Basis::init_Faln function + manipulate Bessel_Basis::init_Faln function because for(int it=0; it>> vvv_d_TableOne = GenerateTableOne( - b_Smooth, d_SmoothSigma, d_EnergyCutoff, d_CutoffRadius, + b_Smooth, d_SmoothSigma, d_EnergyCutoff, d_CutoffRadius, i_Lmax, d_dr, d_dk ); std::vector>>> vvvv_d_Faln = GenerateFaln( diff --git a/source/source_io/test/for_testing_klist.h b/source/source_io/test/for_testing_klist.h index 0c764eb00a..e8d518fa49 100644 --- a/source/source_io/test/for_testing_klist.h +++ b/source/source_io/test/for_testing_klist.h @@ -42,6 +42,10 @@ Soc::~Soc() Fcoef::~Fcoef() { } +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} diff --git a/source/source_io/test/outputlog_test.cpp b/source/source_io/test/outputlog_test.cpp index 4932596039..5bcffe27ca 100644 --- a/source/source_io/test/outputlog_test.cpp +++ b/source/source_io/test/outputlog_test.cpp @@ -182,6 +182,10 @@ pseudo::pseudo() pseudo::~pseudo() { } +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} TEST(OutputVacuumLevelTest, OutputVacuumLevel) { diff --git a/source/source_io/test/read_wf2rho_pw_test.cpp b/source/source_io/test/read_wf2rho_pw_test.cpp index 53421fbeef..6c1dd464a9 100644 --- a/source/source_io/test/read_wf2rho_pw_test.cpp +++ b/source/source_io/test/read_wf2rho_pw_test.cpp @@ -46,6 +46,10 @@ Magnetism::Magnetism() Magnetism::~Magnetism() { } +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} int XC_Functional::func_type = 0; bool XC_Functional::ked_flag = false; @@ -285,14 +289,14 @@ TEST_F(ReadWfcRhoTest, ReadWfcRho) ModuleIO::write_wfc_pw( kpar, my_pool, my_rank, nbands, nspin, npol, - GlobalV::RANK_IN_POOL, GlobalV::NPROC_IN_POOL, + GlobalV::RANK_IN_POOL, GlobalV::NPROC_IN_POOL, PARAM.input.out_wfc_pw, ecutwfc, out_dir, *psi, *kv, wfcpw, running_log); - ModuleIO::read_wf2rho_pw(wfcpw, symm, chg, - out_dir, kpar, my_pool, my_rank, + ModuleIO::read_wf2rho_pw(wfcpw, symm, chg, + out_dir, kpar, my_pool, my_rank, GlobalV::NPROC_IN_POOL, GlobalV::RANK_IN_POOL, - nbands, nspin, npol, + nbands, nspin, npol, nkstot, kv->ik2iktot, kv->isk, running_log); // compare the charge density @@ -301,10 +305,10 @@ TEST_F(ReadWfcRhoTest, ReadWfcRho) EXPECT_NEAR(chg.rho[0][ir], chg_ref.rho[0][ir], 1e-8); } - if (GlobalV::NPROC == 1) + if (GlobalV::NPROC == 1) { EXPECT_NEAR(chg.rho[0][0], 8617.076357957576, 1e-8); - } + } else if (GlobalV::NPROC == 4) { const std::vector ref = {8207.849135313403, 35.34776105132742, 8207.849135313403, 35.34776105132742}; diff --git a/source/source_lcao/module_hcontainer/test/tmp_mocks.cpp b/source/source_lcao/module_hcontainer/test/tmp_mocks.cpp index 748512d4e1..459874d5d5 100644 --- a/source/source_lcao/module_hcontainer/test/tmp_mocks.cpp +++ b/source/source_lcao/module_hcontainer/test/tmp_mocks.cpp @@ -44,6 +44,10 @@ UnitCell::UnitCell() UnitCell::~UnitCell() { } +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} void UnitCell::set_iat2iwt(const int& npol_in) { @@ -58,7 +62,7 @@ void UnitCell::set_iat2iwt(const int& npol_in) this->iat2iwt[iat] = iwt; iwt += atoms[it].nw * this->npol; ++iat; - } + } } return; -} \ No newline at end of file +} diff --git a/source/source_lcao/module_lr/ri_benchmark/test/ri_benchmark_test.cpp b/source/source_lcao/module_lr/ri_benchmark/test/ri_benchmark_test.cpp index 9e66aad4bb..e565796a2e 100644 --- a/source/source_lcao/module_lr/ri_benchmark/test/ri_benchmark_test.cpp +++ b/source/source_lcao/module_lr/ri_benchmark/test/ri_benchmark_test.cpp @@ -15,6 +15,10 @@ Magnetism::Magnetism() {} Magnetism::~Magnetism() {} Atom::Atom() { this->nw = 2; } Atom::~Atom() {} +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} UnitCell::UnitCell() { atoms = new Atom[1]; iat2it = new int[1]; iat2it[0] = 0; diff --git a/source/source_lcao/module_operator_lcao/test/tmp_mocks.cpp b/source/source_lcao/module_operator_lcao/test/tmp_mocks.cpp index 4ea19a1fe8..35c319cc8d 100644 --- a/source/source_lcao/module_operator_lcao/test/tmp_mocks.cpp +++ b/source/source_lcao/module_operator_lcao/test/tmp_mocks.cpp @@ -20,6 +20,10 @@ pseudo::~pseudo() {} // constructor of UnitCell UnitCell::UnitCell() {} UnitCell::~UnitCell() {} +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} void UnitCell::set_iat2iwt(const int& npol_in) { this->iat2iwt.resize(this->nat); diff --git a/source/source_lcao/module_ri/module_exx_symmetry/test/symmetry_rotation_test.cpp b/source/source_lcao/module_ri/module_exx_symmetry/test/symmetry_rotation_test.cpp index 809a5dfeb7..20884b1beb 100644 --- a/source/source_lcao/module_ri/module_exx_symmetry/test/symmetry_rotation_test.cpp +++ b/source/source_lcao/module_ri/module_exx_symmetry/test/symmetry_rotation_test.cpp @@ -33,6 +33,10 @@ InfoNonlocal::InfoNonlocal() {} InfoNonlocal::~InfoNonlocal() {} Magnetism::Magnetism() {} Magnetism::~Magnetism() {} +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} class SymmetryRotationTest : public testing::Test { diff --git a/source/source_pw/module_pwdft/VSep_in_pw.cpp b/source/source_pw/module_pwdft/VSep_in_pw.cpp index 8aa9187c38..3badccbe13 100644 --- a/source/source_pw/module_pwdft/VSep_in_pw.cpp +++ b/source/source_pw/module_pwdft/VSep_in_pw.cpp @@ -13,10 +13,10 @@ #include #include -namespace GlobalC -{ -VSep vsep_cell; -} +// namespace GlobalC +// { +// VSep vsep_cell; +// } namespace { double sphere_cut(double r, double r_out, double r_power) @@ -50,24 +50,24 @@ VSep::VSep() noexcept = default; VSep::~VSep() noexcept = default; -void VSep::init_vsep(const ModulePW::PW_Basis& rho_basis) +void VSep::init_vsep(const ModulePW::PW_Basis& rho_basis, const Sep_Cell& sep_cell) { ModuleBase::TITLE("VSep", "init_vsep"); ModuleBase::timer::tick("VSep", "init_vsep"); - int ntype = GlobalC::sep_cell.get_ntype(); + int ntype = sep_cell.get_ntype(); this->vsep_form.create(ntype, rho_basis.ngg, true); - const double d_fpi_omega = ModuleBase::FOUR_PI / GlobalC::sep_cell.get_omega(); + const double d_fpi_omega = ModuleBase::FOUR_PI / sep_cell.get_omega(); int igl0 = 0; for (int it = 0; it < ntype; ++it) { - if (!GlobalC::sep_cell.get_sep_enable()[it]) + if (!sep_cell.get_sep_enable()[it]) { continue; } - const SepPot* sep_pot = &GlobalC::sep_cell.get_seps()[it]; + const SepPot* sep_pot = &sep_cell.get_seps()[it]; // Simpson integral requires that the grid points be odd, if it is even, subtract one. int mesh = sep_pot->mesh; if ((mesh & 1) == 0) @@ -108,7 +108,7 @@ void VSep::init_vsep(const ModulePW::PW_Basis& rho_basis) for (int ig = igl0; ig < rho_basis.ngg; ++ig) { - double gx2 = rho_basis.gg_uniq[ig] * GlobalC::sep_cell.get_tpiba2(); + double gx2 = rho_basis.gg_uniq[ig] * sep_cell.get_tpiba2(); double gx = std::sqrt(gx2); for (int ir = 0; ir < sep_pot->mesh; ++ir) { @@ -122,7 +122,9 @@ void VSep::init_vsep(const ModulePW::PW_Basis& rho_basis) ModuleBase::timer::tick("VSep", "init_vsep"); } -void VSep::generate_vsep_r(const ModulePW::PW_Basis& rho_basis, const ModuleBase::ComplexMatrix& sf_in) +void VSep::generate_vsep_r(const ModulePW::PW_Basis& rho_basis, + const ModuleBase::ComplexMatrix& sf_in, + const Sep_Cell& sep_cell) { ModuleBase::TITLE("VSep", "generate_vsep_r"); ModuleBase::timer::tick("VSep", "generate_vsep_r"); @@ -133,9 +135,9 @@ void VSep::generate_vsep_r(const ModulePW::PW_Basis& rho_basis, const ModuleBase std::unique_ptr[]> vg(new std::complex[rho_basis.npw]); ModuleBase::GlobalFunc::ZEROS(vg.get(), rho_basis.npw); - for (int it = 0; it < GlobalC::sep_cell.get_ntype(); it++) + for (int it = 0; it < sep_cell.get_ntype(); it++) { - if (!GlobalC::sep_cell.get_sep_enable()[it]) + if (!sep_cell.get_sep_enable()[it]) { continue; } diff --git a/source/source_pw/module_pwdft/VSep_in_pw.h b/source/source_pw/module_pwdft/VSep_in_pw.h index 5b5be91367..e2344d43af 100644 --- a/source/source_pw/module_pwdft/VSep_in_pw.h +++ b/source/source_pw/module_pwdft/VSep_in_pw.h @@ -3,6 +3,7 @@ #include "source_base/matrix.h" #include "source_basis/module_pw/pw_basis.h" +#include "source_cell/sep_cell.h" #include @@ -12,8 +13,8 @@ class VSep VSep() noexcept; ~VSep() noexcept; - void init_vsep(const ModulePW::PW_Basis& rho_basis); - void generate_vsep_r(const ModulePW::PW_Basis& rho_basis, const ModuleBase::ComplexMatrix& sf_in); + void init_vsep(const ModulePW::PW_Basis& rho_basis, const Sep_Cell& sep_cell); + void generate_vsep_r(const ModulePW::PW_Basis& rho_basis, const ModuleBase::ComplexMatrix& sf_in, const Sep_Cell& sep_cell); ModuleBase::matrix vsep_form; std::vector vsep_r; @@ -21,10 +22,10 @@ class VSep private: int nrxx = 0; }; - -namespace GlobalC -{ -extern VSep vsep_cell; -} +// +// namespace GlobalC +// { +// extern VSep vsep_cell; +// } #endif /* ifndef VSEP_IN_PW */ diff --git a/source/source_relax/test/for_test.h b/source/source_relax/test/for_test.h index 004d7da052..5696476d49 100644 --- a/source/source_relax/test/for_test.h +++ b/source/source_relax/test/for_test.h @@ -99,8 +99,12 @@ pseudo::pseudo() pseudo::~pseudo() { } +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} int ModuleSymmetry::Symmetry::symm_flag = 0; void ModuleSymmetry::Symmetry::symmetrize_mat3(ModuleBase::matrix& sigma, const Lattice& lat)const {}; void ModuleSymmetry::Symmetry::symmetrize_vec3_nat(double* v)const {}; -#endif \ No newline at end of file +#endif diff --git a/source/source_relax/test/relax_test.h b/source/source_relax/test/relax_test.h index 0cc4564194..d3c6ce5d2f 100644 --- a/source/source_relax/test/relax_test.h +++ b/source/source_relax/test/relax_test.h @@ -17,9 +17,13 @@ Atom_pseudo::Atom_pseudo(){}; Atom_pseudo::~Atom_pseudo(){}; pseudo::pseudo(){}; pseudo::~pseudo(){}; +SepPot::SepPot(){} +SepPot::~SepPot(){} +Sep_Cell::Sep_Cell() noexcept {} +Sep_Cell::~Sep_Cell() noexcept {} int ModuleSymmetry::Symmetry::symm_flag = 0; void ModuleSymmetry::Symmetry::symmetrize_mat3(ModuleBase::matrix& sigma, const Lattice& lat)const {}; void ModuleSymmetry::Symmetry::symmetrize_vec3_nat(double* v)const {}; Structure_Factor::Structure_Factor() {}; Structure_Factor::~Structure_Factor(){}; -void Structure_Factor::setup_structure_factor(const UnitCell* Ucell, const Parallel_Grid&, const ModulePW::PW_Basis* rho_basis){}; \ No newline at end of file +void Structure_Factor::setup_structure_factor(const UnitCell* Ucell, const Parallel_Grid&, const ModulePW::PW_Basis* rho_basis){};