From a50d09caaa0271e85f3cec065fe26f0f9155f50a Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Tue, 4 Mar 2025 17:43:31 +0800 Subject: [PATCH 1/7] remove wrong timer --- source/module_elecstate/module_charge/charge_mixing.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/source/module_elecstate/module_charge/charge_mixing.cpp b/source/module_elecstate/module_charge/charge_mixing.cpp index 78fe8ea70c..0fcc13fc5e 100644 --- a/source/module_elecstate/module_charge/charge_mixing.cpp +++ b/source/module_elecstate/module_charge/charge_mixing.cpp @@ -193,7 +193,6 @@ void Charge_Mixing::mix_reset() bool Charge_Mixing::if_scf_oscillate(const int iteration, const double drho, const int iternum_used, const double threshold) { ModuleBase::TITLE("Charge_Mixing", "if_scf_oscillate"); - ModuleBase::timer::tick("Charge_Mixing", "if_scf_oscillate"); if(this->_drho_history.size() == 0) { @@ -241,7 +240,4 @@ bool Charge_Mixing::if_scf_oscillate(const int iteration, const double drho, con } return false; - - ModuleBase::timer::tick("Charge_Mixing", "if_scf_oscillate"); - } \ No newline at end of file From 415c33b07542bfe74c60b47342538b7d7386f618 Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Tue, 4 Mar 2025 18:13:06 +0800 Subject: [PATCH 2/7] omp for cal_force_stress --- .../operator_lcao/nonlocal_force_stress.hpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_force_stress.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_force_stress.hpp index b6cbe96786..8299b75ee4 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_force_stress.hpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_force_stress.hpp @@ -27,7 +27,10 @@ void NonlocalNew>::cal_force_stress(const bool cal_force, } // 1. calculate for each pair of atoms // loop over all on-site atoms - int atom_index = 0; + #pragma omp parallel + { + std::vector stress_local(6, 0); + #pragma omp for schedule(dynamic) for (int iat0 = 0; iat0 < this->ucell->nat; iat0++) { // skip the atoms without plus-U @@ -157,12 +160,20 @@ void NonlocalNew>::cal_force_stress(const bool cal_force, tmp, dis1, dis2, - stress_tmp.data()); + stress_local.data()); } } } } } + #pragma omp critical + { + for(int i = 0; i < 6; i++) + { + stress_tmp[i] += stress_local[i]; + } + } + } if (cal_force) { @@ -258,6 +269,7 @@ void NonlocalNew, std::complex>>::cal_ + dm_pointer[step_trace[1]] * nlm_tmp[i+3] + dm_pointer[step_trace[2]] * nlm_tmp[i+6] + dm_pointer[step_trace[3]] * nlm_tmp[i+9]).real(); + #pragma omp atomic force1[i] += tmp; force2[i] -= tmp; } @@ -379,7 +391,10 @@ void NonlocalNew>::cal_force_IJR(const int& iat1, // calculate the force, transfer nlm_tmp to pauli matrix for(int i = 0; i < 3; i++) { + #pragma omp atomic force1[i] += dm_pointer[0] * nlm_tmp[i]; + // different threads have different addresses for force2, + // so there is no need to add atomic here. force2[i] -= dm_pointer[0] * nlm_tmp[i]; } dm_pointer++; From 59a8303b7935871fd94a0a4a9d66d56aff31e781 Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Tue, 4 Mar 2025 19:25:08 +0800 Subject: [PATCH 3/7] openmp for cal_force_stress in dftu --- .../operator_lcao/dftu_force_stress.hpp | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp index e83c811a03..735a69b88e 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp @@ -39,7 +39,10 @@ void DFTU>::cal_force_stress(const bool cal_force, } // 1. calculate for each pair of atoms // loop over all on-site atoms - int atom_index = 0; + #pragma omp parallel + { + std::vector stress_local(6, 0); + #pragma omp for schedule(dynamic) for (int iat0 = 0; iat0 < this->ucell->nat; iat0++) { // skip the atoms without plus-U @@ -51,7 +54,7 @@ void DFTU>::cal_force_stress(const bool cal_force, continue; } const int tlp1 = 2 * target_L + 1; - AdjacentAtomInfo& adjs = this->adjs_all[atom_index++]; + AdjacentAtomInfo& adjs = this->adjs_all[iat0]; std::vector>> nlm_tot; nlm_tot.resize(adjs.adj_num + 1); @@ -205,13 +208,20 @@ void DFTU>::cal_force_stress(const bool cal_force, this->nspin, dis1, dis2, - stress_tmp.data()); + stress_local.data()); } } } } } - + #pragma omp critical + { + for(int i = 0; i < 6; i++) + { + stress_tmp[i] += stress_local[i]; + } + } + } if (cal_force) { #ifdef __MPI @@ -304,9 +314,14 @@ void DFTU>::cal_force_IJR(const int& iat1, = vu_in[m1 * m_size + m2 + is * m_size2] * nlm1[m1 + m_size * 3] * nlm2[m2] * dm_pointer[step_trace[step_is]]; // force1 = - VU * * // force2 = - VU * * } + #pragma omp atomic force1[0] += tmp[0]; + #pragma omp atomic force1[1] += tmp[1]; + #pragma omp atomic force1[2] += tmp[2]; + // different threads have different addresses for force2, + // so there is no need to add atomic here. force2[0] -= tmp[0]; force2[1] -= tmp[1]; force2[2] -= tmp[2]; From 7316e097e2b41a6310f739ba57c49e9d343555d1 Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Tue, 4 Mar 2025 19:30:44 +0800 Subject: [PATCH 4/7] openmp for cal_force_stress in dspin --- .../operator_lcao/dspin_force_stress.hpp | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_force_stress.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_force_stress.hpp index da080ae60f..7ad8913e6f 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_force_stress.hpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_force_stress.hpp @@ -32,7 +32,10 @@ void DeltaSpin>::cal_force_stress(const bool cal_force, } // 1. calculate for each pair of atoms // loop over all on-site atoms - int atom_index = 0; + #pragma omp parallel + { + std::vector stress_local(6, 0); + #pragma omp for schedule(dynamic) for (int iat0 = 0; iat0 < this->ucell->nat; iat0++) { if(!this->constraint_atom_list[iat0]) @@ -183,12 +186,20 @@ void DeltaSpin>::cal_force_stress(const bool cal_force, this->nspin, dis1, dis2, - stress_tmp.data()); + stress_local.data()); } } } } } + #pragma omp critical + { + for(int i = 0; i < 6; i++) + { + stress_tmp[i] += stress_local[i]; + } + } + } if (cal_force) { @@ -281,9 +292,14 @@ void DeltaSpin>::cal_force_IJR(const int& iat1, tmp[2] = lambda_tmp * nlm1[index + length * 3] * nlm2[index] * dm_pointer[step_trace[is]]; // force1 = - VU * * // force2 = - VU * * } + #pragma omp atomic force1[0] += tmp[0]; + #pragma omp atomic force1[1] += tmp[1]; + #pragma omp atomic force1[2] += tmp[2]; + // different threads have different addresses for force2, + // so there is no need to add atomic here. force2[0] -= tmp[0]; force2[1] -= tmp[1]; force2[2] -= tmp[2]; From db76d775284c4f089d26cf7f199300fdebec075c Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Tue, 4 Mar 2025 19:32:18 +0800 Subject: [PATCH 5/7] little change --- .../hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp | 7 +++++-- .../hamilt_lcaodft/operator_lcao/dspin_force_stress.hpp | 7 +++++-- .../hamilt_lcaodft/operator_lcao/nonlocal_force_stress.hpp | 7 +++++-- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp index 735a69b88e..dcdc1decf5 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp @@ -216,9 +216,12 @@ void DFTU>::cal_force_stress(const bool cal_force, } #pragma omp critical { - for(int i = 0; i < 6; i++) + if(cal_stress) { - stress_tmp[i] += stress_local[i]; + for(int i = 0; i < 6; i++) + { + stress_tmp[i] += stress_local[i]; + } } } } diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_force_stress.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_force_stress.hpp index 7ad8913e6f..581120926a 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_force_stress.hpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_force_stress.hpp @@ -194,9 +194,12 @@ void DeltaSpin>::cal_force_stress(const bool cal_force, } #pragma omp critical { - for(int i = 0; i < 6; i++) + if(cal_stress) { - stress_tmp[i] += stress_local[i]; + for(int i = 0; i < 6; i++) + { + stress_tmp[i] += stress_local[i]; + } } } } diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_force_stress.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_force_stress.hpp index 8299b75ee4..0774b14a11 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_force_stress.hpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_force_stress.hpp @@ -168,9 +168,12 @@ void NonlocalNew>::cal_force_stress(const bool cal_force, } #pragma omp critical { - for(int i = 0; i < 6; i++) + if(cal_stress) { - stress_tmp[i] += stress_local[i]; + for(int i = 0; i < 6; i++) + { + stress_tmp[i] += stress_local[i]; + } } } } From 1ed0342af5fc25d0e4ee47877d07d2072e4b99ae Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Wed, 5 Mar 2025 00:32:16 +0800 Subject: [PATCH 6/7] fix bug --- .../operator_lcao/dftu_force_stress.hpp | 5 +- .../operator_lcao/dspin_force_stress.hpp | 5 +- .../operator_lcao/nonlocal_force_stress.hpp | 51 +++++++++++-------- .../operator_lcao/nonlocal_new.h | 4 +- 4 files changed, 38 insertions(+), 27 deletions(-) diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp index dcdc1decf5..229c716de0 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp @@ -323,10 +323,11 @@ void DFTU>::cal_force_IJR(const int& iat1, force1[1] += tmp[1]; #pragma omp atomic force1[2] += tmp[2]; - // different threads have different addresses for force2, - // so there is no need to add atomic here. + #pragma omp atomic force2[0] -= tmp[0]; + #pragma omp atomic force2[1] -= tmp[1]; + #pragma omp atomic force2[2] -= tmp[2]; } } diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_force_stress.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_force_stress.hpp index 581120926a..f50bf282ce 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_force_stress.hpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_force_stress.hpp @@ -301,10 +301,11 @@ void DeltaSpin>::cal_force_IJR(const int& iat1, force1[1] += tmp[1]; #pragma omp atomic force1[2] += tmp[2]; - // different threads have different addresses for force2, - // so there is no need to add atomic here. + #pragma omp atomic force2[0] -= tmp[0]; + #pragma omp atomic force2[1] -= tmp[1]; + #pragma omp atomic force2[2] -= tmp[2]; } } diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_force_stress.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_force_stress.hpp index 0774b14a11..c9ada282fd 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_force_stress.hpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_force_stress.hpp @@ -36,11 +36,12 @@ void NonlocalNew>::cal_force_stress(const bool cal_force, // skip the atoms without plus-U auto tau0 = ucell->get_tau(iat0); int I0 = 0; - ucell->iat2iait(iat0, &I0, &this->current_type); + int T0 = 0; + ucell->iat2iait(iat0, &I0, &T0); // first step: find the adjacent atoms and filter the real adjacent atoms AdjacentAtomInfo adjs; - this->gridD->Find_atom(*ucell, tau0, this->current_type, I0, &adjs); + this->gridD->Find_atom(*ucell, tau0, T0, I0, &adjs); std::vector is_adj(adjs.adj_num + 1, false); for (int ad = 0; ad < adjs.adj_num + 1; ++ad) @@ -54,7 +55,7 @@ void NonlocalNew>::cal_force_stress(const bool cal_force, // When equal, the theoretical value of matrix element is zero, // but the calculated value is not zero due to the numerical error, which would lead to result changes. if (this->ucell->cal_dtau(iat0, iat1, R_index1).norm() * this->ucell->lat0 - < orb_cutoff_[T1] + this->ucell->infoNL.Beta[this->current_type].get_rcut_max()) + < orb_cutoff_[T1] + this->ucell->infoNL.Beta[T0].get_rcut_max()) { is_adj[ad] = true; } @@ -92,7 +93,7 @@ void NonlocalNew>::cal_force_stress(const bool cal_force, int M1 = (m1 % 2 == 0) ? -m1 / 2 : (m1 + 1) / 2; ModuleBase::Vector3 dtau = tau0 - tau1; - intor_->snap(T1, L1, N1, M1, this->current_type, dtau * this->ucell->lat0, true /*cal_deri*/, nlm); + intor_->snap(T1, L1, N1, M1, T0, dtau * this->ucell->lat0, true /*cal_deri*/, nlm); // select the elements of nlm with target_L const int length = nlm[0].size(); std::vector nlm_target(length * 4); @@ -142,6 +143,7 @@ void NonlocalNew>::cal_force_stress(const bool cal_force, if (cal_force) { this->cal_force_IJR(iat1, iat2, + T0, paraV, nlm_iat0[ad1], nlm_iat0[ad2], @@ -154,6 +156,7 @@ void NonlocalNew>::cal_force_stress(const bool cal_force, if (cal_stress) { this->cal_stress_IJR(iat1, iat2, + T0, paraV, nlm_iat0[ad1], nlm_iat0[ad2], @@ -216,6 +219,7 @@ void NonlocalNew>::cal_force_stress(const bool cal_force, template <> void NonlocalNew, std::complex>>::cal_force_IJR(const int& iat1, const int& iat2, + const int& T0, const Parallel_Orbitals* paraV, const std::unordered_map>& nlm1_all, const std::unordered_map>& nlm2_all, @@ -255,11 +259,11 @@ void NonlocalNew, std::complex>>::cal_ std::vector> nlm_tmp(12, ModuleBase::ZERO); for (int is = 0; is < 4; ++is) { - for (int no = 0; no < this->ucell->atoms[this->current_type].ncpp.non_zero_count_soc[is]; no++) + for (int no = 0; no < this->ucell->atoms[T0].ncpp.non_zero_count_soc[is]; no++) { - const int p1 = this->ucell->atoms[this->current_type].ncpp.index1_soc[is][no]; - const int p2 = this->ucell->atoms[this->current_type].ncpp.index2_soc[is][no]; - this->ucell->atoms[this->current_type].ncpp.get_d(is, p1, p2, tmp_d); + const int p1 = this->ucell->atoms[T0].ncpp.index1_soc[is][no]; + const int p2 = this->ucell->atoms[T0].ncpp.index2_soc[is][no]; + this->ucell->atoms[T0].ncpp.get_d(is, p1, p2, tmp_d); nlm_tmp[is*3] += nlm1[p1 + length] * nlm2[p2] * (*tmp_d); nlm_tmp[is*3+1] += nlm1[p1 + length * 2] * nlm2[p2] * (*tmp_d); nlm_tmp[is*3+2] += nlm1[p1 + length * 3] * nlm2[p2] * (*tmp_d); @@ -274,6 +278,7 @@ void NonlocalNew, std::complex>>::cal_ + dm_pointer[step_trace[3]] * nlm_tmp[i+9]).real(); #pragma omp atomic force1[i] += tmp; + #pragma omp atomic force2[i] -= tmp; } dm_pointer += npol; @@ -285,6 +290,7 @@ void NonlocalNew, std::complex>>::cal_ template <> void NonlocalNew, std::complex>>::cal_stress_IJR(const int& iat1, const int& iat2, + const int& T0, const Parallel_Orbitals* paraV, const std::unordered_map>& nlm1_all, const std::unordered_map>& nlm2_all, @@ -326,11 +332,11 @@ void NonlocalNew, std::complex>>::cal_ std::vector> nlm_tmp(npol2 * 6, ModuleBase::ZERO); for (int is = 0; is < 4; ++is) { - for (int no = 0; no < this->ucell->atoms[this->current_type].ncpp.non_zero_count_soc[is]; no++) + for (int no = 0; no < this->ucell->atoms[T0].ncpp.non_zero_count_soc[is]; no++) { - const int p1 = this->ucell->atoms[this->current_type].ncpp.index1_soc[is][no]; - const int p2 = this->ucell->atoms[this->current_type].ncpp.index2_soc[is][no]; - this->ucell->atoms[this->current_type].ncpp.get_d(is, p1, p2, tmp_d); + const int p1 = this->ucell->atoms[T0].ncpp.index1_soc[is][no]; + const int p2 = this->ucell->atoms[T0].ncpp.index2_soc[is][no]; + this->ucell->atoms[T0].ncpp.get_d(is, p1, p2, tmp_d); nlm_tmp[is*6] += (nlm1[p1 + length] * dis1.x * nlm2[p2] + nlm1[p1] * nlm2[p2 + length] * dis2.x) * (*tmp_d); nlm_tmp[is*6+1] += (nlm1[p1 + length] * dis1.y * nlm2[p2] + nlm1[p1] * nlm2[p2 + length] * dis2.y) * (*tmp_d); nlm_tmp[is*6+2] += (nlm1[p1 + length] * dis1.z * nlm2[p2] + nlm1[p1] * nlm2[p2 + length] * dis2.z) * (*tmp_d); @@ -356,6 +362,7 @@ void NonlocalNew, std::complex>>::cal_ template void NonlocalNew>::cal_force_IJR(const int& iat1, const int& iat2, + const int& T0, const Parallel_Orbitals* paraV, const std::unordered_map>& nlm1_all, const std::unordered_map>& nlm2_all, @@ -382,11 +389,11 @@ void NonlocalNew>::cal_force_IJR(const int& iat1, assert(nlm1.size() == nlm2.size()); #endif std::vector nlm_tmp(3, 0.0); - for (int no = 0; no < this->ucell->atoms[this->current_type].ncpp.non_zero_count_soc[0]; no++) + for (int no = 0; no < this->ucell->atoms[T0].ncpp.non_zero_count_soc[0]; no++) { - const int p1 = this->ucell->atoms[this->current_type].ncpp.index1_soc[0][no]; - const int p2 = this->ucell->atoms[this->current_type].ncpp.index2_soc[0][no]; - this->ucell->atoms[this->current_type].ncpp.get_d(0, p1, p2, tmp_d); + const int p1 = this->ucell->atoms[T0].ncpp.index1_soc[0][no]; + const int p2 = this->ucell->atoms[T0].ncpp.index2_soc[0][no]; + this->ucell->atoms[T0].ncpp.get_d(0, p1, p2, tmp_d); nlm_tmp[0] += nlm1[p1 + length] * nlm2[p2] * (*tmp_d); nlm_tmp[1] += nlm1[p1 + length * 2] * nlm2[p2] * (*tmp_d); nlm_tmp[2] += nlm1[p1 + length * 3] * nlm2[p2] * (*tmp_d); @@ -396,8 +403,7 @@ void NonlocalNew>::cal_force_IJR(const int& iat1, { #pragma omp atomic force1[i] += dm_pointer[0] * nlm_tmp[i]; - // different threads have different addresses for force2, - // so there is no need to add atomic here. + #pragma omp atomic force2[i] -= dm_pointer[0] * nlm_tmp[i]; } dm_pointer++; @@ -408,6 +414,7 @@ void NonlocalNew>::cal_force_IJR(const int& iat1, template void NonlocalNew>::cal_stress_IJR(const int& iat1, const int& iat2, + const int& T0, const Parallel_Orbitals* paraV, const std::unordered_map>& nlm1_all, const std::unordered_map>& nlm2_all, @@ -435,11 +442,11 @@ void NonlocalNew>::cal_stress_IJR(const int& iat1, assert(nlm1.size() == nlm2.size()); #endif std::vector nlm_tmp(6, 0.0); - for (int no = 0; no < this->ucell->atoms[this->current_type].ncpp.non_zero_count_soc[0]; no++) + for (int no = 0; no < this->ucell->atoms[T0].ncpp.non_zero_count_soc[0]; no++) { - const int p1 = this->ucell->atoms[this->current_type].ncpp.index1_soc[0][no]; - const int p2 = this->ucell->atoms[this->current_type].ncpp.index2_soc[0][no]; - this->ucell->atoms[this->current_type].ncpp.get_d(0, p1, p2, tmp_d); + const int p1 = this->ucell->atoms[T0].ncpp.index1_soc[0][no]; + const int p2 = this->ucell->atoms[T0].ncpp.index2_soc[0][no]; + this->ucell->atoms[T0].ncpp.get_d(0, p1, p2, tmp_d); nlm_tmp[0] += (nlm1[p1 + length] * dis1.x * nlm2[p2] + nlm1[p1] * nlm2[p2 + length] * dis2.x) * (*tmp_d); nlm_tmp[1] += (nlm1[p1 + length] * dis1.y * nlm2[p2] + nlm1[p1] * nlm2[p2 + length] * dis2.y) * (*tmp_d); nlm_tmp[2] += (nlm1[p1 + length] * dis1.z * nlm2[p2] + nlm1[p1] * nlm2[p2 + length] * dis2.z) * (*tmp_d); diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.h index 414cd9b041..3f41a1f308 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.h +++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.h @@ -102,12 +102,13 @@ class NonlocalNew> : public OperatorLCAO TR* data_pointer); const Grid_Driver* gridD = nullptr; - int current_type = 0; + /** * @brief calculate the atomic Force of atom pair */ void cal_force_IJR(const int& iat1, const int& iat2, + const int& T0, const Parallel_Orbitals* paraV, const std::unordered_map>& nlm1_all, const std::unordered_map>& nlm2_all, @@ -119,6 +120,7 @@ class NonlocalNew> : public OperatorLCAO */ void cal_stress_IJR(const int& iat1, const int& iat2, + const int& T0, const Parallel_Orbitals* paraV, const std::unordered_map>& nlm1_all, const std::unordered_map>& nlm2_all, From 873a0c222842944e1d398326df47e95ceb87e010 Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Wed, 5 Mar 2025 11:34:00 +0800 Subject: [PATCH 7/7] fix a bug --- .../operator_lcao/dftu_force_stress.hpp | 15 +++++++-------- .../operator_lcao/dspin_force_stress.hpp | 15 +++++++-------- .../operator_lcao/nonlocal_force_stress.hpp | 14 ++++++++------ 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp index 229c716de0..de17d654e7 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp @@ -42,6 +42,7 @@ void DFTU>::cal_force_stress(const bool cal_force, #pragma omp parallel { std::vector stress_local(6, 0); + ModuleBase::matrix force_local(force.nr, force.nc); #pragma omp for schedule(dynamic) for (int iat0 = 0; iat0 < this->ucell->nat; iat0++) { @@ -159,8 +160,8 @@ void DFTU>::cal_force_stress(const bool cal_force, const int T1 = adjs.ntype[ad1]; const int I1 = adjs.natom[ad1]; const int iat1 = ucell->itia2iat(T1, I1); - double* force_tmp1 = (cal_force) ? &force(iat1, 0) : nullptr; - double* force_tmp2 = (cal_force) ? &force(iat0, 0) : nullptr; + double* force_tmp1 = (cal_force) ? &force_local(iat1, 0) : nullptr; + double* force_tmp2 = (cal_force) ? &force_local(iat0, 0) : nullptr; ModuleBase::Vector3& R_index1 = adjs.box[ad1]; ModuleBase::Vector3 dis1 = adjs.adjacent_tau[ad1] - tau0; for (int ad2 = 0; ad2 < adjs.adj_num + 1; ++ad2) @@ -216,6 +217,10 @@ void DFTU>::cal_force_stress(const bool cal_force, } #pragma omp critical { + if(cal_force) + { + force += force_local; + } if(cal_stress) { for(int i = 0; i < 6; i++) @@ -317,17 +322,11 @@ void DFTU>::cal_force_IJR(const int& iat1, = vu_in[m1 * m_size + m2 + is * m_size2] * nlm1[m1 + m_size * 3] * nlm2[m2] * dm_pointer[step_trace[step_is]]; // force1 = - VU * * // force2 = - VU * * } - #pragma omp atomic force1[0] += tmp[0]; - #pragma omp atomic force1[1] += tmp[1]; - #pragma omp atomic force1[2] += tmp[2]; - #pragma omp atomic force2[0] -= tmp[0]; - #pragma omp atomic force2[1] -= tmp[1]; - #pragma omp atomic force2[2] -= tmp[2]; } } diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_force_stress.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_force_stress.hpp index f50bf282ce..0646a0b617 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_force_stress.hpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dspin_force_stress.hpp @@ -35,6 +35,7 @@ void DeltaSpin>::cal_force_stress(const bool cal_force, #pragma omp parallel { std::vector stress_local(6, 0); + ModuleBase::matrix force_local(force.nr, force.nc); #pragma omp for schedule(dynamic) for (int iat0 = 0; iat0 < this->ucell->nat; iat0++) { @@ -136,8 +137,8 @@ void DeltaSpin>::cal_force_stress(const bool cal_force, const int T1 = adjs.ntype[ad1]; const int I1 = adjs.natom[ad1]; const int iat1 = ucell->itia2iat(T1, I1); - double* force_tmp1 = (cal_force) ? &force(iat1, 0) : nullptr; - double* force_tmp2 = (cal_force) ? &force(iat0, 0) : nullptr; + double* force_tmp1 = (cal_force) ? &force_local(iat1, 0) : nullptr; + double* force_tmp2 = (cal_force) ? &force_local(iat0, 0) : nullptr; ModuleBase::Vector3& R_index1 = adjs.box[ad1]; ModuleBase::Vector3 dis1 = adjs.adjacent_tau[ad1] - tau0; for (int ad2 = 0; ad2 < adjs.adj_num + 1; ++ad2) @@ -194,6 +195,10 @@ void DeltaSpin>::cal_force_stress(const bool cal_force, } #pragma omp critical { + if(cal_force) + { + force += force_local; + } if(cal_stress) { for(int i = 0; i < 6; i++) @@ -295,17 +300,11 @@ void DeltaSpin>::cal_force_IJR(const int& iat1, tmp[2] = lambda_tmp * nlm1[index + length * 3] * nlm2[index] * dm_pointer[step_trace[is]]; // force1 = - VU * * // force2 = - VU * * } - #pragma omp atomic force1[0] += tmp[0]; - #pragma omp atomic force1[1] += tmp[1]; - #pragma omp atomic force1[2] += tmp[2]; - #pragma omp atomic force2[0] -= tmp[0]; - #pragma omp atomic force2[1] -= tmp[1]; - #pragma omp atomic force2[2] -= tmp[2]; } } diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_force_stress.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_force_stress.hpp index c9ada282fd..d6999b7ae9 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_force_stress.hpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_force_stress.hpp @@ -30,6 +30,7 @@ void NonlocalNew>::cal_force_stress(const bool cal_force, #pragma omp parallel { std::vector stress_local(6, 0); + ModuleBase::matrix force_local(force.nr, force.nc); #pragma omp for schedule(dynamic) for (int iat0 = 0; iat0 < this->ucell->nat; iat0++) { @@ -115,8 +116,8 @@ void NonlocalNew>::cal_force_stress(const bool cal_force, const int T1 = adjs.ntype[ad1]; const int I1 = adjs.natom[ad1]; const int iat1 = ucell->itia2iat(T1, I1); - double* force_tmp1 = (cal_force) ? &force(iat1, 0) : nullptr; - double* force_tmp2 = (cal_force) ? &force(iat0, 0) : nullptr; + double* force_tmp1 = (cal_force) ? &force_local(iat1, 0) : nullptr; + double* force_tmp2 = (cal_force) ? &force_local(iat0, 0) : nullptr; ModuleBase::Vector3& R_index1 = adjs.box[ad1]; ModuleBase::Vector3 dis1 = adjs.adjacent_tau[ad1] - tau0; for (int ad2 = 0; ad2 < adjs.adj_num + 1; ++ad2) @@ -171,6 +172,10 @@ void NonlocalNew>::cal_force_stress(const bool cal_force, } #pragma omp critical { + if(cal_force) + { + force += force_local; + } if(cal_stress) { for(int i = 0; i < 6; i++) @@ -178,6 +183,7 @@ void NonlocalNew>::cal_force_stress(const bool cal_force, stress_tmp[i] += stress_local[i]; } } + } } @@ -276,9 +282,7 @@ void NonlocalNew, std::complex>>::cal_ + dm_pointer[step_trace[1]] * nlm_tmp[i+3] + dm_pointer[step_trace[2]] * nlm_tmp[i+6] + dm_pointer[step_trace[3]] * nlm_tmp[i+9]).real(); - #pragma omp atomic force1[i] += tmp; - #pragma omp atomic force2[i] -= tmp; } dm_pointer += npol; @@ -401,9 +405,7 @@ void NonlocalNew>::cal_force_IJR(const int& iat1, // calculate the force, transfer nlm_tmp to pauli matrix for(int i = 0; i < 3; i++) { - #pragma omp atomic force1[i] += dm_pointer[0] * nlm_tmp[i]; - #pragma omp atomic force2[i] -= dm_pointer[0] * nlm_tmp[i]; } dm_pointer++;