From 4acb89506b83158008b4582f79e7a4824ea80f71 Mon Sep 17 00:00:00 2001 From: linpz Date: Mon, 5 Jan 2026 15:51:58 +0800 Subject: [PATCH 1/2] Refactor class DensityMatrix. Make the code easier to read --- .../module_dm/density_matrix.cpp | 415 ++++++++---------- .../module_lr/dm_trans/dmr_complex.cpp | 33 +- 2 files changed, 197 insertions(+), 251 deletions(-) diff --git a/source/source_estate/module_dm/density_matrix.cpp b/source/source_estate/module_dm/density_matrix.cpp index 7282226a84..61b162cfc8 100644 --- a/source/source_estate/module_dm/density_matrix.cpp +++ b/source/source_estate/module_dm/density_matrix.cpp @@ -51,134 +51,107 @@ DensityMatrix::DensityMatrix(const Parallel_Orbitals* paraV_in, const in ModuleBase::Memory::record("DensityMatrix::DMK", this->_DMK.size() * this->_DMK[0].size() * sizeof(TK)); } + // calculate DMR from DMK using blas for multi-k calculation template <> void DensityMatrix, double>::cal_DMR(const int ik_in) { ModuleBase::TITLE("DensityMatrix", "cal_DMR"); + using TR = double; // To check whether DMR has been initialized -#ifdef __DEBUG assert(!this->_DMR.empty() && "DMR has not been initialized!"); -#endif ModuleBase::timer::tick("DensityMatrix", "cal_DMR"); - int ld_hk = this->_paraV->nrow; + const int ld_hk = this->_paraV->nrow; for (int is = 1; is <= this->_nspin; ++is) { - int ik_begin = this->_nk * (is - 1); // jump this->_nk for spin_down if nspin==2 - hamilt::HContainer* target_DMR = this->_DMR[is - 1]; + const int ik_begin = this->_nk * (is - 1); // jump this->_nk for spin_down if nspin==2 + hamilt::HContainer*const target_DMR = this->_DMR[is - 1]; // set zero since this function is called in every scf step target_DMR->set_zero(); -#ifdef _OPENMP -#pragma omp parallel for schedule(dynamic) -#endif + #ifdef _OPENMP + #pragma omp parallel for schedule(dynamic) + #endif for (int i = 0; i < target_DMR->size_atom_pairs(); ++i) { - hamilt::AtomPair& target_ap = target_DMR->get_atom_pair(i); - int iat1 = target_ap.get_atom_i(); - int iat2 = target_ap.get_atom_j(); + hamilt::AtomPair& target_ap = target_DMR->get_atom_pair(i); + const int iat1 = target_ap.get_atom_i(); + const int iat2 = target_ap.get_atom_j(); // get global indexes of whole matrix for each atom in this process - int row_ap = this->_paraV->atom_begin_row[iat1]; - int col_ap = this->_paraV->atom_begin_col[iat2]; + const int row_ap = this->_paraV->atom_begin_row[iat1]; + const int col_ap = this->_paraV->atom_begin_col[iat2]; const int row_size = this->_paraV->get_row_size(iat1); const int col_size = this->_paraV->get_col_size(iat2); const int mat_size = row_size * col_size; - const int r_size = target_ap.get_R_size(); - if (row_ap == -1 || col_ap == -1) - { - throw std::string("Atom-pair not belong this process"); - } - std::vector> tmp_DMR; - if (PARAM.inp.nspin == 4) - { - tmp_DMR.resize(mat_size * r_size, 0); - } + const int R_size = target_ap.get_R_size(); + assert(row_ap != -1 && col_ap != -1 && "Atom-pair not belong this process"); + std::vector> tmp_DMR( (PARAM.inp.nspin==4) ? mat_size*R_size : 0); // calculate kphase and target_mat_ptr - std::vector> kphase_vec(r_size * this->_nk); - std::vector target_DMR_mat_vec(r_size); - for(int ir = 0; ir < r_size; ++ir) + std::vector>> kphase_vec(this->_nk, {R_size}); + std::vector target_DMR_mat_vec(R_size); + for(int iR = 0; iR < R_size; ++iR) { - const ModuleBase::Vector3 r_index = target_ap.get_R_index(ir); - hamilt::BaseMatrix* target_mat = target_ap.find_matrix(r_index); -#ifdef __DEBUG + const ModuleBase::Vector3 R_index = target_ap.get_R_index(iR); + hamilt::BaseMatrix*const target_mat = target_ap.find_matrix(R_index); + #ifdef __DEBUG if (target_mat == nullptr) { std::cout << "target_mat is nullptr" << std::endl; continue; } -#endif - target_DMR_mat_vec[ir] = target_mat->get_pointer(); + #endif + target_DMR_mat_vec[iR] = target_mat->get_pointer(); for(int ik = 0; ik < this->_nk; ++ik) { - if(ik_in >= 0 && ik_in != ik) - { - continue; - } + if(ik_in >= 0 && ik_in != ik) { continue; } // cal k_phase // if TK==std::complex, kphase is e^{ikR} - const ModuleBase::Vector3 dR(r_index[0], r_index[1], r_index[2]); + const ModuleBase::Vector3 dR(R_index[0], R_index[1], R_index[2]); const double arg = (this->_kvec_d[ik] * dR) * ModuleBase::TWO_PI; double sinp, cosp; ModuleBase::libm::sincos(arg, &sinp, &cosp); - kphase_vec[ik * r_size + ir] = std::complex(cosp, sinp); + kphase_vec[ik][iR] = std::complex(cosp, sinp); } } - std::vector> tmp_DMK_mat(mat_size); - // step_trace = 0 for NSPIN=1,2; ={0, 1, local_col, local_col+1} for NSPIN=4 - // step_trace is used when nspin = 4; - int step_trace[4]{}; - if(PARAM.inp.nspin == 4) - { - const int npol = 2; - for (int is = 0; is < npol; is++) - { - for (int is2 = 0; is2 < npol; is2++) - { - step_trace[is * npol + is2] = target_ap.get_col_size() * is + is2; - } - } - } + std::vector> DMK_mat_trans(mat_size); for(int ik = 0; ik < this->_nk; ++ik) { - if(ik_in >= 0 && ik_in != ik) - { - continue; - } + if(ik_in >= 0 && ik_in != ik) { continue; } - // copy column-major DMK to row-major tmp_DMK_mat (for the purpose of computational efficiency) - const std::complex* DMK_mat_ptr = this->_DMK[ik + ik_begin].data() + col_ap * this->_paraV->nrow + row_ap; - for(int icol = 0; icol < col_size; ++icol) - { - for(int irow = 0; irow < row_size; ++irow) - { - tmp_DMK_mat[irow * col_size + icol] = DMK_mat_ptr[icol * ld_hk + irow]; - } - } + // copy column-major DMK to row-major DMK_mat_trans (for the purpose of computational efficiency) + const std::complex*const DMK_mat_ptr + = this->_DMK[ik + ik_begin].data() + + col_ap * this->_paraV->nrow + row_ap; + for(int icol = 0; icol < col_size; ++icol) { + for(int irow = 0; irow < row_size; ++irow) { + DMK_mat_trans[irow * col_size + icol] = DMK_mat_ptr[icol * ld_hk + irow]; + }} // if nspin != 4, fill DMR // if nspin == 4, fill tmp_DMR - for(int ir = 0; ir < r_size; ++ir) + for(int iR = 0; iR < R_size; ++iR) { - std::complex kphase = kphase_vec[ik * r_size + ir]; - if(PARAM.inp.nspin != 4) + // (kr+i*ki) * (Dr+i*Di) = (kr*Dr-ki*Di) + i*(kr*Di+ki*Dr) + const std::complex kphase = kphase_vec[ik][iR]; + if(PARAM.inp.nspin != 4) // only save real kr*Dr-ki*Di { - double* target_DMR_mat = target_DMR_mat_vec[ir]; + TR* target_DMR_mat = target_DMR_mat_vec[iR]; for(int i = 0; i < mat_size; i++) { - target_DMR_mat[i] += kphase.real() * tmp_DMK_mat[i].real() - - kphase.imag() * tmp_DMK_mat[i].imag(); + target_DMR_mat[i] + += kphase.real() * DMK_mat_trans[i].real() + - kphase.imag() * DMK_mat_trans[i].imag(); } } else if(PARAM.inp.nspin == 4) { - std::complex* tmp_DMR_mat = &tmp_DMR[ir * mat_size]; BlasConnector::axpy(mat_size, kphase, - tmp_DMK_mat.data(), + DMK_mat_trans.data(), 1, - tmp_DMR_mat, + &tmp_DMR[iR * mat_size], 1); } } @@ -188,11 +161,19 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) // copy tmp_DMR to fill target_DMR if(PARAM.inp.nspin == 4) { + // step_trace ={0, 1, local_col, local_col+1} for NSPIN=4 + int step_trace[4]{}; + constexpr int npol = 2; + for (int is = 0; is < npol; is++) { + for (int is2 = 0; is2 < npol; is2++) { + step_trace[is * npol + is2] = target_ap.get_col_size() * is + is2; + }} + std::complex tmp[4]{}; - for(int ir = 0; ir < r_size; ++ir) + for(int iR = 0; iR < R_size; ++iR) { - std::complex* tmp_DMR_mat = &tmp_DMR[ir * mat_size]; - double* target_DMR_mat = target_DMR_mat_vec[ir]; + const std::complex* tmp_DMR_mat = &tmp_DMR[iR * mat_size]; + TR* target_DMR_mat = target_DMR_mat_vec[iR]; for (int irow = 0; irow < row_size; irow += 2) { for (int icol = 0; icol < col_size; icol += 2) @@ -204,11 +185,10 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) tmp[3] = tmp_DMR_mat[icol + step_trace[3]]; // transfer to Pauli matrix and save the real part // save them back to the target_mat - target_DMR_mat[icol + step_trace[0]] = tmp[0].real() + tmp[3].real(); - target_DMR_mat[icol + step_trace[1]] = tmp[1].real() + tmp[2].real(); - target_DMR_mat[icol + step_trace[2]] - = -tmp[1].imag() + tmp[2].imag(); // (i * (rho_updown - rho_downup)).real() - target_DMR_mat[icol + step_trace[3]] = tmp[0].real() - tmp[3].real(); + target_DMR_mat[icol + step_trace[0]] = tmp[0].real() + tmp[3].real(); // (rho_upup + rho_downdown).real() + target_DMR_mat[icol + step_trace[1]] = tmp[1].real() + tmp[2].real(); // (rho_updown + rho_downup).real() + target_DMR_mat[icol + step_trace[2]] = -tmp[1].imag() + tmp[2].imag(); // (i * (rho_updown - rho_downup)).real() + target_DMR_mat[icol + step_trace[3]] = tmp[0].real() - tmp[3].real(); // (rho_upup - rho_downdown).real() } tmp_DMR_mat += col_size * 2; target_DMR_mat += col_size * 2; @@ -225,132 +205,104 @@ template <> void DensityMatrix, double>::cal_DMR_td(const UnitCell& ucell, const ModuleBase::Vector3 At, const int ik_in) { ModuleBase::TITLE("DensityMatrix", "cal_DMR_td"); + using TR = double; // To check whether DMR has been initialized -#ifdef __DEBUG assert(!this->_DMR.empty() && "DMR has not been initialized!"); -#endif ModuleBase::timer::tick("DensityMatrix", "cal_DMR_td"); - int ld_hk = this->_paraV->nrow; + const int ld_hk = this->_paraV->nrow; for (int is = 1; is <= this->_nspin; ++is) { - int ik_begin = this->_nk * (is - 1); // jump this->_nk for spin_down if nspin==2 - hamilt::HContainer* target_DMR = this->_DMR[is - 1]; + const int ik_begin = this->_nk * (is - 1); // jump this->_nk for spin_down if nspin==2 + hamilt::HContainer*const target_DMR = this->_DMR[is - 1]; // set zero since this function is called in every scf step target_DMR->set_zero(); -#ifdef _OPENMP -#pragma omp parallel for schedule(dynamic) -#endif + #ifdef _OPENMP + #pragma omp parallel for schedule(dynamic) + #endif for (int i = 0; i < target_DMR->size_atom_pairs(); ++i) { - hamilt::AtomPair& target_ap = target_DMR->get_atom_pair(i); - int iat1 = target_ap.get_atom_i(); - int iat2 = target_ap.get_atom_j(); + hamilt::AtomPair& target_ap = target_DMR->get_atom_pair(i); + const int iat1 = target_ap.get_atom_i(); + const int iat2 = target_ap.get_atom_j(); // get global indexes of whole matrix for each atom in this process - int row_ap = this->_paraV->atom_begin_row[iat1]; - int col_ap = this->_paraV->atom_begin_col[iat2]; + const int row_ap = this->_paraV->atom_begin_row[iat1]; + const int col_ap = this->_paraV->atom_begin_col[iat2]; const int row_size = this->_paraV->get_row_size(iat1); const int col_size = this->_paraV->get_col_size(iat2); const int mat_size = row_size * col_size; - const int r_size = target_ap.get_R_size(); - if (row_ap == -1 || col_ap == -1) - { - throw std::string("Atom-pair not belong this process"); - } - std::vector> tmp_DMR; - if (PARAM.inp.nspin == 4) - { - tmp_DMR.resize(mat_size * r_size, 0); - } + const int R_size = target_ap.get_R_size(); + assert(row_ap != -1 && col_ap != -1 && "Atom-pair not belong this process"); + std::vector> tmp_DMR( (PARAM.inp.nspin==4) ? mat_size*R_size : 0); // calculate kphase and target_mat_ptr - std::vector> kphase_vec(r_size * this->_nk); - std::vector target_DMR_mat_vec(r_size); - for(int ir = 0; ir < r_size; ++ir) + std::vector>> kphase_vec(this->_nk, {R_size}); + std::vector target_DMR_mat_vec(R_size); + for(int iR = 0; iR < R_size; ++iR) { - const ModuleBase::Vector3 r_index = target_ap.get_R_index(ir); - hamilt::BaseMatrix* target_mat = target_ap.find_matrix(r_index); -#ifdef __DEBUG + const ModuleBase::Vector3 R_index = target_ap.get_R_index(iR); + hamilt::BaseMatrix*const target_mat = target_ap.find_matrix(R_index); + #ifdef __DEBUG if (target_mat == nullptr) { std::cout << "target_mat is nullptr" << std::endl; continue; } -#endif - target_DMR_mat_vec[ir] = target_mat->get_pointer(); + #endif + target_DMR_mat_vec[iR] = target_mat->get_pointer(); double arg_td = 0.0; //cal tddft phase for hybrid gauge - ModuleBase::Vector3 dtau = ucell.cal_dtau(iat1, iat2, r_index); + ModuleBase::Vector3 dtau = ucell.cal_dtau(iat1, iat2, R_index); arg_td = At * dtau * ucell.lat0; for(int ik = 0; ik < this->_nk; ++ik) { - if(ik_in >= 0 && ik_in != ik) - { - continue; - } + if(ik_in >= 0 && ik_in != ik) { continue; } // cal k_phase // if TK==std::complex, kphase is e^{ikR} - const ModuleBase::Vector3 dR(r_index[0], r_index[1], r_index[2]); + const ModuleBase::Vector3 dR(R_index[0], R_index[1], R_index[2]); const double arg = (this->_kvec_d[ik] * dR) * ModuleBase::TWO_PI + arg_td; double sinp, cosp; ModuleBase::libm::sincos(arg, &sinp, &cosp); - kphase_vec[ik * r_size + ir] = std::complex(cosp, sinp); + kphase_vec[ik][iR] = std::complex(cosp, sinp); } } - std::vector> tmp_DMK_mat(mat_size); - // step_trace = 0 for NSPIN=1,2; ={0, 1, local_col, local_col+1} for NSPIN=4 - // step_trace is used when nspin = 4; - int step_trace[4]{}; - if(PARAM.inp.nspin == 4) - { - const int npol = 2; - for (int is = 0; is < npol; is++) - { - for (int is2 = 0; is2 < npol; is2++) - { - step_trace[is * npol + is2] = target_ap.get_col_size() * is + is2; - } - } - } + std::vector> DMK_mat_trans(mat_size); for(int ik = 0; ik < this->_nk; ++ik) { - if(ik_in >= 0 && ik_in != ik) - { - continue; - } + if(ik_in >= 0 && ik_in != ik) { continue; } - // copy column-major DMK to row-major tmp_DMK_mat (for the purpose of computational efficiency) - const std::complex* DMK_mat_ptr = this->_DMK[ik + ik_begin].data() + col_ap * this->_paraV->nrow + row_ap; - for(int icol = 0; icol < col_size; ++icol) - { - for(int irow = 0; irow < row_size; ++irow) - { - tmp_DMK_mat[irow * col_size + icol] = DMK_mat_ptr[icol * ld_hk + irow]; - } - } + // copy column-major DMK to row-major DMK_mat_trans (for the purpose of computational efficiency) + const std::complex*const DMK_mat_ptr + = this->_DMK[ik + ik_begin].data() + + col_ap * this->_paraV->nrow + row_ap; + for(int icol = 0; icol < col_size; ++icol) { + for(int irow = 0; irow < row_size; ++irow) { + DMK_mat_trans[irow * col_size + icol] = DMK_mat_ptr[icol * ld_hk + irow]; + }} // if nspin != 4, fill DMR // if nspin == 4, fill tmp_DMR - for(int ir = 0; ir < r_size; ++ir) + for(int iR = 0; iR < R_size; ++iR) { - std::complex kphase = kphase_vec[ik * r_size + ir]; - if(PARAM.inp.nspin != 4) + // (kr+i*ki) * (Dr+i*Di) = (kr*Dr-ki*Di) + i*(kr*Di+ki*Dr) + const std::complex kphase = kphase_vec[ik][iR]; + if(PARAM.inp.nspin != 4) // only save real kr*Dr-ki*Di { - double* target_DMR_mat = target_DMR_mat_vec[ir]; + TR* target_DMR_mat = target_DMR_mat_vec[iR]; for(int i = 0; i < mat_size; i++) { - target_DMR_mat[i] += kphase.real() * tmp_DMK_mat[i].real() - - kphase.imag() * tmp_DMK_mat[i].imag(); + target_DMR_mat[i] + += kphase.real() * DMK_mat_trans[i].real() + - kphase.imag() * DMK_mat_trans[i].imag(); } } else if(PARAM.inp.nspin == 4) { - std::complex* tmp_DMR_mat = &tmp_DMR[ir * mat_size]; BlasConnector::axpy(mat_size, kphase, - tmp_DMK_mat.data(), + DMK_mat_trans.data(), 1, - tmp_DMR_mat, + &tmp_DMR[iR * mat_size], 1); } } @@ -360,11 +312,19 @@ void DensityMatrix, double>::cal_DMR_td(const UnitCell& uce // copy tmp_DMR to fill target_DMR if(PARAM.inp.nspin == 4) { + // step_trace ={0, 1, local_col, local_col+1} for NSPIN=4 + int step_trace[4]{}; + constexpr int npol = 2; + for (int is = 0; is < npol; is++) { + for (int is2 = 0; is2 < npol; is2++) { + step_trace[is * npol + is2] = target_ap.get_col_size() * is + is2; + }} + std::complex tmp[4]{}; - for(int ir = 0; ir < r_size; ++ir) + for(int iR = 0; iR < R_size; ++iR) { - std::complex* tmp_DMR_mat = &tmp_DMR[ir * mat_size]; - double* target_DMR_mat = target_DMR_mat_vec[ir]; + const std::complex* tmp_DMR_mat = &tmp_DMR[iR * mat_size]; + TR* target_DMR_mat = target_DMR_mat_vec[iR]; for (int irow = 0; irow < row_size; irow += 2) { for (int icol = 0; icol < col_size; icol += 2) @@ -376,11 +336,10 @@ void DensityMatrix, double>::cal_DMR_td(const UnitCell& uce tmp[3] = tmp_DMR_mat[icol + step_trace[3]]; // transfer to Pauli matrix and save the real part // save them back to the target_mat - target_DMR_mat[icol + step_trace[0]] = tmp[0].real() + tmp[3].real(); - target_DMR_mat[icol + step_trace[1]] = tmp[1].real() + tmp[2].real(); - target_DMR_mat[icol + step_trace[2]] - = -tmp[1].imag() + tmp[2].imag(); // (i * (rho_updown - rho_downup)).real() - target_DMR_mat[icol + step_trace[3]] = tmp[0].real() - tmp[3].real(); + target_DMR_mat[icol + step_trace[0]] = tmp[0].real() + tmp[3].real(); // (rho_upup + rho_downdown).real() + target_DMR_mat[icol + step_trace[1]] = tmp[1].real() + tmp[2].real(); // (rho_updown + rho_downup).real() + target_DMR_mat[icol + step_trace[2]] = -tmp[1].imag() + tmp[2].imag(); // (i * (rho_updown - rho_downup)).real() + target_DMR_mat[icol + step_trace[3]] = tmp[0].real() - tmp[3].real(); // (rho_upup - rho_downdown).real() } tmp_DMR_mat += col_size * 2; target_DMR_mat += col_size * 2; @@ -401,73 +360,72 @@ void DensityMatrix, double>::cal_DMR_full(hamilt::HContaine ModuleBase::TITLE("DensityMatrix", "cal_DMR_full"); ModuleBase::timer::tick("DensityMatrix", "cal_DMR_full"); - int ld_hk = this->_paraV->nrow; + const int ld_hk = this->_paraV->nrow; hamilt::HContainer>* target_DMR = dmR_out; // set zero since this function is called in every scf step target_DMR->set_zero(); -#ifdef _OPENMP -#pragma omp parallel for schedule(dynamic) -#endif + #ifdef _OPENMP + #pragma omp parallel for schedule(dynamic) + #endif for (int i = 0; i < target_DMR->size_atom_pairs(); ++i) { - auto& target_ap = target_DMR->get_atom_pair(i); - int iat1 = target_ap.get_atom_i(); - int iat2 = target_ap.get_atom_j(); + hamilt::AtomPair>& target_ap = target_DMR->get_atom_pair(i); + const int iat1 = target_ap.get_atom_i(); + const int iat2 = target_ap.get_atom_j(); // get global indexes of whole matrix for each atom in this process - int row_ap = this->_paraV->atom_begin_row[iat1]; - int col_ap = this->_paraV->atom_begin_col[iat2]; + const int row_ap = this->_paraV->atom_begin_row[iat1]; + const int col_ap = this->_paraV->atom_begin_col[iat2]; const int row_size = this->_paraV->get_row_size(iat1); const int col_size = this->_paraV->get_col_size(iat2); const int mat_size = row_size * col_size; - const int r_size = target_ap.get_R_size(); + const int R_size = target_ap.get_R_size(); // calculate kphase and target_mat_ptr - std::vector> kphase_vec(r_size * this->_nk); - std::vector*> target_DMR_mat_vec(r_size); - for(int ir = 0; ir < r_size; ++ir) + std::vector>> kphase_vec(this->_nk, {R_size}); + std::vector*> target_DMR_mat_vec(R_size); + for(int iR = 0; iR < R_size; ++iR) { - const ModuleBase::Vector3 r_index = target_ap.get_R_index(ir); - hamilt::BaseMatrix>* target_mat = target_ap.find_matrix(r_index); -#ifdef __DEBUG + const ModuleBase::Vector3 R_index = target_ap.get_R_index(iR); + hamilt::BaseMatrix>*const target_mat = target_ap.find_matrix(R_index); + #ifdef __DEBUG if (target_mat == nullptr) { std::cout << "target_mat is nullptr" << std::endl; continue; } -#endif - target_DMR_mat_vec[ir] = target_mat->get_pointer(); + #endif + target_DMR_mat_vec[iR] = target_mat->get_pointer(); for(int ik = 0; ik < this->_nk; ++ik) { // cal k_phase // if TK==std::complex, kphase is e^{ikR} - const ModuleBase::Vector3 dR(r_index[0], r_index[1], r_index[2]); + const ModuleBase::Vector3 dR(R_index[0], R_index[1], R_index[2]); const double arg = (this->_kvec_d[ik] * dR) * ModuleBase::TWO_PI; double sinp, cosp; ModuleBase::libm::sincos(arg, &sinp, &cosp); - kphase_vec[ik * r_size + ir] = std::complex(cosp, sinp); + kphase_vec[ik][iR] = std::complex(cosp, sinp); } } - std::vector> tmp_DMK_mat(mat_size); + std::vector> DMK_mat_trans(mat_size); for(int ik = 0; ik < this->_nk; ++ik) { - // copy column-major DMK to row-major tmp_DMK_mat (for the purpose of computational efficiency) - const std::complex* DMK_mat_ptr = this->_DMK[ik].data() + col_ap * this->_paraV->nrow + row_ap; - for(int icol = 0; icol < col_size; ++icol) + // copy column-major DMK to row-major DMK_mat_trans (for the purpose of computational efficiency) + const std::complex*const DMK_mat_ptr + = this->_DMK[ik].data() + + col_ap * this->_paraV->nrow + row_ap; + for(int icol = 0; icol < col_size; ++icol) { + for(int irow = 0; irow < row_size; ++irow) { + DMK_mat_trans[irow * col_size + icol] = DMK_mat_ptr[icol * ld_hk + irow]; + }} + + for(int iR = 0; iR < R_size; ++iR) { - for(int irow = 0; irow < row_size; ++irow) - { - tmp_DMK_mat[irow * col_size + icol] = DMK_mat_ptr[icol * ld_hk + irow]; - } - } - - for(int ir = 0; ir < r_size; ++ir) - { - std::complex kphase = kphase_vec[ik * r_size + ir]; - std::complex* target_DMR_mat = target_DMR_mat_vec[ir]; + const std::complex kphase = kphase_vec[ik][iR]; + std::complex* target_DMR_mat = target_DMR_mat_vec[iR]; BlasConnector::axpy(mat_size, kphase, - tmp_DMK_mat.data(), + DMK_mat_trans.data(), 1, target_DMR_mat, 1); @@ -484,64 +442,55 @@ void DensityMatrix::cal_DMR(const int ik_in) ModuleBase::TITLE("DensityMatrix", "cal_DMR"); assert(ik_in == -1 || ik_in == 0); + assert(this->_nk == 1); // To check whether DMR has been initialized -#ifdef __DEBUG assert(!this->_DMR.empty() && "DMR has not been initialized!"); -#endif ModuleBase::timer::tick("DensityMatrix", "cal_DMR"); - int ld_hk = this->_paraV->nrow; + const int ld_hk = this->_paraV->nrow; for (int is = 1; is <= this->_nspin; ++is) { - int ik_begin = this->_nk * (is - 1); // jump this->_nk for spin_down if nspin==2 - hamilt::HContainer* target_DMR = this->_DMR[is - 1]; + const int ik_begin = this->_nk * (is - 1); // jump this->_nk for spin_down if nspin==2 + hamilt::HContainer*const target_DMR = this->_DMR[is - 1]; // set zero since this function is called in every scf step target_DMR->set_zero(); -#ifdef __DEBUG // assert(target_DMR->is_gamma_only() == true); - assert(this->_nk == 1); -#endif -#ifdef _OPENMP -#pragma omp parallel for schedule(dynamic) -#endif + #ifdef _OPENMP + #pragma omp parallel for schedule(dynamic) + #endif for (int i = 0; i < target_DMR->size_atom_pairs(); ++i) { hamilt::AtomPair& target_ap = target_DMR->get_atom_pair(i); - int iat1 = target_ap.get_atom_i(); - int iat2 = target_ap.get_atom_j(); + const int iat1 = target_ap.get_atom_i(); + const int iat2 = target_ap.get_atom_j(); // get global indexes of whole matrix for each atom in this process - int row_ap = this->_paraV->atom_begin_row[iat1]; - int col_ap = this->_paraV->atom_begin_col[iat2]; + const int row_ap = this->_paraV->atom_begin_row[iat1]; + const int col_ap = this->_paraV->atom_begin_col[iat2]; + assert(row_ap != -1 && col_ap != -1 && "Atom-pair not belong this process"); const int row_size = this->_paraV->get_row_size(iat1); const int col_size = this->_paraV->get_col_size(iat2); - const int r_size = target_ap.get_R_size(); - if (row_ap == -1 || col_ap == -1) - { - throw std::string("Atom-pair not belong this process"); - } - // R index - const ModuleBase::Vector3 r_index = target_ap.get_R_index(0); -#ifdef __DEBUG - assert(r_size == 1); - assert(r_index.x == 0 && r_index.y == 0 && r_index.z == 0); -#endif - hamilt::BaseMatrix* target_mat = target_ap.find_matrix(r_index); -#ifdef __DEBUG + const int R_size = target_ap.get_R_size(); + assert(R_size == 1); + const ModuleBase::Vector3 R_index = target_ap.get_R_index(0); + assert(R_index.x == 0 && R_index.y == 0 && R_index.z == 0); + hamilt::BaseMatrix*const target_mat = target_ap.find_matrix(R_index); + #ifdef __DEBUG if (target_mat == nullptr) { std::cout << "target_mat is nullptr" << std::endl; continue; } -#endif + #endif // k index - double kphase = 1; + constexpr double kphase = 1; + // transpose DMK col=>row + const double* DMK_ptr + = this->_DMK[0 + ik_begin].data() + + col_ap * this->_paraV->nrow + row_ap; // set DMR element double* target_DMR_ptr = target_mat->get_pointer(); - double* DMK_ptr = this->_DMK[0 + ik_begin].data(); - // transpose DMK col=>row - DMK_ptr += col_ap * this->_paraV->nrow + row_ap; for (int mu = 0; mu < row_size; ++mu) { BlasConnector::axpy(col_size, diff --git a/source/source_lcao/module_lr/dm_trans/dmr_complex.cpp b/source/source_lcao/module_lr/dm_trans/dmr_complex.cpp index 8009409d5d..3ed8ab69bf 100644 --- a/source/source_lcao/module_lr/dm_trans/dmr_complex.cpp +++ b/source/source_lcao/module_lr/dm_trans/dmr_complex.cpp @@ -11,8 +11,8 @@ namespace elecstate ModuleBase::timer::tick("DensityMatrix", "cal_DMR"); for (int is = 1; is <= this->_nspin; ++is) { - int ik_begin = this->_nk * (is - 1); // jump this->_nk for spin_down if nspin==2 - hamilt::HContainer>* tmp_DMR = this->_DMR[is - 1]; + const int ik_begin = this->_nk * (is - 1); // jump this->_nk for spin_down if nspin==2 + hamilt::HContainer>*const tmp_DMR = this->_DMR[is - 1]; // set zero since this function is called in every scf step tmp_DMR->set_zero(); #ifdef _OPENMP @@ -21,19 +21,16 @@ namespace elecstate for (int i = 0; i < tmp_DMR->size_atom_pairs(); ++i) { hamilt::AtomPair>& tmp_ap = tmp_DMR->get_atom_pair(i); - int iat1 = tmp_ap.get_atom_i(); - int iat2 = tmp_ap.get_atom_j(); + const int iat1 = tmp_ap.get_atom_i(); + const int iat2 = tmp_ap.get_atom_j(); // get global indexes of whole matrix for each atom in this process - int row_ap = this->_paraV->atom_begin_row[iat1]; - int col_ap = this->_paraV->atom_begin_col[iat2]; - if (row_ap == -1 || col_ap == -1) - { - throw std::string("Atom-pair not belong this process"); - } + const int row_ap = this->_paraV->atom_begin_row[iat1]; + const int col_ap = this->_paraV->atom_begin_col[iat2]; + assert(row_ap != -1 && col_ap != -1 && "Atom-pair not belong this process"); for (int ir = 0; ir < tmp_ap.get_R_size(); ++ir) { const ModuleBase::Vector3 r_index = tmp_ap.get_R_index(ir); - hamilt::BaseMatrix>* tmp_matrix = tmp_ap.find_matrix(r_index); + hamilt::BaseMatrix>*const tmp_matrix = tmp_ap.find_matrix(r_index); #ifdef __DEBUG if (tmp_matrix == nullptr) { @@ -52,13 +49,14 @@ namespace elecstate const double arg = (this->_kvec_d[ik] * dR) * ModuleBase::TWO_PI; double sinp, cosp; ModuleBase::libm::sincos(arg, &sinp, &cosp); - std::complex kphase = std::complex(cosp, sinp); + const std::complex kphase = std::complex(cosp, sinp); // set DMR element std::complex* tmp_DMR_pointer = tmp_matrix->get_pointer(); - std::complex* tmp_DMK_pointer = this->_DMK[ik + ik_begin].data(); + const std::complex* tmp_DMK_pointer + = this->_DMK[ik + ik_begin].data() + + col_ap * this->_paraV->nrow + row_ap; // jump DMK to fill DMR // DMR is row-major, DMK is column-major - tmp_DMK_pointer += col_ap * this->_paraV->nrow + row_ap; for (int mu = 0; mu < this->_paraV->get_row_size(iat1); ++mu) { BlasConnector::axpy(this->_paraV->get_col_size(iat2), @@ -71,11 +69,10 @@ namespace elecstate tmp_DMR_pointer += this->_paraV->get_col_size(iat2); } } -} + } // treat DMR as pauli matrix when NSPIN=4 - if (PARAM.inp.nspin == 4) { - throw std::runtime_error("complex DM(R) with NSPIN=4 is not implemented yet"); -} + if (PARAM.inp.nspin == 4) + { throw std::runtime_error("complex DM(R) with NSPIN=4 is not implemented yet"); } } } } From a7dfa04a81a721e0ffaa888e973cb4d58e02058f Mon Sep 17 00:00:00 2001 From: linpz Date: Mon, 5 Jan 2026 22:48:40 +0800 Subject: [PATCH 2/2] Fix bug of kphase_vec init in DensityMatrix --- source/source_estate/module_dm/density_matrix.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/source/source_estate/module_dm/density_matrix.cpp b/source/source_estate/module_dm/density_matrix.cpp index 61b162cfc8..1e59218b0d 100644 --- a/source/source_estate/module_dm/density_matrix.cpp +++ b/source/source_estate/module_dm/density_matrix.cpp @@ -86,10 +86,9 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) const int mat_size = row_size * col_size; const int R_size = target_ap.get_R_size(); assert(row_ap != -1 && col_ap != -1 && "Atom-pair not belong this process"); - std::vector> tmp_DMR( (PARAM.inp.nspin==4) ? mat_size*R_size : 0); // calculate kphase and target_mat_ptr - std::vector>> kphase_vec(this->_nk, {R_size}); + std::vector>> kphase_vec(this->_nk, std::vector>(R_size)); std::vector target_DMR_mat_vec(R_size); for(int iR = 0; iR < R_size; ++iR) { @@ -117,6 +116,7 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) } std::vector> DMK_mat_trans(mat_size); + std::vector> tmp_DMR( (PARAM.inp.nspin==4) ? mat_size*R_size : 0); for(int ik = 0; ik < this->_nk; ++ik) { if(ik_in >= 0 && ik_in != ik) { continue; } @@ -233,10 +233,9 @@ void DensityMatrix, double>::cal_DMR_td(const UnitCell& uce const int mat_size = row_size * col_size; const int R_size = target_ap.get_R_size(); assert(row_ap != -1 && col_ap != -1 && "Atom-pair not belong this process"); - std::vector> tmp_DMR( (PARAM.inp.nspin==4) ? mat_size*R_size : 0); // calculate kphase and target_mat_ptr - std::vector>> kphase_vec(this->_nk, {R_size}); + std::vector>> kphase_vec(this->_nk, std::vector>(R_size)); std::vector target_DMR_mat_vec(R_size); for(int iR = 0; iR < R_size; ++iR) { @@ -268,6 +267,7 @@ void DensityMatrix, double>::cal_DMR_td(const UnitCell& uce } std::vector> DMK_mat_trans(mat_size); + std::vector> tmp_DMR( (PARAM.inp.nspin==4) ? mat_size*R_size : 0); for(int ik = 0; ik < this->_nk; ++ik) { if(ik_in >= 0 && ik_in != ik) { continue; } @@ -381,7 +381,7 @@ void DensityMatrix, double>::cal_DMR_full(hamilt::HContaine const int R_size = target_ap.get_R_size(); // calculate kphase and target_mat_ptr - std::vector>> kphase_vec(this->_nk, {R_size}); + std::vector>> kphase_vec(this->_nk, std::vector>(R_size)); std::vector*> target_DMR_mat_vec(R_size); for(int iR = 0; iR < R_size; ++iR) {