From 884cc87b9b70b5b00379ffdcb4f2ee44aa1637e3 Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Tue, 18 Mar 2025 11:44:03 +0800 Subject: [PATCH 1/9] modify variable name --- .../module_dm/density_matrix.cpp | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/source/module_elecstate/module_dm/density_matrix.cpp b/source/module_elecstate/module_dm/density_matrix.cpp index 56475e6840..dd527f8440 100644 --- a/source/module_elecstate/module_dm/density_matrix.cpp +++ b/source/module_elecstate/module_dm/density_matrix.cpp @@ -68,15 +68,15 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) for (int is = 1; is <= this->_nspin; ++is) { int ik_begin = this->_nk * (is - 1); // jump this->_nk for spin_down if nspin==2 - hamilt::HContainer* tmp_DMR = this->_DMR[is - 1]; + hamilt::HContainer* target_DMR = this->_DMR[is - 1]; // set zero since this function is called in every scf step - tmp_DMR->set_zero(); + target_DMR->set_zero(); #ifdef _OPENMP #pragma omp parallel for #endif - for (int i = 0; i < tmp_DMR->size_atom_pairs(); ++i) + for (int i = 0; i < target_DMR->size_atom_pairs(); ++i) { - hamilt::AtomPair& tmp_ap = tmp_DMR->get_atom_pair(i); + hamilt::AtomPair& tmp_ap = target_DMR->get_atom_pair(i); int iat1 = tmp_ap.get_atom_i(); int iat2 = tmp_ap.get_atom_j(); // get global indexes of whole matrix for each atom in this process @@ -233,15 +233,15 @@ void DensityMatrix, double>::cal_DMR_full(hamilt::HContaine ModuleBase::timer::tick("DensityMatrix", "cal_DMR_full"); int ld_hk = this->_paraV->nrow; int ld_hk2 = 2 * ld_hk; - hamilt::HContainer>* tmp_DMR = dmR_out; + hamilt::HContainer>* target_DMR = dmR_out; // set zero since this function is called in every scf step - tmp_DMR->set_zero(); + target_DMR->set_zero(); #ifdef _OPENMP #pragma omp parallel for #endif - for (int i = 0; i < tmp_DMR->size_atom_pairs(); ++i) + for (int i = 0; i < target_DMR->size_atom_pairs(); ++i) { - auto& tmp_ap = tmp_DMR->get_atom_pair(i); + auto& tmp_ap = target_DMR->get_atom_pair(i); int iat1 = tmp_ap.get_atom_i(); int iat2 = tmp_ap.get_atom_j(); // get global indexes of whole matrix for each atom in this process @@ -312,20 +312,20 @@ void DensityMatrix::cal_DMR(const int ik_in) for (int is = 1; is <= this->_nspin; ++is) { int ik_begin = this->_nk * (is - 1); // jump this->_nk for spin_down if nspin==2 - hamilt::HContainer* tmp_DMR = this->_DMR[is - 1]; + hamilt::HContainer* target_DMR = this->_DMR[is - 1]; // set zero since this function is called in every scf step - tmp_DMR->set_zero(); + target_DMR->set_zero(); #ifdef __DEBUG - // assert(tmp_DMR->is_gamma_only() == true); + // assert(target_DMR->is_gamma_only() == true); assert(this->_nk == 1); #endif #ifdef _OPENMP #pragma omp parallel for #endif - for (int i = 0; i < tmp_DMR->size_atom_pairs(); ++i) + for (int i = 0; i < target_DMR->size_atom_pairs(); ++i) { - hamilt::AtomPair& tmp_ap = tmp_DMR->get_atom_pair(i); + hamilt::AtomPair& tmp_ap = target_DMR->get_atom_pair(i); int iat1 = tmp_ap.get_atom_i(); int iat2 = tmp_ap.get_atom_j(); // get global indexes of whole matrix for each atom in this process From 55cf4cca359b44095a67ab33f2374ca74c463d2b Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Tue, 18 Mar 2025 11:47:15 +0800 Subject: [PATCH 2/9] modify variable name --- .../module_dm/density_matrix.cpp | 78 +++++++++---------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/source/module_elecstate/module_dm/density_matrix.cpp b/source/module_elecstate/module_dm/density_matrix.cpp index dd527f8440..a311dd408c 100644 --- a/source/module_elecstate/module_dm/density_matrix.cpp +++ b/source/module_elecstate/module_dm/density_matrix.cpp @@ -76,9 +76,9 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) #endif for (int i = 0; i < target_DMR->size_atom_pairs(); ++i) { - hamilt::AtomPair& tmp_ap = target_DMR->get_atom_pair(i); - int iat1 = tmp_ap.get_atom_i(); - int iat2 = tmp_ap.get_atom_j(); + hamilt::AtomPair& target_ap = target_DMR->get_atom_pair(i); + int iat1 = target_ap.get_atom_i(); + int iat2 = target_ap.get_atom_j(); // get global indexes of whole matrix for each atom in this process int row_ap = this->_paraV->atom_begin_row[iat1]; int col_ap = this->_paraV->atom_begin_col[iat2]; @@ -89,16 +89,16 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) std::vector> tmp_DMR; if (PARAM.inp.nspin == 4) { - tmp_DMR.resize(tmp_ap.get_size()); + tmp_DMR.resize(target_ap.get_size()); } - for (int ir = 0; ir < tmp_ap.get_R_size(); ++ir) + for (int ir = 0; ir < target_ap.get_R_size(); ++ir) { - const ModuleBase::Vector3 r_index = tmp_ap.get_R_index(ir); - hamilt::BaseMatrix* tmp_matrix = tmp_ap.find_matrix(r_index); + const ModuleBase::Vector3 r_index = target_ap.get_R_index(ir); + hamilt::BaseMatrix* target_mat = target_ap.find_matrix(r_index); #ifdef __DEBUG - if (tmp_matrix == nullptr) + if (target_mat == nullptr) { - std::cout << "tmp_matrix is nullptr" << std::endl; + std::cout << "target_mat is nullptr" << std::endl; continue; } #endif @@ -117,7 +117,7 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) ModuleBase::libm::sincos(arg, &sinp, &cosp); std::complex kphase = std::complex(cosp, sinp); // set DMR element - double* tmp_DMR_pointer = tmp_matrix->get_pointer(); + double* tmp_DMR_pointer = target_mat->get_pointer(); std::complex* tmp_DMK_pointer = this->_DMK[ik + ik_begin].data(); double* DMK_real_pointer = nullptr; double* DMK_imag_pointer = nullptr; @@ -150,7 +150,7 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) // treat DMR as pauli matrix when NSPIN=4 if (PARAM.inp.nspin == 4) { - tmp_DMR.assign(tmp_ap.get_size(), std::complex(0.0, 0.0)); + tmp_DMR.assign(target_ap.get_size(), std::complex(0.0, 0.0)); for (int ik = 0; ik < this->_nk; ++ik) { if(ik_in >= 0 && ik_in != ik) { continue; @@ -170,16 +170,16 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) // jump DMK to fill DMR // DMR is row-major, DMK is column-major tmp_DMK_pointer += col_ap * this->_paraV->nrow + row_ap; - for (int mu = 0; mu < tmp_ap.get_row_size(); ++mu) + for (int mu = 0; mu < target_ap.get_row_size(); ++mu) { - BlasConnector::axpy(tmp_ap.get_col_size(), + BlasConnector::axpy(target_ap.get_col_size(), kphase, tmp_DMK_pointer, ld_hk, tmp_DMR_pointer, 1); tmp_DMK_pointer += 1; - tmp_DMR_pointer += tmp_ap.get_col_size(); + tmp_DMR_pointer += target_ap.get_col_size(); } } int npol = 2; @@ -189,15 +189,15 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) { for (int is2 = 0; is2 < npol; is2++) { - step_trace[is * npol + is2] = tmp_ap.get_col_size() * is + is2; + step_trace[is * npol + is2] = target_ap.get_col_size() * is + is2; } } std::complex tmp[4]; - double* target_DMR = tmp_matrix->get_pointer(); + double* target_DMR = target_mat->get_pointer(); std::complex* tmp_DMR_pointer = tmp_DMR.data(); - for (int irow = 0; irow < tmp_ap.get_row_size(); irow += 2) + for (int irow = 0; irow < target_ap.get_row_size(); irow += 2) { - for (int icol = 0; icol < tmp_ap.get_col_size(); icol += 2) + for (int icol = 0; icol < target_ap.get_col_size(); icol += 2) { // catch the 4 spin component value of one orbital pair tmp[0] = tmp_DMR_pointer[icol + step_trace[0]]; @@ -205,15 +205,15 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) tmp[2] = tmp_DMR_pointer[icol + step_trace[2]]; tmp[3] = tmp_DMR_pointer[icol + step_trace[3]]; // transfer to Pauli matrix and save the real part - // save them back to the tmp_matrix + // save them back to the target_mat target_DMR[icol + step_trace[0]] = tmp[0].real() + tmp[3].real(); target_DMR[icol + step_trace[1]] = tmp[1].real() + tmp[2].real(); target_DMR[icol + step_trace[2]] = -tmp[1].imag() + tmp[2].imag(); // (i * (rho_updown - rho_downup)).real() target_DMR[icol + step_trace[3]] = tmp[0].real() - tmp[3].real(); } - tmp_DMR_pointer += tmp_ap.get_col_size() * 2; - target_DMR += tmp_ap.get_col_size() * 2; + tmp_DMR_pointer += target_ap.get_col_size() * 2; + target_DMR += target_ap.get_col_size() * 2; } } } @@ -241,20 +241,20 @@ void DensityMatrix, double>::cal_DMR_full(hamilt::HContaine #endif for (int i = 0; i < target_DMR->size_atom_pairs(); ++i) { - auto& tmp_ap = target_DMR->get_atom_pair(i); - int iat1 = tmp_ap.get_atom_i(); - int iat2 = tmp_ap.get_atom_j(); + auto& target_ap = target_DMR->get_atom_pair(i); + int iat1 = target_ap.get_atom_i(); + int iat2 = target_ap.get_atom_j(); // get global indexes of whole matrix for each atom in this process int row_ap = this->_paraV->atom_begin_row[iat1]; int col_ap = this->_paraV->atom_begin_col[iat2]; - for (int ir = 0; ir < tmp_ap.get_R_size(); ++ir) + for (int ir = 0; ir < target_ap.get_R_size(); ++ir) { - const ModuleBase::Vector3 r_index = tmp_ap.get_R_index(ir); - auto* tmp_matrix = tmp_ap.find_matrix(r_index); + const ModuleBase::Vector3 r_index = target_ap.get_R_index(ir); + auto* target_mat = target_ap.find_matrix(r_index); #ifdef __DEBUG - if (tmp_matrix == nullptr) + if (target_mat == nullptr) { - std::cout << "tmp_matrix is nullptr" << std::endl; + std::cout << "target_mat is nullptr" << std::endl; continue; } #endif @@ -270,7 +270,7 @@ void DensityMatrix, double>::cal_DMR_full(hamilt::HContaine ModuleBase::libm::sincos(arg, &sinp, &cosp); std::complex kphase = std::complex(cosp, sinp); // set DMR element - std::complex* tmp_DMR_pointer = tmp_matrix->get_pointer(); + std::complex* tmp_DMR_pointer = target_mat->get_pointer(); const std::complex* tmp_DMK_pointer = this->_DMK[ik].data(); double* DMK_real_pointer = nullptr; double* DMK_imag_pointer = nullptr; @@ -325,9 +325,9 @@ void DensityMatrix::cal_DMR(const int ik_in) #endif for (int i = 0; i < target_DMR->size_atom_pairs(); ++i) { - hamilt::AtomPair& tmp_ap = target_DMR->get_atom_pair(i); - int iat1 = tmp_ap.get_atom_i(); - int iat2 = tmp_ap.get_atom_j(); + hamilt::AtomPair& target_ap = target_DMR->get_atom_pair(i); + int iat1 = target_ap.get_atom_i(); + int iat2 = target_ap.get_atom_j(); // get global indexes of whole matrix for each atom in this process int row_ap = this->_paraV->atom_begin_row[iat1]; int col_ap = this->_paraV->atom_begin_col[iat2]; @@ -336,23 +336,23 @@ void DensityMatrix::cal_DMR(const int ik_in) throw std::string("Atom-pair not belong this process"); } // R index - const ModuleBase::Vector3 r_index = tmp_ap.get_R_index(0); + const ModuleBase::Vector3 r_index = target_ap.get_R_index(0); #ifdef __DEBUG - assert(tmp_ap.get_R_size() == 1); + assert(target_ap.get_R_size() == 1); assert(r_index.x == 0 && r_index.y == 0 && r_index.z == 0); #endif - hamilt::BaseMatrix* tmp_matrix = tmp_ap.find_matrix(r_index); + hamilt::BaseMatrix* target_mat = target_ap.find_matrix(r_index); #ifdef __DEBUG - if (tmp_matrix == nullptr) + if (target_mat == nullptr) { - std::cout << "tmp_matrix is nullptr" << std::endl; + std::cout << "target_mat is nullptr" << std::endl; continue; } #endif // k index double kphase = 1; // set DMR element - double* tmp_DMR_pointer = tmp_matrix->get_pointer(); + double* tmp_DMR_pointer = target_mat->get_pointer(); double* tmp_DMK_pointer = this->_DMK[0 + ik_begin].data(); // transpose DMK col=>row tmp_DMK_pointer += col_ap * this->_paraV->nrow + row_ap; From de72e6e51897f84d0c5f387445f20dc12cd1de77 Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Tue, 18 Mar 2025 11:50:32 +0800 Subject: [PATCH 3/9] change pointer to ptr --- .../module_dm/density_matrix.cpp | 88 +++++++++---------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/source/module_elecstate/module_dm/density_matrix.cpp b/source/module_elecstate/module_dm/density_matrix.cpp index a311dd408c..839b7da0f0 100644 --- a/source/module_elecstate/module_dm/density_matrix.cpp +++ b/source/module_elecstate/module_dm/density_matrix.cpp @@ -117,32 +117,32 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) ModuleBase::libm::sincos(arg, &sinp, &cosp); std::complex kphase = std::complex(cosp, sinp); // set DMR element - double* tmp_DMR_pointer = target_mat->get_pointer(); - std::complex* tmp_DMK_pointer = this->_DMK[ik + ik_begin].data(); - double* DMK_real_pointer = nullptr; - double* DMK_imag_pointer = nullptr; + double* target_DMR_ptr = target_mat->get_pointer(); + std::complex* tmp_DMK_ptr = this->_DMK[ik + ik_begin].data(); + double* DMK_real_ptr = nullptr; + double* DMK_imag_ptr = nullptr; // jump DMK to fill DMR // DMR is row-major, DMK is column-major - tmp_DMK_pointer += col_ap * this->_paraV->nrow + row_ap; + tmp_DMK_ptr += col_ap * this->_paraV->nrow + row_ap; for (int mu = 0; mu < this->_paraV->get_row_size(iat1); ++mu) { - DMK_real_pointer = (double*)tmp_DMK_pointer; - DMK_imag_pointer = DMK_real_pointer + 1; + DMK_real_ptr = (double*)tmp_DMK_ptr; + DMK_imag_ptr = DMK_real_ptr + 1; BlasConnector::axpy(this->_paraV->get_col_size(iat2), kphase.real(), - DMK_real_pointer, + DMK_real_ptr, ld_hk2, - tmp_DMR_pointer, + target_DMR_ptr, 1); // "-" since i^2 = -1 BlasConnector::axpy(this->_paraV->get_col_size(iat2), -kphase.imag(), - DMK_imag_pointer, + DMK_imag_ptr, ld_hk2, - tmp_DMR_pointer, + target_DMR_ptr, 1); - tmp_DMK_pointer += 1; - tmp_DMR_pointer += this->_paraV->get_col_size(iat2); + tmp_DMK_ptr += 1; + target_DMR_ptr += this->_paraV->get_col_size(iat2); } } } @@ -163,23 +163,23 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) ModuleBase::libm::sincos(arg, &sinp, &cosp); std::complex kphase = std::complex(cosp, sinp); // set DMR element - std::complex* tmp_DMR_pointer = tmp_DMR.data(); - std::complex* tmp_DMK_pointer = this->_DMK[ik + ik_begin].data(); - double* DMK_real_pointer = nullptr; - double* DMK_imag_pointer = nullptr; + std::complex* tmp_DMR_ptr = tmp_DMR.data(); + std::complex* tmp_DMK_ptr = this->_DMK[ik + ik_begin].data(); + double* DMK_real_ptr = nullptr; + double* DMK_imag_ptr = nullptr; // jump DMK to fill DMR // DMR is row-major, DMK is column-major - tmp_DMK_pointer += col_ap * this->_paraV->nrow + row_ap; + tmp_DMK_ptr += col_ap * this->_paraV->nrow + row_ap; for (int mu = 0; mu < target_ap.get_row_size(); ++mu) { BlasConnector::axpy(target_ap.get_col_size(), kphase, - tmp_DMK_pointer, + tmp_DMK_ptr, ld_hk, - tmp_DMR_pointer, + tmp_DMR_ptr, 1); - tmp_DMK_pointer += 1; - tmp_DMR_pointer += target_ap.get_col_size(); + tmp_DMK_ptr += 1; + tmp_DMR_ptr += target_ap.get_col_size(); } } int npol = 2; @@ -194,16 +194,16 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) } std::complex tmp[4]; double* target_DMR = target_mat->get_pointer(); - std::complex* tmp_DMR_pointer = tmp_DMR.data(); + std::complex* tmp_DMR_ptr = tmp_DMR.data(); for (int irow = 0; irow < target_ap.get_row_size(); irow += 2) { for (int icol = 0; icol < target_ap.get_col_size(); icol += 2) { // catch the 4 spin component value of one orbital pair - tmp[0] = tmp_DMR_pointer[icol + step_trace[0]]; - tmp[1] = tmp_DMR_pointer[icol + step_trace[1]]; - tmp[2] = tmp_DMR_pointer[icol + step_trace[2]]; - tmp[3] = tmp_DMR_pointer[icol + step_trace[3]]; + tmp[0] = tmp_DMR_ptr[icol + step_trace[0]]; + tmp[1] = tmp_DMR_ptr[icol + step_trace[1]]; + tmp[2] = tmp_DMR_ptr[icol + step_trace[2]]; + tmp[3] = tmp_DMR_ptr[icol + step_trace[3]]; // transfer to Pauli matrix and save the real part // save them back to the target_mat target_DMR[icol + step_trace[0]] = tmp[0].real() + tmp[3].real(); @@ -212,7 +212,7 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) = -tmp[1].imag() + tmp[2].imag(); // (i * (rho_updown - rho_downup)).real() target_DMR[icol + step_trace[3]] = tmp[0].real() - tmp[3].real(); } - tmp_DMR_pointer += target_ap.get_col_size() * 2; + tmp_DMR_ptr += target_ap.get_col_size() * 2; target_DMR += target_ap.get_col_size() * 2; } } @@ -270,23 +270,23 @@ void DensityMatrix, double>::cal_DMR_full(hamilt::HContaine ModuleBase::libm::sincos(arg, &sinp, &cosp); std::complex kphase = std::complex(cosp, sinp); // set DMR element - std::complex* tmp_DMR_pointer = target_mat->get_pointer(); - const std::complex* tmp_DMK_pointer = this->_DMK[ik].data(); - double* DMK_real_pointer = nullptr; - double* DMK_imag_pointer = nullptr; + std::complex* target_DMR_ptr = target_mat->get_pointer(); + const std::complex* tmp_DMK_ptr = this->_DMK[ik].data(); + double* DMK_real_ptr = nullptr; + double* DMK_imag_ptr = nullptr; // jump DMK to fill DMR // DMR is row-major, DMK is column-major - tmp_DMK_pointer += col_ap * this->_paraV->nrow + row_ap; + tmp_DMK_ptr += col_ap * this->_paraV->nrow + row_ap; for (int mu = 0; mu < this->_paraV->get_row_size(iat1); ++mu) { BlasConnector::axpy(this->_paraV->get_col_size(iat2), kphase, - tmp_DMK_pointer, + tmp_DMK_ptr, ld_hk, - tmp_DMR_pointer, + target_DMR_ptr, 1); - tmp_DMK_pointer += 1; - tmp_DMR_pointer += this->_paraV->get_col_size(iat2); + tmp_DMK_ptr += 1; + target_DMR_ptr += this->_paraV->get_col_size(iat2); } } } @@ -352,20 +352,20 @@ void DensityMatrix::cal_DMR(const int ik_in) // k index double kphase = 1; // set DMR element - double* tmp_DMR_pointer = target_mat->get_pointer(); - double* tmp_DMK_pointer = this->_DMK[0 + ik_begin].data(); + double* target_DMR_ptr = target_mat->get_pointer(); + double* tmp_DMK_ptr = this->_DMK[0 + ik_begin].data(); // transpose DMK col=>row - tmp_DMK_pointer += col_ap * this->_paraV->nrow + row_ap; + tmp_DMK_ptr += col_ap * this->_paraV->nrow + row_ap; for (int mu = 0; mu < this->_paraV->get_row_size(iat1); ++mu) { BlasConnector::axpy(this->_paraV->get_col_size(iat2), kphase, - tmp_DMK_pointer, + tmp_DMK_ptr, ld_hk, - tmp_DMR_pointer, + target_DMR_ptr, 1); - tmp_DMK_pointer += 1; - tmp_DMR_pointer += this->_paraV->get_col_size(iat2); + tmp_DMK_ptr += 1; + target_DMR_ptr += this->_paraV->get_col_size(iat2); } } } From 6ffabc59d1dc9495ec2bea6a76a4f02648b744c4 Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Tue, 18 Mar 2025 11:55:16 +0800 Subject: [PATCH 4/9] modify variable name --- .../module_dm/density_matrix.cpp | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/source/module_elecstate/module_dm/density_matrix.cpp b/source/module_elecstate/module_dm/density_matrix.cpp index 839b7da0f0..4cca1d3988 100644 --- a/source/module_elecstate/module_dm/density_matrix.cpp +++ b/source/module_elecstate/module_dm/density_matrix.cpp @@ -118,15 +118,15 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) std::complex kphase = std::complex(cosp, sinp); // set DMR element double* target_DMR_ptr = target_mat->get_pointer(); - std::complex* tmp_DMK_ptr = this->_DMK[ik + ik_begin].data(); + std::complex* DMK_ptr = this->_DMK[ik + ik_begin].data(); double* DMK_real_ptr = nullptr; double* DMK_imag_ptr = nullptr; // jump DMK to fill DMR // DMR is row-major, DMK is column-major - tmp_DMK_ptr += col_ap * this->_paraV->nrow + row_ap; + DMK_ptr += col_ap * this->_paraV->nrow + row_ap; for (int mu = 0; mu < this->_paraV->get_row_size(iat1); ++mu) { - DMK_real_ptr = (double*)tmp_DMK_ptr; + DMK_real_ptr = (double*)DMK_ptr; DMK_imag_ptr = DMK_real_ptr + 1; BlasConnector::axpy(this->_paraV->get_col_size(iat2), kphase.real(), @@ -141,7 +141,7 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) ld_hk2, target_DMR_ptr, 1); - tmp_DMK_ptr += 1; + DMK_ptr += 1; target_DMR_ptr += this->_paraV->get_col_size(iat2); } } @@ -164,21 +164,21 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) std::complex kphase = std::complex(cosp, sinp); // set DMR element std::complex* tmp_DMR_ptr = tmp_DMR.data(); - std::complex* tmp_DMK_ptr = this->_DMK[ik + ik_begin].data(); + std::complex* DMK_ptr = this->_DMK[ik + ik_begin].data(); double* DMK_real_ptr = nullptr; double* DMK_imag_ptr = nullptr; // jump DMK to fill DMR // DMR is row-major, DMK is column-major - tmp_DMK_ptr += col_ap * this->_paraV->nrow + row_ap; + DMK_ptr += col_ap * this->_paraV->nrow + row_ap; for (int mu = 0; mu < target_ap.get_row_size(); ++mu) { BlasConnector::axpy(target_ap.get_col_size(), kphase, - tmp_DMK_ptr, + DMK_ptr, ld_hk, tmp_DMR_ptr, 1); - tmp_DMK_ptr += 1; + DMK_ptr += 1; tmp_DMR_ptr += target_ap.get_col_size(); } } @@ -271,21 +271,21 @@ void DensityMatrix, double>::cal_DMR_full(hamilt::HContaine std::complex kphase = std::complex(cosp, sinp); // set DMR element std::complex* target_DMR_ptr = target_mat->get_pointer(); - const std::complex* tmp_DMK_ptr = this->_DMK[ik].data(); + const std::complex* DMK_ptr = this->_DMK[ik].data(); double* DMK_real_ptr = nullptr; double* DMK_imag_ptr = nullptr; // jump DMK to fill DMR // DMR is row-major, DMK is column-major - tmp_DMK_ptr += col_ap * this->_paraV->nrow + row_ap; + DMK_ptr += col_ap * this->_paraV->nrow + row_ap; for (int mu = 0; mu < this->_paraV->get_row_size(iat1); ++mu) { BlasConnector::axpy(this->_paraV->get_col_size(iat2), kphase, - tmp_DMK_ptr, + DMK_ptr, ld_hk, target_DMR_ptr, 1); - tmp_DMK_ptr += 1; + DMK_ptr += 1; target_DMR_ptr += this->_paraV->get_col_size(iat2); } } @@ -353,18 +353,18 @@ void DensityMatrix::cal_DMR(const int ik_in) double kphase = 1; // set DMR element double* target_DMR_ptr = target_mat->get_pointer(); - double* tmp_DMK_ptr = this->_DMK[0 + ik_begin].data(); + double* DMK_ptr = this->_DMK[0 + ik_begin].data(); // transpose DMK col=>row - tmp_DMK_ptr += col_ap * this->_paraV->nrow + row_ap; + DMK_ptr += col_ap * this->_paraV->nrow + row_ap; for (int mu = 0; mu < this->_paraV->get_row_size(iat1); ++mu) { BlasConnector::axpy(this->_paraV->get_col_size(iat2), kphase, - tmp_DMK_ptr, + DMK_ptr, ld_hk, target_DMR_ptr, 1); - tmp_DMK_ptr += 1; + DMK_ptr += 1; target_DMR_ptr += this->_paraV->get_col_size(iat2); } } From 5528ecc5df6cd8d37a2e85d0f20ad4637bca920f Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Tue, 25 Mar 2025 20:12:12 +0800 Subject: [PATCH 5/9] modify some variable names --- .../module_dm/density_matrix.cpp | 35 ++++++++++++------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/source/module_elecstate/module_dm/density_matrix.cpp b/source/module_elecstate/module_dm/density_matrix.cpp index 4cca1d3988..d2da9e2099 100644 --- a/source/module_elecstate/module_dm/density_matrix.cpp +++ b/source/module_elecstate/module_dm/density_matrix.cpp @@ -82,6 +82,9 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) // get global indexes of whole matrix for each atom in this process int row_ap = this->_paraV->atom_begin_row[iat1]; int col_ap = this->_paraV->atom_begin_col[iat2]; + const int row_size = this->_paraV->get_row_size(iat1); + const int col_size = this->_paraV->get_col_size(iat2); + const int r_size = target_ap.get_R_size(); if (row_ap == -1 || col_ap == -1) { throw std::string("Atom-pair not belong this process"); @@ -91,7 +94,7 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) { tmp_DMR.resize(target_ap.get_size()); } - for (int ir = 0; ir < target_ap.get_R_size(); ++ir) + for (int ir = 0; ir < r_size; ++ir) { const ModuleBase::Vector3 r_index = target_ap.get_R_index(ir); hamilt::BaseMatrix* target_mat = target_ap.find_matrix(r_index); @@ -124,25 +127,25 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) // jump DMK to fill DMR // DMR is row-major, DMK is column-major DMK_ptr += col_ap * this->_paraV->nrow + row_ap; - for (int mu = 0; mu < this->_paraV->get_row_size(iat1); ++mu) + for (int mu = 0; mu < row_size; ++mu) { DMK_real_ptr = (double*)DMK_ptr; DMK_imag_ptr = DMK_real_ptr + 1; - BlasConnector::axpy(this->_paraV->get_col_size(iat2), + BlasConnector::axpy(col_size, kphase.real(), DMK_real_ptr, ld_hk2, target_DMR_ptr, 1); // "-" since i^2 = -1 - BlasConnector::axpy(this->_paraV->get_col_size(iat2), + BlasConnector::axpy(col_size, -kphase.imag(), DMK_imag_ptr, ld_hk2, target_DMR_ptr, 1); DMK_ptr += 1; - target_DMR_ptr += this->_paraV->get_col_size(iat2); + target_DMR_ptr += col_size; } } } @@ -247,7 +250,10 @@ void DensityMatrix, double>::cal_DMR_full(hamilt::HContaine // get global indexes of whole matrix for each atom in this process int row_ap = this->_paraV->atom_begin_row[iat1]; int col_ap = this->_paraV->atom_begin_col[iat2]; - for (int ir = 0; ir < target_ap.get_R_size(); ++ir) + const int row_size = this->_paraV->get_row_size(iat1); + const int col_size = this->_paraV->get_col_size(iat2); + const int r_size = target_ap.get_R_size(); + for (int ir = 0; ir < r_size; ++ir) { const ModuleBase::Vector3 r_index = target_ap.get_R_index(ir); auto* target_mat = target_ap.find_matrix(r_index); @@ -277,16 +283,16 @@ void DensityMatrix, double>::cal_DMR_full(hamilt::HContaine // jump DMK to fill DMR // DMR is row-major, DMK is column-major DMK_ptr += col_ap * this->_paraV->nrow + row_ap; - for (int mu = 0; mu < this->_paraV->get_row_size(iat1); ++mu) + for (int mu = 0; mu < row_size; ++mu) { - BlasConnector::axpy(this->_paraV->get_col_size(iat2), + BlasConnector::axpy(col_size, kphase, DMK_ptr, ld_hk, target_DMR_ptr, 1); DMK_ptr += 1; - target_DMR_ptr += this->_paraV->get_col_size(iat2); + target_DMR_ptr += col_size; } } } @@ -331,6 +337,9 @@ void DensityMatrix::cal_DMR(const int ik_in) // get global indexes of whole matrix for each atom in this process int row_ap = this->_paraV->atom_begin_row[iat1]; int col_ap = this->_paraV->atom_begin_col[iat2]; + const int row_size = this->_paraV->get_row_size(iat1); + const int col_size = this->_paraV->get_col_size(iat2); + const int r_size = target_ap.get_R_size(); if (row_ap == -1 || col_ap == -1) { throw std::string("Atom-pair not belong this process"); @@ -338,7 +347,7 @@ void DensityMatrix::cal_DMR(const int ik_in) // R index const ModuleBase::Vector3 r_index = target_ap.get_R_index(0); #ifdef __DEBUG - assert(target_ap.get_R_size() == 1); + assert(r_size == 1); assert(r_index.x == 0 && r_index.y == 0 && r_index.z == 0); #endif hamilt::BaseMatrix* target_mat = target_ap.find_matrix(r_index); @@ -356,16 +365,16 @@ void DensityMatrix::cal_DMR(const int ik_in) double* DMK_ptr = this->_DMK[0 + ik_begin].data(); // transpose DMK col=>row DMK_ptr += col_ap * this->_paraV->nrow + row_ap; - for (int mu = 0; mu < this->_paraV->get_row_size(iat1); ++mu) + for (int mu = 0; mu < row_size; ++mu) { - BlasConnector::axpy(this->_paraV->get_col_size(iat2), + BlasConnector::axpy(col_size, kphase, DMK_ptr, ld_hk, target_DMR_ptr, 1); DMK_ptr += 1; - target_DMR_ptr += this->_paraV->get_col_size(iat2); + target_DMR_ptr += col_size; } } } From d2404347dcc7b1666f67f6d6ca3ae95c4768aa5c Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Tue, 25 Mar 2025 20:19:59 +0800 Subject: [PATCH 6/9] move functions from .cpp to .h --- source/module_basis/module_ao/parallel_orbitals.cpp | 10 ---------- source/module_basis/module_ao/parallel_orbitals.h | 4 ++-- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/source/module_basis/module_ao/parallel_orbitals.cpp b/source/module_basis/module_ao/parallel_orbitals.cpp index c25a12df77..4b6132cac4 100644 --- a/source/module_basis/module_ao/parallel_orbitals.cpp +++ b/source/module_basis/module_ao/parallel_orbitals.cpp @@ -91,16 +91,6 @@ void Parallel_Orbitals::set_atomic_trace(const int* iat2iwt, const int &nat, con this->atom_begin_col[nat] = this->ncol; } -// Get the number of columns of the parallel orbital matrix -int Parallel_Orbitals::get_col_size()const -{ - return this->ncol; -} -// Get the number of rows of the parallel orbital matrix -int Parallel_Orbitals::get_row_size()const -{ - return this->nrow; -} // Get the number of columns of the orbital matrix of the iat-th atom int Parallel_Orbitals::get_col_size(int iat) const { diff --git a/source/module_basis/module_ao/parallel_orbitals.h b/source/module_basis/module_ao/parallel_orbitals.h index fb052de4e0..77f094e361 100644 --- a/source/module_basis/module_ao/parallel_orbitals.h +++ b/source/module_basis/module_ao/parallel_orbitals.h @@ -73,8 +73,8 @@ class Parallel_Orbitals : public Parallel_2D * get_col_size(iat) : number of columns of Hamiltonian matrix in atom iat * get_row_size(iat) : number of rows of Hamiltonian matrix in atom iat */ - int get_col_size()const; - int get_row_size()const; + int get_col_size()const { return this->ncol; }; + int get_row_size()const { return this->nrow; }; int get_col_size(int iat) const; int get_row_size(int iat) const; From 5095627ca8b42e2067c976c03961ac9d478c5d65 Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Wed, 26 Mar 2025 01:39:07 +0800 Subject: [PATCH 7/9] optimize cal_DMR --- .../module_dm/density_matrix.cpp | 247 +++++++++--------- 1 file changed, 128 insertions(+), 119 deletions(-) diff --git a/source/module_elecstate/module_dm/density_matrix.cpp b/source/module_elecstate/module_dm/density_matrix.cpp index d2da9e2099..c19e042318 100644 --- a/source/module_elecstate/module_dm/density_matrix.cpp +++ b/source/module_elecstate/module_dm/density_matrix.cpp @@ -64,7 +64,6 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) ModuleBase::timer::tick("DensityMatrix", "cal_DMR"); int ld_hk = this->_paraV->nrow; - int ld_hk2 = 2 * ld_hk; for (int is = 1; is <= this->_nspin; ++is) { int ik_begin = this->_nk * (is - 1); // jump this->_nk for spin_down if nspin==2 @@ -84,6 +83,7 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) int col_ap = this->_paraV->atom_begin_col[iat2]; const int row_size = this->_paraV->get_row_size(iat1); const int col_size = this->_paraV->get_col_size(iat2); + const int mat_size = row_size * col_size; const int r_size = target_ap.get_R_size(); if (row_ap == -1 || col_ap == -1) { @@ -92,9 +92,13 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) std::vector> tmp_DMR; if (PARAM.inp.nspin == 4) { - tmp_DMR.resize(target_ap.get_size()); + tmp_DMR.resize(mat_size * r_size, 0); } - for (int ir = 0; ir < r_size; ++ir) + + // calculate kphase and target_mat_ptr + std::vector> kphase_vec(r_size * this->_nk); + std::vector target_DMR_mat_vec(r_size); + for(int ir = 0; ir < r_size; ++ir) { const ModuleBase::Vector3 r_index = target_ap.get_R_index(ir); hamilt::BaseMatrix* target_mat = target_ap.find_matrix(r_index); @@ -105,118 +109,112 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) continue; } #endif - // loop over k-points - if (PARAM.inp.nspin != 4) + target_DMR_mat_vec[ir] = target_mat->get_pointer(); + for(int ik = 0; ik < this->_nk; ++ik) + { + if(ik_in >= 0 && ik_in != ik) + { + continue; + } + // cal k_phase + // if TK==std::complex, kphase is e^{ikR} + const ModuleBase::Vector3 dR(r_index[0], r_index[1], r_index[2]); + const double arg = (this->_kvec_d[ik] * dR) * ModuleBase::TWO_PI; + double sinp, cosp; + ModuleBase::libm::sincos(arg, &sinp, &cosp); + kphase_vec[ik * r_size + ir] = std::complex(cosp, sinp); + } + } + + std::vector> tmp_DMK_mat(mat_size); + // step_trace = 0 for NSPIN=1,2; ={0, 1, local_col, local_col+1} for NSPIN=4 + // step_trace is used when nspin = 4; + int step_trace[4]{}; + if(PARAM.inp.nspin == 4) + { + const int npol = 2; + for (int is = 0; is < npol; is++) { - for (int ik = 0; ik < this->_nk; ++ik) + for (int is2 = 0; is2 < npol; is2++) { - if(ik_in >= 0 && ik_in != ik) { continue; -} - // cal k_phase - // if TK==std::complex, kphase is e^{ikR} - const ModuleBase::Vector3 dR(r_index[0], r_index[1], r_index[2]); - const double arg = (this->_kvec_d[ik] * dR) * ModuleBase::TWO_PI; - double sinp, cosp; - ModuleBase::libm::sincos(arg, &sinp, &cosp); - std::complex kphase = std::complex(cosp, sinp); - // set DMR element - double* target_DMR_ptr = target_mat->get_pointer(); - std::complex* DMK_ptr = this->_DMK[ik + ik_begin].data(); - double* DMK_real_ptr = nullptr; - double* DMK_imag_ptr = nullptr; - // jump DMK to fill DMR - // DMR is row-major, DMK is column-major - DMK_ptr += col_ap * this->_paraV->nrow + row_ap; - for (int mu = 0; mu < row_size; ++mu) - { - DMK_real_ptr = (double*)DMK_ptr; - DMK_imag_ptr = DMK_real_ptr + 1; - BlasConnector::axpy(col_size, - kphase.real(), - DMK_real_ptr, - ld_hk2, - target_DMR_ptr, - 1); - // "-" since i^2 = -1 - BlasConnector::axpy(col_size, - -kphase.imag(), - DMK_imag_ptr, - ld_hk2, - target_DMR_ptr, - 1); - DMK_ptr += 1; - target_DMR_ptr += col_size; - } + step_trace[is * npol + is2] = target_ap.get_col_size() * is + is2; } } + } + for(int ik = 0; ik < this->_nk; ++ik) + { + if(ik_in >= 0 && ik_in != ik) + { + continue; + } - // treat DMR as pauli matrix when NSPIN=4 - if (PARAM.inp.nspin == 4) + // copy column-major DMK to row-major tmp_DMK_mat (for the purpose of computational efficiency) + const std::complex* DMK_mat_ptr = this->_DMK[ik + ik_begin].data() + col_ap * this->_paraV->nrow + row_ap; + for(int icol = 0; icol < col_size; ++icol) { - tmp_DMR.assign(target_ap.get_size(), std::complex(0.0, 0.0)); - for (int ik = 0; ik < this->_nk; ++ik) + for(int irow = 0; irow < row_size; ++irow) { - if(ik_in >= 0 && ik_in != ik) { continue; -} - // cal k_phase - // if TK==std::complex, kphase is e^{ikR} - const ModuleBase::Vector3 dR(r_index[0], r_index[1], r_index[2]); - const double arg = (this->_kvec_d[ik] * dR) * ModuleBase::TWO_PI; - double sinp, cosp; - ModuleBase::libm::sincos(arg, &sinp, &cosp); - std::complex kphase = std::complex(cosp, sinp); - // set DMR element - std::complex* tmp_DMR_ptr = tmp_DMR.data(); - std::complex* DMK_ptr = this->_DMK[ik + ik_begin].data(); - double* DMK_real_ptr = nullptr; - double* DMK_imag_ptr = nullptr; - // jump DMK to fill DMR - // DMR is row-major, DMK is column-major - DMK_ptr += col_ap * this->_paraV->nrow + row_ap; - for (int mu = 0; mu < target_ap.get_row_size(); ++mu) - { - BlasConnector::axpy(target_ap.get_col_size(), - kphase, - DMK_ptr, - ld_hk, - tmp_DMR_ptr, - 1); - DMK_ptr += 1; - tmp_DMR_ptr += target_ap.get_col_size(); - } + tmp_DMK_mat[irow * col_size + icol] = DMK_mat_ptr[icol * ld_hk + irow]; } - int npol = 2; - // step_trace = 0 for NSPIN=1,2; ={0, 1, local_col, local_col+1} for NSPIN=4 - int step_trace[4]; - for (int is = 0; is < npol; is++) + } + + // if nspin != 4, fill DMR + // if nspin == 4, fill tmp_DMR + for(int ir = 0; ir < r_size; ++ir) + { + std::complex kphase = kphase_vec[ik * r_size + ir]; + if(PARAM.inp.nspin != 4) { - for (int is2 = 0; is2 < npol; is2++) + double* target_DMR_mat = target_DMR_mat_vec[ir]; + for(int irow = 0; irow < row_size; ++irow) { - step_trace[is * npol + is2] = target_ap.get_col_size() * is + is2; + for(int icol = 0; icol < col_size; ++icol) + { + target_DMR_mat[irow * col_size + icol] += kphase.real() * tmp_DMK_mat[irow * col_size + icol].real() + - kphase.imag() * tmp_DMK_mat[irow * col_size + icol].imag(); + } } + } else if(PARAM.inp.nspin == 4) + { + std::complex* tmp_DMR_mat = &tmp_DMR[ir * mat_size]; + BlasConnector::axpy(mat_size, + kphase, + tmp_DMK_mat.data(), + 1, + tmp_DMR_mat, + 1); } - std::complex tmp[4]; - double* target_DMR = target_mat->get_pointer(); - std::complex* tmp_DMR_ptr = tmp_DMR.data(); - for (int irow = 0; irow < target_ap.get_row_size(); irow += 2) + } + } + + // if nspin == 4 + // copy tmp_DMR to fill target_DMR + if(PARAM.inp.nspin == 4) + { + std::complex tmp[4]{}; + for(int ir = 0; ir < r_size; ++ir) + { + std::complex* tmp_DMR_mat = &tmp_DMR[ir * mat_size]; + double* target_DMR_mat = target_DMR_mat_vec[ir]; + for (int irow = 0; irow < row_size; irow += 2) { - for (int icol = 0; icol < target_ap.get_col_size(); icol += 2) + for (int icol = 0; icol < col_size; icol += 2) { // catch the 4 spin component value of one orbital pair - tmp[0] = tmp_DMR_ptr[icol + step_trace[0]]; - tmp[1] = tmp_DMR_ptr[icol + step_trace[1]]; - tmp[2] = tmp_DMR_ptr[icol + step_trace[2]]; - tmp[3] = tmp_DMR_ptr[icol + step_trace[3]]; + tmp[0] = tmp_DMR_mat[icol + step_trace[0]]; + tmp[1] = tmp_DMR_mat[icol + step_trace[1]]; + tmp[2] = tmp_DMR_mat[icol + step_trace[2]]; + tmp[3] = tmp_DMR_mat[icol + step_trace[3]]; // transfer to Pauli matrix and save the real part // save them back to the target_mat - target_DMR[icol + step_trace[0]] = tmp[0].real() + tmp[3].real(); - target_DMR[icol + step_trace[1]] = tmp[1].real() + tmp[2].real(); - target_DMR[icol + step_trace[2]] + target_DMR_mat[icol + step_trace[0]] = tmp[0].real() + tmp[3].real(); + target_DMR_mat[icol + step_trace[1]] = tmp[1].real() + tmp[2].real(); + target_DMR_mat[icol + step_trace[2]] = -tmp[1].imag() + tmp[2].imag(); // (i * (rho_updown - rho_downup)).real() - target_DMR[icol + step_trace[3]] = tmp[0].real() - tmp[3].real(); + target_DMR_mat[icol + step_trace[3]] = tmp[0].real() - tmp[3].real(); } - tmp_DMR_ptr += target_ap.get_col_size() * 2; - target_DMR += target_ap.get_col_size() * 2; + tmp_DMR_mat += col_size * 2; + target_DMR_mat += col_size * 2; } } } @@ -252,11 +250,16 @@ void DensityMatrix, double>::cal_DMR_full(hamilt::HContaine int col_ap = this->_paraV->atom_begin_col[iat2]; const int row_size = this->_paraV->get_row_size(iat1); const int col_size = this->_paraV->get_col_size(iat2); + const int mat_size = row_size * col_size; const int r_size = target_ap.get_R_size(); - for (int ir = 0; ir < r_size; ++ir) + + // calculate kphase and target_mat_ptr + std::vector> kphase_vec(r_size * this->_nk); + std::vector*> target_DMR_mat_vec(r_size); + for(int ir = 0; ir < r_size; ++ir) { const ModuleBase::Vector3 r_index = target_ap.get_R_index(ir); - auto* target_mat = target_ap.find_matrix(r_index); + hamilt::BaseMatrix>* target_mat = target_ap.find_matrix(r_index); #ifdef __DEBUG if (target_mat == nullptr) { @@ -264,9 +267,8 @@ void DensityMatrix, double>::cal_DMR_full(hamilt::HContaine continue; } #endif - // loop over k-points - // calculate full matrix for complex density matrix - for (int ik = 0; ik < this->_nk; ++ik) + target_DMR_mat_vec[ir] = target_mat->get_pointer(); + for(int ik = 0; ik < this->_nk; ++ik) { // cal k_phase // if TK==std::complex, kphase is e^{ikR} @@ -274,27 +276,34 @@ void DensityMatrix, double>::cal_DMR_full(hamilt::HContaine const double arg = (this->_kvec_d[ik] * dR) * ModuleBase::TWO_PI; double sinp, cosp; ModuleBase::libm::sincos(arg, &sinp, &cosp); - std::complex kphase = std::complex(cosp, sinp); - // set DMR element - std::complex* target_DMR_ptr = target_mat->get_pointer(); - const std::complex* DMK_ptr = this->_DMK[ik].data(); - double* DMK_real_ptr = nullptr; - double* DMK_imag_ptr = nullptr; - // jump DMK to fill DMR - // DMR is row-major, DMK is column-major - DMK_ptr += col_ap * this->_paraV->nrow + row_ap; - for (int mu = 0; mu < row_size; ++mu) + kphase_vec[ik * r_size + ir] = std::complex(cosp, sinp); + } + } + + std::vector> tmp_DMK_mat(mat_size); + for(int ik = 0; ik < this->_nk; ++ik) + { + // copy column-major DMK to row-major tmp_DMK_mat (for the purpose of computational efficiency) + const std::complex* DMK_mat_ptr = this->_DMK[ik].data() + col_ap * this->_paraV->nrow + row_ap; + for(int icol = 0; icol < col_size; ++icol) + { + for(int irow = 0; irow < row_size; ++irow) { - BlasConnector::axpy(col_size, - kphase, - DMK_ptr, - ld_hk, - target_DMR_ptr, - 1); - DMK_ptr += 1; - target_DMR_ptr += col_size; + tmp_DMK_mat[irow * col_size + icol] = DMK_mat_ptr[icol * ld_hk + irow]; } } + + for(int ir = 0; ir < r_size; ++ir) + { + std::complex kphase = kphase_vec[ik * r_size + ir]; + std::complex* target_DMR_mat = target_DMR_mat_vec[ir]; + BlasConnector::axpy(mat_size, + kphase, + tmp_DMK_mat.data(), + 1, + target_DMR_mat, + 1); + } } } ModuleBase::timer::tick("DensityMatrix", "cal_DMR_full"); From 1ab1998fc81c6a0a13bf9809ec6189aad15a5b53 Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Wed, 26 Mar 2025 01:52:10 +0800 Subject: [PATCH 8/9] add schedule(dynamic) --- source/module_elecstate/module_dm/density_matrix.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/source/module_elecstate/module_dm/density_matrix.cpp b/source/module_elecstate/module_dm/density_matrix.cpp index c19e042318..7cb29e9524 100644 --- a/source/module_elecstate/module_dm/density_matrix.cpp +++ b/source/module_elecstate/module_dm/density_matrix.cpp @@ -71,7 +71,7 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) // set zero since this function is called in every scf step target_DMR->set_zero(); #ifdef _OPENMP -#pragma omp parallel for +#pragma omp parallel for schedule(dynamic) #endif for (int i = 0; i < target_DMR->size_atom_pairs(); ++i) { @@ -233,12 +233,11 @@ void DensityMatrix, double>::cal_DMR_full(hamilt::HContaine ModuleBase::timer::tick("DensityMatrix", "cal_DMR_full"); int ld_hk = this->_paraV->nrow; - int ld_hk2 = 2 * ld_hk; hamilt::HContainer>* target_DMR = dmR_out; // set zero since this function is called in every scf step target_DMR->set_zero(); #ifdef _OPENMP -#pragma omp parallel for +#pragma omp parallel for schedule(dynamic) #endif for (int i = 0; i < target_DMR->size_atom_pairs(); ++i) { @@ -336,7 +335,7 @@ void DensityMatrix::cal_DMR(const int ik_in) assert(this->_nk == 1); #endif #ifdef _OPENMP -#pragma omp parallel for +#pragma omp parallel for schedule(dynamic) #endif for (int i = 0; i < target_DMR->size_atom_pairs(); ++i) { From 9c57cbe72758ed10dd125cca506d3543bc9e1eab Mon Sep 17 00:00:00 2001 From: dzzz2001 Date: Wed, 26 Mar 2025 16:57:40 +0800 Subject: [PATCH 9/9] optimize func_folding --- .../module_dm/density_matrix.cpp | 9 ++-- .../module_hcontainer/atom_pair.h | 6 +++ .../module_hcontainer/func_folding.cpp | 47 ++++++++++++++++--- .../module_hcontainer/hcontainer_funcs.h | 4 +- 4 files changed, 52 insertions(+), 14 deletions(-) diff --git a/source/module_elecstate/module_dm/density_matrix.cpp b/source/module_elecstate/module_dm/density_matrix.cpp index 7cb29e9524..4521071d65 100644 --- a/source/module_elecstate/module_dm/density_matrix.cpp +++ b/source/module_elecstate/module_dm/density_matrix.cpp @@ -166,13 +166,10 @@ void DensityMatrix, double>::cal_DMR(const int ik_in) if(PARAM.inp.nspin != 4) { double* target_DMR_mat = target_DMR_mat_vec[ir]; - for(int irow = 0; irow < row_size; ++irow) + for(int i = 0; i < mat_size; i++) { - for(int icol = 0; icol < col_size; ++icol) - { - target_DMR_mat[irow * col_size + icol] += kphase.real() * tmp_DMK_mat[irow * col_size + icol].real() - - kphase.imag() * tmp_DMK_mat[irow * col_size + icol].imag(); - } + target_DMR_mat[i] += kphase.real() * tmp_DMK_mat[i].real() + - kphase.imag() * tmp_DMK_mat[i].imag(); } } else if(PARAM.inp.nspin == 4) { diff --git a/source/module_hamilt_lcao/module_hcontainer/atom_pair.h b/source/module_hamilt_lcao/module_hcontainer/atom_pair.h index ed6f205fa1..f0593641f9 100644 --- a/source/module_hamilt_lcao/module_hcontainer/atom_pair.h +++ b/source/module_hamilt_lcao/module_hcontainer/atom_pair.h @@ -118,6 +118,12 @@ class AtomPair */ void set_zero(); + /** + * @brief get begin index of this AtomPair + */ + int get_begin_row() const { return this->row_ap; } + int get_begin_col() const { return this->col_ap; } + /** * @brief get col_size for this AtomPair */ diff --git a/source/module_hamilt_lcao/module_hcontainer/func_folding.cpp b/source/module_hamilt_lcao/module_hcontainer/func_folding.cpp index af8f7c76d5..6d235b1d67 100644 --- a/source/module_hamilt_lcao/module_hcontainer/func_folding.cpp +++ b/source/module_hamilt_lcao/module_hcontainer/func_folding.cpp @@ -15,7 +15,7 @@ template void folding_HR(const hamilt::HContainer& hR, std::complex* hk, const ModuleBase::Vector3& kvec_d_in, - const int ncol, + const int hk_ld, const int hk_type) { #ifdef _OPENMP @@ -24,9 +24,18 @@ void folding_HR(const hamilt::HContainer& hR, for (int i = 0; i < hR.size_atom_pairs(); ++i) { hamilt::AtomPair& tmp = hR.get_atom_pair(i); - for(int ir = 0;ir < tmp.get_R_size(); ++ir ) + const int row_size = tmp.get_row_size(); + const int col_size = tmp.get_col_size(); + // copy hk to hk_type + // hk_tmp is row-major and stored contiguously in memory, + // so copy hr to hk_tmp is faster than copy hr to hk + std::vector> hk_mat_tmp(row_size * col_size, 0); + + // copy hr to hk_tmp + for(int ir = 0; ir < tmp.get_R_size(); ++ir) { const ModuleBase::Vector3 r_index = tmp.get_R_index(ir); + TR* hr_mat = tmp.get_pointer(ir); // cal k_phase // if TK==std::complex, kphase is e^{ikR} const ModuleBase::Vector3 dR(r_index.x, r_index.y, r_index.z); @@ -34,9 +43,35 @@ void folding_HR(const hamilt::HContainer& hR, double sinp, cosp; ModuleBase::libm::sincos(arg, &sinp, &cosp); std::complex kphase = std::complex(cosp, sinp); + + for(int i = 0; i < row_size * col_size; ++i) + { + hk_mat_tmp[i] += kphase * hr_mat[i]; + } + } - tmp.find_R(r_index); - tmp.add_to_matrix(hk, ncol, kphase, hk_type); + // copy hk_tmp to hk + if (hk_type == 0) + { + std::complex* hk_mat = hk + tmp.get_begin_row() * hk_ld + tmp.get_begin_col(); + for(int irow = 0; irow < row_size; ++irow) + { + for(int icol = 0; icol < col_size; ++icol) + { + hk_mat[irow * hk_ld + icol] += hk_mat_tmp[irow * col_size + icol]; + } + } + } + else if(hk_type == 1) + { + std::complex* hk_mat = hk + tmp.get_begin_col() * hk_ld + tmp.get_begin_row(); + for(int icol = 0; icol < col_size; ++icol) + { + for(int irow = 0; irow < row_size; ++irow) + { + hk_mat[icol * hk_ld + irow] += hk_mat_tmp[irow * col_size + icol]; + } + } } } /*for (int i = 0; i < hR.size_R_loop(); ++i) @@ -82,7 +117,7 @@ template void folding_HR(const hamilt::HContainer& hR, void folding_HR(const hamilt::HContainer& hR, double* hk, const ModuleBase::Vector3& kvec_d_in, - const int ncol, + const int hk_ld, const int hk_type) { // in ABACUS, this function works with gamma-only case. @@ -97,7 +132,7 @@ void folding_HR(const hamilt::HContainer& hR, double kphase = 1.0; // Hk = HR - hR.get_atom_pair(i).add_to_matrix(hk, ncol, kphase, hk_type); + hR.get_atom_pair(i).add_to_matrix(hk, hk_ld , kphase, hk_type); } } diff --git a/source/module_hamilt_lcao/module_hcontainer/hcontainer_funcs.h b/source/module_hamilt_lcao/module_hcontainer/hcontainer_funcs.h index 31bf65e3da..2243f05ba3 100644 --- a/source/module_hamilt_lcao/module_hcontainer/hcontainer_funcs.h +++ b/source/module_hamilt_lcao/module_hcontainer/hcontainer_funcs.h @@ -16,13 +16,13 @@ template void folding_HR(const hamilt::HContainer& hR, std::complex* hk, const ModuleBase::Vector3& kvec_d_in, - const int ncol, + const int hk_ld, const int hk_type); void folding_HR(const hamilt::HContainer& hR, double* hk, const ModuleBase::Vector3& kvec_d_in, - const int ncol, + const int hk_ld, const int hk_type); #ifdef __MPI