From 939c90d9053840c90cd283ae6691303c701a4eef Mon Sep 17 00:00:00 2001
From: weiqingzhou <weiqingzhou@whu.edu.cn>
Date: Mon, 18 Dec 2023 16:06:11 +0800
Subject: [PATCH 1/8] add a new parameter mixing_angle for angle mixing in
 non-colinear calculations

---
 source/module_base/global_variable.cpp                  | 1 +
 source/module_base/global_variable.h                    | 1 +
 source/module_elecstate/module_charge/charge_mixing.cpp | 4 ++++
 source/module_io/input.cpp                              | 6 ++++++
 source/module_io/input.h                                | 1 +
 source/module_io/input_conv.cpp                         | 1 +
 source/module_io/parameter_pool.cpp                     | 4 ++++
 source/module_io/write_input.cpp                        | 1 +
 8 files changed, 19 insertions(+)

diff --git a/source/module_base/global_variable.cpp b/source/module_base/global_variable.cpp
index 9706e51a0d..8aae856c0d 100644
--- a/source/module_base/global_variable.cpp
+++ b/source/module_base/global_variable.cpp
@@ -252,6 +252,7 @@ double MIXING_GG0 = 1.00;
 double MIXING_BETA_MAG = 1.6;
 double MIXING_GG0_MAG = 1.00;
 double MIXING_GG0_MIN = 0.1;
+double MIXING_ANGLE = 0.0;
 bool MIXING_TAU = 0;
 
 //==========================================================
diff --git a/source/module_base/global_variable.h b/source/module_base/global_variable.h
index 2134eab8e6..c0a67f7b1c 100644
--- a/source/module_base/global_variable.h
+++ b/source/module_base/global_variable.h
@@ -281,6 +281,7 @@ extern bool MIXING_TAU;
 extern double MIXING_BETA_MAG;
 extern double MIXING_GG0_MAG;
 extern double MIXING_GG0_MIN;
+extern double MIXING_ANGLE;
 
 //==========================================================
 // device flags added by denghui
diff --git a/source/module_elecstate/module_charge/charge_mixing.cpp b/source/module_elecstate/module_charge/charge_mixing.cpp
index 102ee6e246..b4b18541ad 100755
--- a/source/module_elecstate/module_charge/charge_mixing.cpp
+++ b/source/module_elecstate/module_charge/charge_mixing.cpp
@@ -43,6 +43,10 @@ void Charge_Mixing::set_mixing(const std::string& mixing_mode_in,
         GlobalV::ofs_running<<"mixing_beta_mag: "<< this->mixing_beta_mag <<std::endl;
         GlobalV::ofs_running<<"mixing_gg0_mag: "<< GlobalV::MIXING_GG0_MAG <<std::endl;
     }
+    if (GlobalV::MIXING_ANGLE > 0)
+    {
+        GlobalV::ofs_running<<"mixing_angle: "<< GlobalV::MIXING_ANGLE <<std::endl;
+    }
     GlobalV::ofs_running<<"mixing_ndim: "<< this->mixing_ndim <<std::endl;
     GlobalV::ofs_running<<"----------- Double Check Mixing Parameters End ------------"<<std::endl;
 
diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp
index 45860c817a..128eacb29b 100644
--- a/source/module_io/input.cpp
+++ b/source/module_io/input.cpp
@@ -304,6 +304,7 @@ void Input::Default(void)
     mixing_beta_mag = -10.0; // only set when nspin == 2 || nspin == 4
     mixing_gg0_mag = 0.0; // defaultly exclude Kerker from mixing magnetic density
     mixing_gg0_min = 0.1; // defaultly minimum kerker coefficient
+    mixing_angle = -10.0; // defaultly close for npsin = 4
     mixing_tau = false;
     mixing_dftu = false;
     //----------------------------------------------------------
@@ -1251,6 +1252,10 @@ bool Input::Read(const std::string &fn)
         {
             read_value(ifs, mixing_gg0_min);
         }
+        else if (strcmp("mixing_angle", word) == 0)
+        {
+            read_value(ifs, mixing_angle);
+        }
         else if (strcmp("mixing_tau", word) == 0)
         {
             read_bool(ifs, mixing_tau);
@@ -3175,6 +3180,7 @@ void Input::Bcast()
     Parallel_Common::bcast_double(mixing_beta_mag);
     Parallel_Common::bcast_double(mixing_gg0_mag);
     Parallel_Common::bcast_double(mixing_gg0_min);
+    Parallel_Common::bcast_double(mixing_angle);
     Parallel_Common::bcast_bool(mixing_tau);
     Parallel_Common::bcast_bool(mixing_dftu);
 
diff --git a/source/module_io/input.h b/source/module_io/input.h
index 53b914f415..bc56245a32 100644
--- a/source/module_io/input.h
+++ b/source/module_io/input.h
@@ -234,6 +234,7 @@ class Input
     double mixing_beta_mag;
     double mixing_gg0_mag;
     double mixing_gg0_min;
+    double mixing_angle;
 
     bool mixing_tau; // whether to mix tau in mgga
     bool mixing_dftu; //whether to mix locale in DFT+U
diff --git a/source/module_io/input_conv.cpp b/source/module_io/input_conv.cpp
index 73a057e171..fbe85f79c0 100644
--- a/source/module_io/input_conv.cpp
+++ b/source/module_io/input_conv.cpp
@@ -752,6 +752,7 @@ void Input_Conv::Convert(void)
     GlobalV::MIXING_BETA_MAG = INPUT.mixing_beta_mag;
     GlobalV::MIXING_GG0_MAG = INPUT.mixing_gg0_mag;
     GlobalV::MIXING_GG0_MIN = INPUT.mixing_gg0_min;
+    GlobalV::MIXING_ANGLE = INPUT.mixing_angle;
     GlobalV::MIXING_TAU = INPUT.mixing_tau;
     
     ModuleBase::timer::tick("Input_Conv", "Convert");
diff --git a/source/module_io/parameter_pool.cpp b/source/module_io/parameter_pool.cpp
index 8d7d0ca1b9..51bbffac7b 100644
--- a/source/module_io/parameter_pool.cpp
+++ b/source/module_io/parameter_pool.cpp
@@ -843,6 +843,10 @@ bool input_parameters_set(std::map<std::string, InputParameter> input_parameters
     {
         INPUT.mixing_gg0_min = *static_cast<double*>(input_parameters["mixing_gg0_min"].get());
     }
+    else if (input_parameters.count("mixing_angle") != 0)
+    {
+        INPUT.mixing_angle = *static_cast<double*>(input_parameters["mixing_angle"].get());
+    }
     else if (input_parameters.count("mixing_tau") != 0)
     {
         INPUT.mixing_tau = *static_cast<bool*>(input_parameters["mixing_tau"].get());
diff --git a/source/module_io/write_input.cpp b/source/module_io/write_input.cpp
index 89f94c411e..bd5378a086 100644
--- a/source/module_io/write_input.cpp
+++ b/source/module_io/write_input.cpp
@@ -250,6 +250,7 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_beta_mag", mixing_beta_mag, "mixing parameter for magnetic density");
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_gg0_mag", mixing_gg0_mag, "mixing parameter in kerker");
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_gg0_min", mixing_gg0_min, "the minimum kerker coefficient");
+    ModuleBase::GlobalFunc::OUTP(ofs, "mixing_angle", mixing_angle, "angle mixing parameter for non-colinear calculations");
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_tau", mixing_tau, "whether to mix tau in mGGA calculation");
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_dftu", mixing_dftu, "whether to mix locale in DFT+U calculation");
 

From 9856c264350269bfff3abec97331feee5fa83110 Mon Sep 17 00:00:00 2001
From: weiqingzhou <weiqingzhou@whu.edu.cn>
Date: Tue, 19 Dec 2023 10:40:14 +0800
Subject: [PATCH 2/8] remove cal_coef && mix_data into every block

---
 .../module_charge/charge_mixing.cpp           | 24 ++++++++++++-------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/source/module_elecstate/module_charge/charge_mixing.cpp b/source/module_elecstate/module_charge/charge_mixing.cpp
index b4b18541ad..7e994f27a6 100755
--- a/source/module_elecstate/module_charge/charge_mixing.cpp
+++ b/source/module_elecstate/module_charge/charge_mixing.cpp
@@ -441,6 +441,10 @@ void Charge_Mixing::mix_rho_real(Charge* chr)
         rhor_out = chr->rho[0];
         auto screen = std::bind(&Charge_Mixing::Kerker_screen_real, this, std::placeholders::_1);
         this->mixing->push_data(this->rho_mdata, rhor_in, rhor_out, screen, true);    
+        auto inner_product
+            = std::bind(&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2);
+        this->mixing->cal_coef(this->rho_mdata, inner_product);
+        this->mixing->mix_data(this->rho_mdata, rhor_out);
     }
     else if (GlobalV::NSPIN == 2)
     {
@@ -469,6 +473,13 @@ void Charge_Mixing::mix_rho_real(Charge* chr)
             }
         };
         this->mixing->push_data(this->rho_mdata, rhor_in, rhor_out, screen, twobeta_mix, true);
+        auto inner_product
+            = std::bind(&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2);
+        this->mixing->cal_coef(this->rho_mdata, inner_product);
+        this->mixing->mix_data(this->rho_mdata, rhor_out);
+        // delete
+        chr->get_rho_from_mag();
+        chr->destroy_rho_mag();
     }
     else if (GlobalV::NSPIN == 4)
     {
@@ -497,17 +508,12 @@ void Charge_Mixing::mix_rho_real(Charge* chr)
             }
         };
         this->mixing->push_data(this->rho_mdata, rhor_in, rhor_out, screen, twobeta_mix, true);
+        auto inner_product
+            = std::bind(&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2);
+        this->mixing->cal_coef(this->rho_mdata, inner_product);
+        this->mixing->mix_data(this->rho_mdata, rhor_out);
     }
     
-    auto inner_product
-        = std::bind(&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2);
-    this->mixing->cal_coef(this->rho_mdata, inner_product);
-    this->mixing->mix_data(this->rho_mdata, rhor_out);
-    if (GlobalV::NSPIN == 2)
-    {
-        chr->get_rho_from_mag();
-        chr->destroy_rho_mag();
-    }
     chr->renormalize_rho();
     double *taur_out, *taur_in;
     if ((XC_Functional::get_func_type() == 3 || XC_Functional::get_func_type() == 5) && mixing_tau)

From a0afc97699eb87e2285f67b3f03515e5ea9147fe Mon Sep 17 00:00:00 2001
From: weiqingzhou <weiqingzhou@whu.edu.cn>
Date: Tue, 19 Dec 2023 13:24:01 +0800
Subject: [PATCH 3/8] implement mixing_angle method for LCAO basis

---
 .../module_charge/charge_mixing.cpp           | 115 +++++++++++++++---
 1 file changed, 100 insertions(+), 15 deletions(-)

diff --git a/source/module_elecstate/module_charge/charge_mixing.cpp b/source/module_elecstate/module_charge/charge_mixing.cpp
index 7e994f27a6..e88547009d 100755
--- a/source/module_elecstate/module_charge/charge_mixing.cpp
+++ b/source/module_elecstate/module_charge/charge_mixing.cpp
@@ -78,14 +78,30 @@ void Charge_Mixing::set_mixing(const std::string& mixing_mode_in,
     }
 
     if (GlobalV::SCF_THR_TYPE == 1)
-    {
-        this->mixing->init_mixing_data(this->rho_mdata,
-                                       this->rhopw->npw * GlobalV::NSPIN,
-                                       sizeof(std::complex<double>));
+    {  
+        if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE > 0 )
+        {
+            this->mixing->init_mixing_data(this->rho_mdata,
+                                        this->rhopw->npw * 2,
+                                        sizeof(std::complex<double>));
+        }
+        else
+        {
+            this->mixing->init_mixing_data(this->rho_mdata,
+                                        this->rhopw->npw * GlobalV::NSPIN,
+                                        sizeof(std::complex<double>));
+        }
     }
     else
     {
-        this->mixing->init_mixing_data(this->rho_mdata, this->rhopw->nrxx * GlobalV::NSPIN, sizeof(double));
+        if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE > 0 )
+        {
+            this->mixing->init_mixing_data(this->rho_mdata, this->rhopw->nrxx * 2, sizeof(double));
+        }
+        else
+        {
+            this->mixing->init_mixing_data(this->rho_mdata, this->rhopw->nrxx * GlobalV::NSPIN, sizeof(double));
+        }
     }
 
 #ifdef USE_PAW
@@ -481,10 +497,9 @@ void Charge_Mixing::mix_rho_real(Charge* chr)
         chr->get_rho_from_mag();
         chr->destroy_rho_mag();
     }
-    else if (GlobalV::NSPIN == 4)
+    else if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE <= 0)
     {
-        // I do not merge this part with nspin=2 because the method of nspin=2 is almost done,
-        // while nspin=4 is not finished yet. I will try more methods for nspin=4 in the future. 
+        // normal broyden mixing for {rho, mx, my, mz}
         rhor_in = chr->rho_save[0];
         rhor_out = chr->rho[0];
         const int nrxx = this->rhopw->nrxx;
@@ -513,6 +528,67 @@ void Charge_Mixing::mix_rho_real(Charge* chr)
         this->mixing->cal_coef(this->rho_mdata, inner_product);
         this->mixing->mix_data(this->rho_mdata, rhor_out);
     }
+    else if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE > 0)
+    {
+        // special broyden mixing for {rho, |m|} proposed by J. Phys. Soc. Jpn. 82 (2013) 114706
+        // here only consider the case of mixing_angle = 1, which mean only change |m| and keep angle fixed
+        const int nrxx = this->rhopw->nrxx;
+        // allocate memory for rho_magabs and rho_magabs_save
+        double* rho_magabs = new double[nrxx * 2];
+        double* rho_magabs_save = new double[nrxx * 2];
+        ModuleBase::GlobalFunc::ZEROS(rho_magabs, nrxx * 2);
+        ModuleBase::GlobalFunc::ZEROS(rho_magabs_save, nrxx * 2);
+        // calculate rho_magabs and rho_magabs_save
+        for (int ir = 0; ir < nrxx; ir++)
+        {
+            rho_magabs[ir] = chr->rho[0][ir]; // rho
+            rho_magabs_save[ir] = chr->rho_save[0][ir]; // rho_save
+            // |m| for rho
+            rho_magabs[nrxx + ir] = std::sqrt(chr->rho[1][ir] * chr->rho[1][ir] + chr->rho[2][ir] * chr->rho[2][ir] + chr->rho[3][ir] * chr->rho[3][ir]);
+            // |m| for rho_save
+            rho_magabs_save[nrxx + ir] = std::sqrt(chr->rho_save[1][ir] * chr->rho_save[1][ir] + chr->rho_save[2][ir] * chr->rho_save[2][ir] + chr->rho_save[3][ir] * chr->rho_save[3][ir]);
+        }
+        rhor_in = rho_magabs_save;
+        rhor_out = rho_magabs;
+        auto screen = std::bind(&Charge_Mixing::Kerker_screen_real, this, std::placeholders::_1);
+        auto twobeta_mix
+            = [this, nrxx](double* out, const double* in, const double* sres) {
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static, 256)
+#endif
+            for (int i = 0; i < nrxx; ++i)
+            {
+                out[i] = in[i] + this->mixing_beta * sres[i];
+            }
+            // magnetism, |m|
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static, 256)
+#endif
+            for (int i = nrxx; i < 2 * nrxx; ++i)
+            {
+                out[i] = in[i] + this->mixing_beta_mag * sres[i];
+            }
+        };
+        this->mixing->push_data(this->rho_mdata, rhor_in, rhor_out, screen, twobeta_mix, true);
+        auto inner_product
+            = std::bind(&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2);
+        this->mixing->cal_coef(this->rho_mdata, inner_product);
+        this->mixing->mix_data(this->rho_mdata, rhor_out);
+        // use new |m| and angle to update {mx, my, mz}
+        for (int ir = 0; ir < nrxx; ir++)
+        {
+            chr->rho[0][ir] = rho_magabs[ir]; // rho
+            double norm = std::sqrt(chr->rho[1][ir] * chr->rho[1][ir] + chr->rho[2][ir] * chr->rho[2][ir] + chr->rho[3][ir] * chr->rho[3][ir]);
+            if (norm < 1e-10) continue;
+            double rescale_tmp = rho_magabs[nrxx + ir] / norm; 
+            chr->rho[1][ir] *= rescale_tmp;
+            chr->rho[2][ir] *= rescale_tmp;
+            chr->rho[3][ir] *= rescale_tmp;
+        }
+        // delete
+        delete[] rho_magabs;
+        delete[] rho_magabs_save;
+    }
     
     chr->renormalize_rho();
     double *taur_out, *taur_in;
@@ -752,9 +828,13 @@ void Charge_Mixing::Kerker_screen_real(double* drhor)
 {
     if (this->mixing_gg0 <= 0.0001 || this->mixing_beta <= 0.1)
         return;
-    std::vector<std::complex<double>> drhog(this->rhopw->npw * GlobalV::NSPIN);
-    std::vector<double> drhor_filter(this->rhopw->nrxx * GlobalV::NSPIN);
-    for (int is = 0; is < GlobalV::NSPIN; ++is)
+    // consider a resize for mixing_angle
+    int resize_tmp = 1;
+    if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE > 0) resize_tmp = 2;
+    //
+    std::vector<std::complex<double>> drhog(this->rhopw->npw * GlobalV::NSPIN / resize_tmp);
+    std::vector<double> drhor_filter(this->rhopw->nrxx * GlobalV::NSPIN / resize_tmp);
+    for (int is = 0; is < GlobalV::NSPIN / resize_tmp; ++is)
     {
         // Note after this process some G which is higher than Gmax will be filtered.
         // Thus we cannot use Kerker_screen_recip(drhog.data()) directly after it.
@@ -762,7 +842,7 @@ void Charge_Mixing::Kerker_screen_real(double* drhor)
     }
     // implement Kerker for density and magnetization separately
     double fac, gg0, amin;
-    for (int is = 0; is < GlobalV::NSPIN; is++)
+    for (int is = 0; is < GlobalV::NSPIN / resize_tmp; is++)
     {
 
         if (is >= 1)
@@ -773,6 +853,7 @@ void Charge_Mixing::Kerker_screen_real(double* drhor)
                 assert(is == 1); // make sure break works
 #endif
                 double is_mag = GlobalV::NSPIN - 1;
+                if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE > 0) is_mag = 1;
                 for (int ig = 0; ig < this->rhopw->npw * is_mag; ig++)
                 {
                     drhog[is * this->rhopw->npw + ig] = 0;
@@ -806,7 +887,7 @@ void Charge_Mixing::Kerker_screen_real(double* drhor)
         }
     }
     // inverse FT
-    for (int is = 0; is < GlobalV::NSPIN; ++is)
+    for (int is = 0; is < GlobalV::NSPIN / resize_tmp; ++is)
     {
         this->rhopw->recip2real(drhog.data() + is * this->rhopw->npw, drhor_filter.data() + is * this->rhopw->nrxx);
     }
@@ -814,7 +895,7 @@ void Charge_Mixing::Kerker_screen_real(double* drhor)
 #ifdef _OPENMP
 #pragma omp parallel for schedule(static, 512)
 #endif
-    for (int ir = 0; ir < this->rhopw->nrxx * GlobalV::NSPIN; ir++)
+    for (int ir = 0; ir < this->rhopw->nrxx * GlobalV::NSPIN / resize_tmp; ir++)
     {
         drhor[ir] -= drhor_filter[ir];
     }
@@ -981,10 +1062,14 @@ double Charge_Mixing::inner_product_recip_new2(std::complex<double>* rhog1, std:
 double Charge_Mixing::inner_product_real(double* rho1, double* rho2)
 {
     double rnorm = 0.0;
+    // consider a resize for mixing_angle
+    int resize_tmp = 1;
+    if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE > 0) resize_tmp = 2;
+
 #ifdef _OPENMP
 #pragma omp parallel for reduction(+ : rnorm)
 #endif
-    for (int ir = 0; ir < this->rhopw->nrxx * GlobalV::NSPIN; ++ir)
+    for (int ir = 0; ir < this->rhopw->nrxx * GlobalV::NSPIN / resize_tmp; ++ir)
     {
         rnorm += rho1[ir] * rho2[ir];
     }

From cb0439e5539da24b11bb3131a2c4d7e8009554cb Mon Sep 17 00:00:00 2001
From: weiqingzhou <weiqingzhou@whu.edu.cn>
Date: Tue, 19 Dec 2023 16:09:21 +0800
Subject: [PATCH 4/8] remove cal_coef && mix_data into every block in
 mix_rho_recip_new()

---
 .../module_charge/charge_mixing.cpp           | 41 ++++++++-----------
 1 file changed, 17 insertions(+), 24 deletions(-)

diff --git a/source/module_elecstate/module_charge/charge_mixing.cpp b/source/module_elecstate/module_charge/charge_mixing.cpp
index e88547009d..74ea6173bd 100755
--- a/source/module_elecstate/module_charge/charge_mixing.cpp
+++ b/source/module_elecstate/module_charge/charge_mixing.cpp
@@ -299,13 +299,23 @@ void Charge_Mixing::mix_rho_recip_new(Charge* chr)
 
     std::complex<double>* rhog_in = nullptr;
     std::complex<double>* rhog_out = nullptr;
-    
+
+    //  can choose inner_product_recip_new1 or inner_product_recip_new2
+    //  inner_product_recip_new1 is a simple sum
+    //  inner_product_recip_new2 is a hartree-like sum, unit is Ry
+    auto inner_product_new
+        = std::bind(&Charge_Mixing::inner_product_recip_new2, this, std::placeholders::_1, std::placeholders::_2);
+    auto inner_product_old
+        = std::bind(&Charge_Mixing::inner_product_recip, this, std::placeholders::_1, std::placeholders::_2);
+
     if (GlobalV::NSPIN == 1)
     {
         rhog_in = chr->rhog_save[0];
         rhog_out = chr->rhog[0];    
         auto screen = std::bind(&Charge_Mixing::Kerker_screen_recip_new, this, std::placeholders::_1);
         this->mixing->push_data(this->rho_mdata, rhog_in, rhog_out, screen, true);
+        this->mixing->cal_coef(this->rho_mdata, inner_product_old);
+        this->mixing->mix_data(this->rho_mdata, rhog_out);
     }
     else if (GlobalV::NSPIN == 2)
     {
@@ -334,6 +344,11 @@ void Charge_Mixing::mix_rho_recip_new(Charge* chr)
                   }
               };
         this->mixing->push_data(this->rho_mdata, rhog_in, rhog_out, screen, twobeta_mix, true);
+        this->mixing->cal_coef(this->rho_mdata, inner_product_new);
+        this->mixing->mix_data(this->rho_mdata, rhog_out);
+        // delete
+        chr->get_rhog_from_mag();
+        chr->destroy_rhog_mag();
     }
     else if (GlobalV::NSPIN == 4)
     {
@@ -362,30 +377,8 @@ void Charge_Mixing::mix_rho_recip_new(Charge* chr)
                   }
               };
         this->mixing->push_data(this->rho_mdata, rhog_in, rhog_out, screen, twobeta_mix, true);
-    }
-
-    //  can choose inner_product_recip_new1 or inner_product_recip_new2
-    //  inner_product_recip_new1 is a simple sum
-    //  inner_product_recip_new2 is a hartree-like sum, unit is Ry
-    auto inner_product_new
-        = std::bind(&Charge_Mixing::inner_product_recip_new2, this, std::placeholders::_1, std::placeholders::_2);
-    auto inner_product_old
-        = std::bind(&Charge_Mixing::inner_product_recip, this, std::placeholders::_1, std::placeholders::_2);
-    if (GlobalV::NSPIN == 2)
-    {
-        this->mixing->cal_coef(this->rho_mdata, inner_product_new);
-    }
-    else if (GlobalV::NSPIN == 1 || GlobalV::NSPIN == 4) // nspin=4 can use old inner_product
-    {
         this->mixing->cal_coef(this->rho_mdata, inner_product_old);
-    }
-
-    this->mixing->mix_data(this->rho_mdata, rhog_out);
-
-    if (GlobalV::NSPIN == 2)
-    {
-        chr->get_rhog_from_mag();
-        chr->destroy_rhog_mag();
+        this->mixing->mix_data(this->rho_mdata, rhog_out);
     }
 
     // rhog to rho

From 7a71a0bf36e7963dcb00f52ad5381e90fae875ef Mon Sep 17 00:00:00 2001
From: weiqingzhou <weiqingzhou@whu.edu.cn>
Date: Tue, 19 Dec 2023 17:41:55 +0800
Subject: [PATCH 5/8] implement mixing_angle method for PW basis

---
 .../module_charge/charge_mixing.cpp           | 78 +++++++++++++++++--
 1 file changed, 73 insertions(+), 5 deletions(-)

diff --git a/source/module_elecstate/module_charge/charge_mixing.cpp b/source/module_elecstate/module_charge/charge_mixing.cpp
index 74ea6173bd..5e27c54cda 100755
--- a/source/module_elecstate/module_charge/charge_mixing.cpp
+++ b/source/module_elecstate/module_charge/charge_mixing.cpp
@@ -350,10 +350,9 @@ void Charge_Mixing::mix_rho_recip_new(Charge* chr)
         chr->get_rhog_from_mag();
         chr->destroy_rhog_mag();
     }
-    else if (GlobalV::NSPIN == 4)
+    else if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE <= 0)
     {
-        // I do not merge this part with nspin=2 because the method of nspin=2 is almost done,
-        // while nspin=4 is not finished yet. I will try more methods for nspin=4 in the future. 
+        // normal broyden mixing for {rho, mx, my, mz}
         rhog_in = chr->rhog_save[0];
         rhog_out = chr->rhog[0];
         const int npw = this->rhopw->npw;
@@ -380,6 +379,68 @@ void Charge_Mixing::mix_rho_recip_new(Charge* chr)
         this->mixing->cal_coef(this->rho_mdata, inner_product_old);
         this->mixing->mix_data(this->rho_mdata, rhog_out);
     }
+    else if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE > 0)
+    {
+        // special broyden mixing for {rho, |m|} proposed by J. Phys. Soc. Jpn. 82 (2013) 114706
+        // here only consider the case of mixing_angle = 1, which mean only change |m| and keep angle fixed
+        const int npw = this->rhopw->npw;
+        // allocate memory for rho_magabs and rho_magabs_save
+        std::complex<double>* rhog_magabs = new std::complex<double>[npw * 2];
+        std::complex<double>* rhog_magabs_save = new std::complex<double>[npw * 2];
+        ModuleBase::GlobalFunc::ZEROS(rhog_magabs, npw * 2);
+        ModuleBase::GlobalFunc::ZEROS(rhog_magabs_save, npw * 2);
+        // calculate rho_magabs and rho_magabs_save
+        for (int ig = 0; ig < npw; ig++)
+        {
+            rhog_magabs[ig] = chr->rhog[0][ig]; // rho
+            rhog_magabs_save[ig] = chr->rhog_save[0][ig]; // rho_save
+            // |m| for rho
+            rhog_magabs[npw + ig] = std::sqrt(chr->rhog[1][ig] * chr->rhog[1][ig] + chr->rhog[2][ig] * chr->rhog[2][ig] + chr->rhog[3][ig] * chr->rhog[3][ig]);
+            // |m| for rho_save
+            rhog_magabs_save[npw + ig] = std::sqrt(chr->rhog_save[1][ig] * chr->rhog_save[1][ig] + chr->rhog_save[2][ig] * chr->rhog_save[2][ig] + chr->rhog_save[3][ig] * chr->rhog_save[3][ig]);
+        }
+        //
+        rhog_in = rhog_magabs_save;
+        rhog_out = rhog_magabs;
+        auto screen = std::bind(&Charge_Mixing::Kerker_screen_recip_new, this, std::placeholders::_1); // use old one
+        auto twobeta_mix
+            = [this, npw](std::complex<double>* out, const std::complex<double>* in, const std::complex<double>* sres) {
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static, 256)
+#endif
+                  for (int i = 0; i < npw; ++i)
+                  {
+                      out[i] = in[i] + this->mixing_beta * sres[i];
+                  }
+            // magnetism, |m|
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static, 256)
+#endif
+                  for (int i = npw; i < 2 * npw; ++i)
+                  {
+                      out[i] = in[i] + this->mixing_beta_mag * sres[i];
+                  }
+              };
+        this->mixing->push_data(this->rho_mdata, rhog_in, rhog_out, screen, twobeta_mix, true);
+        auto inner_product_tmp
+            = std::bind(&Charge_Mixing::inner_product_recip_new1, this, std::placeholders::_1, std::placeholders::_2);
+        this->mixing->cal_coef(this->rho_mdata, inner_product_tmp);
+        this->mixing->mix_data(this->rho_mdata, rhog_out);
+        // use new |m| and angle to update {mx, my, mz}
+        for (int ig = 0; ig < npw; ig++)
+        {
+            chr->rhog[0][ig] = rhog_magabs[ig]; // rhog
+            std::complex<double> norm = std::sqrt(chr->rhog[1][ig] * chr->rhog[1][ig] + chr->rhog[2][ig] * chr->rhog[2][ig] + chr->rhog[3][ig] * chr->rhog[3][ig]);
+            if (abs(norm) < 1e-10) continue;
+            std::complex<double> rescale_tmp = rhog_magabs[npw + ig] / norm; 
+            chr->rhog[1][ig] *= rescale_tmp;
+            chr->rhog[2][ig] *= rescale_tmp;
+            chr->rhog[3][ig] *= rescale_tmp;
+        }
+        // delete
+        delete[] rhog_magabs;
+        delete[] rhog_magabs_save;
+    }
 
     // rhog to rho
     for (int is = 0; is < GlobalV::NSPIN; is++)
@@ -776,8 +837,12 @@ void Charge_Mixing::Kerker_screen_recip_new(std::complex<double>* drhog)
         return;
     double fac, gg0, amin;
 
+    // consider a resize for mixing_angle
+    int resize_tmp = 1;
+    if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE > 0) resize_tmp = 2;
+
     // implement Kerker for density and magnetization separately
-    for (int is = 0; is < GlobalV::NSPIN; ++is)
+    for (int is = 0; is < GlobalV::NSPIN / resize_tmp; ++is)
     {
         // new mixing method only support nspin=2 not nspin=4
         if (is >= 1)
@@ -980,10 +1045,13 @@ double Charge_Mixing::inner_product_recip(std::complex<double>* rho1, std::compl
 double Charge_Mixing::inner_product_recip_new1(std::complex<double>* rho1, std::complex<double>* rho2)
 {
     double rnorm = 0.0;
+    // consider a resize for mixing_angle
+    int resize_tmp = 1;
+    if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE > 0) resize_tmp = 2;
 #ifdef _OPENMP
 #pragma omp parallel for reduction(+ : rnorm)
 #endif
-    for (int ig = 0; ig < this->rhopw->npw * GlobalV::NSPIN; ++ig)
+    for (int ig = 0; ig < this->rhopw->npw * GlobalV::NSPIN / resize_tmp; ++ig)
     {
         rnorm += (conj(rho1[ig]) * rho2[ig]).real();
     }

From 44ade696c2c4e3ed9b05745289a08ee03267d868 Mon Sep 17 00:00:00 2001
From: weiqingzhou <weiqingzhou@whu.edu.cn>
Date: Wed, 20 Dec 2023 13:16:35 +0800
Subject: [PATCH 6/8] handle |m| in real space

---
 .../module_charge/charge_mixing.cpp           | 107 +++++++++++++++---
 1 file changed, 92 insertions(+), 15 deletions(-)

diff --git a/source/module_elecstate/module_charge/charge_mixing.cpp b/source/module_elecstate/module_charge/charge_mixing.cpp
index 5e27c54cda..3380cb27e5 100755
--- a/source/module_elecstate/module_charge/charge_mixing.cpp
+++ b/source/module_elecstate/module_charge/charge_mixing.cpp
@@ -383,22 +383,35 @@ void Charge_Mixing::mix_rho_recip_new(Charge* chr)
     {
         // special broyden mixing for {rho, |m|} proposed by J. Phys. Soc. Jpn. 82 (2013) 114706
         // here only consider the case of mixing_angle = 1, which mean only change |m| and keep angle fixed
-        const int npw = this->rhopw->npw;
         // allocate memory for rho_magabs and rho_magabs_save
+        const int nrxx = this->rhopw->nrxx;
+        double* rho_magabs = new double[nrxx];
+        double* rho_magabs_save = new double[nrxx];
+        ModuleBase::GlobalFunc::ZEROS(rho_magabs, nrxx);
+        ModuleBase::GlobalFunc::ZEROS(rho_magabs_save, nrxx);
+        // calculate rho_magabs and rho_magabs_save
+        for (int ir = 0; ir < nrxx; ir++)
+        {
+            // |m| for rho
+            rho_magabs[ir] = std::sqrt(chr->rho[1][ir] * chr->rho[1][ir] + chr->rho[2][ir] * chr->rho[2][ir] + chr->rho[3][ir] * chr->rho[3][ir]);
+            // |m| for rho_save
+            rho_magabs_save[ir] = std::sqrt(chr->rho_save[1][ir] * chr->rho_save[1][ir] + chr->rho_save[2][ir] * chr->rho_save[2][ir] + chr->rho_save[3][ir] * chr->rho_save[3][ir]);
+        }
+        // allocate memory for rhog_magabs and rhog_magabs_save
+        const int npw = this->rhopw->npw;
         std::complex<double>* rhog_magabs = new std::complex<double>[npw * 2];
         std::complex<double>* rhog_magabs_save = new std::complex<double>[npw * 2];
         ModuleBase::GlobalFunc::ZEROS(rhog_magabs, npw * 2);
         ModuleBase::GlobalFunc::ZEROS(rhog_magabs_save, npw * 2);
-        // calculate rho_magabs and rho_magabs_save
+        // calculate rhog_magabs and rhog_magabs_save
         for (int ig = 0; ig < npw; ig++)
         {
             rhog_magabs[ig] = chr->rhog[0][ig]; // rho
             rhog_magabs_save[ig] = chr->rhog_save[0][ig]; // rho_save
-            // |m| for rho
-            rhog_magabs[npw + ig] = std::sqrt(chr->rhog[1][ig] * chr->rhog[1][ig] + chr->rhog[2][ig] * chr->rhog[2][ig] + chr->rhog[3][ig] * chr->rhog[3][ig]);
-            // |m| for rho_save
-            rhog_magabs_save[npw + ig] = std::sqrt(chr->rhog_save[1][ig] * chr->rhog_save[1][ig] + chr->rhog_save[2][ig] * chr->rhog_save[2][ig] + chr->rhog_save[3][ig] * chr->rhog_save[3][ig]);
         }
+        // FT to get rhog_magabs and rhog_magabs_save
+        this->rhopw->real2recip(rho_magabs, rhog_magabs + this->rhopw->npw);
+        this->rhopw->real2recip(rho_magabs_save, rhog_magabs_save + this->rhopw->npw);
         //
         rhog_in = rhog_magabs_save;
         rhog_out = rhog_magabs;
@@ -423,29 +436,40 @@ void Charge_Mixing::mix_rho_recip_new(Charge* chr)
               };
         this->mixing->push_data(this->rho_mdata, rhog_in, rhog_out, screen, twobeta_mix, true);
         auto inner_product_tmp
-            = std::bind(&Charge_Mixing::inner_product_recip_new1, this, std::placeholders::_1, std::placeholders::_2);
+            = std::bind(&Charge_Mixing::inner_product_recip_new2, this, std::placeholders::_1, std::placeholders::_2);
         this->mixing->cal_coef(this->rho_mdata, inner_product_tmp);
         this->mixing->mix_data(this->rho_mdata, rhog_out);
+        // get new |m| in real space using FT
+        this->rhopw->recip2real(rhog_magabs + this->rhopw->npw, rho_magabs);
         // use new |m| and angle to update {mx, my, mz}
         for (int ig = 0; ig < npw; ig++)
         {
             chr->rhog[0][ig] = rhog_magabs[ig]; // rhog
-            std::complex<double> norm = std::sqrt(chr->rhog[1][ig] * chr->rhog[1][ig] + chr->rhog[2][ig] * chr->rhog[2][ig] + chr->rhog[3][ig] * chr->rhog[3][ig]);
+            double norm = std::sqrt(chr->rho[1][ig] * chr->rho[1][ig] + chr->rho[2][ig] * chr->rho[2][ig] + chr->rho[3][ig] * chr->rho[3][ig]);
             if (abs(norm) < 1e-10) continue;
-            std::complex<double> rescale_tmp = rhog_magabs[npw + ig] / norm; 
-            chr->rhog[1][ig] *= rescale_tmp;
-            chr->rhog[2][ig] *= rescale_tmp;
-            chr->rhog[3][ig] *= rescale_tmp;
+            double rescale_tmp = rho_magabs[npw + ig] / norm; 
+            chr->rho[1][ig] *= rescale_tmp;
+            chr->rho[2][ig] *= rescale_tmp;
+            chr->rho[3][ig] *= rescale_tmp;
         }
         // delete
         delete[] rhog_magabs;
         delete[] rhog_magabs_save;
+        delete[] rho_magabs;
+        delete[] rho_magabs_save;
     }
 
     // rhog to rho
-    for (int is = 0; is < GlobalV::NSPIN; is++)
+    if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE > 0)
+    {
+        chr->rhopw->recip2real(chr->rhog[0], chr->rho[0]);
+    }
+    else
     {
-        chr->rhopw->recip2real(chr->rhog[is], chr->rho[is]);
+        for (int is = 0; is < GlobalV::NSPIN; is++)
+        {
+            chr->rhopw->recip2real(chr->rhog[is], chr->rho[is]);
+        }
     }
 
     // renormalize rho in R-space would induce a error in K-space
@@ -1069,6 +1093,8 @@ double Charge_Mixing::inner_product_recip_new2(std::complex<double>* rhog1, std:
 
     double sum = 0.0;
 
+    if (GlobalV::NSPIN==2)
+    {
 #ifdef _OPENMP
 #pragma omp parallel for reduction(+ : sum)
 #endif
@@ -1110,7 +1136,58 @@ double Charge_Mixing::inner_product_recip_new2(std::complex<double>* rhog1, std:
         // std::cout << " sum=" << sum << " mag=" << mag << std::endl;
         sum2 += mag;
         sum += sum2;
-
+    }
+    else if (GlobalV::NSPIN==4 && GlobalV::MIXING_ANGLE > 0)
+    {
+        if (!GlobalV::DOMAG && !GlobalV::DOMAG_Z)
+        {
+#ifdef _OPENMP
+#pragma omp parallel for reduction(+ : sum)
+#endif
+            for (int ig = 0; ig < this->rhopw->npw; ++ig)
+            {
+                if (this->rhopw->gg[ig] < 1e-8)
+                    continue;
+                sum += (conj(rhog1[ig]) * rhog2[ig]).real() / this->rhopw->gg[ig];
+            }
+            sum *= fac;
+        }
+        else
+        {
+            // another part with magnetization
+#ifdef _OPENMP
+#pragma omp parallel for reduction(+ : sum)
+#endif
+            for (int ig = 0; ig < this->rhopw->npw; ig++)
+            {
+                if (ig == this->rhopw->ig_gge0)
+                    continue;
+                sum += (conj(rhog1[ig]) * rhog2[ig]).real() / this->rhopw->gg[ig];
+            }
+            sum *= fac;
+            const int ig0 = this->rhopw->ig_gge0;
+            if (ig0 > 0)
+            {
+                sum += fac2
+                       * ((conj(rhog1[ig0 + this->rhopw->npw]) * rhog2[ig0 + this->rhopw->npw]).real());
+            }
+            double fac3 = fac2;
+            if (GlobalV::GAMMA_ONLY_PW)
+            {
+                fac3 *= 2.0;
+            }
+#ifdef _OPENMP
+#pragma omp parallel for reduction(+ : sum)
+#endif
+            for (int ig = 0; ig < this->rhopw->npw; ig++)
+            {
+                if (ig == ig0)
+                    continue;
+                sum += fac3
+                       * ((conj(rhog1[ig + this->rhopw->npw]) * rhog2[ig + this->rhopw->npw]).real());
+            }
+        }
+    }
 #ifdef __MPI
     Parallel_Reduce::reduce_pool(sum);
 #endif

From d317268355499e0ad0210e6617843b9da21edf32 Mon Sep 17 00:00:00 2001
From: weiqingzhou <weiqingzhou@whu.edu.cn>
Date: Wed, 20 Dec 2023 13:49:11 +0800
Subject: [PATCH 7/8] delete Charge_Mixing::Kerker_screen_real_test()

---
 .../module_charge/charge_mixing.cpp           | 67 -------------------
 1 file changed, 67 deletions(-)

diff --git a/source/module_elecstate/module_charge/charge_mixing.cpp b/source/module_elecstate/module_charge/charge_mixing.cpp
index 3380cb27e5..acece06a04 100755
--- a/source/module_elecstate/module_charge/charge_mixing.cpp
+++ b/source/module_elecstate/module_charge/charge_mixing.cpp
@@ -983,73 +983,6 @@ void Charge_Mixing::Kerker_screen_real(double* drhor)
     }
 }
 
-// this test will be removed once new mixing method is finished
-void Charge_Mixing::Kerker_screen_real_test(double* drhor)
-{
-    // for total charge density
-    if (this->mixing_gg0 <= 0.0001 || this->mixing_beta <= 0.1)
-        return;
-    std::vector<double> drhor_filter(this->rhopw->nrxx);
-    std::vector<std::complex<double>> drhog(this->rhopw->npw);
-    // Note after this process some G which is higher than Gmax will be filtered.
-    // FT
-    this->rhopw->real2recip(drhor, drhog.data());
-    const double fac = this->mixing_gg0;
-    const double gg0 = std::pow(fac * 0.529177 / GlobalC::ucell.tpiba, 2);
-#ifdef _OPENMP
-#pragma omp parallel for schedule(static, 512)
-#endif
-    for (int ig = 0; ig < this->rhopw->npw; ig++)
-    {
-        double gg = this->rhopw->gg[ig];
-        double filter_g = std::max(gg / (gg + gg0), GlobalV::MIXING_GG0_MIN / this->mixing_beta);
-        drhog[ig] *= (1 - filter_g);
-    }
-    // inverse FT
-    this->rhopw->recip2real(drhog.data(), drhor_filter.data());
-
-#ifdef _OPENMP
-#pragma omp parallel for schedule(static, 512)
-#endif
-    for (int ir = 0; ir < this->rhopw->nrxx; ir++)
-    {
-        drhor[ir] -= drhor_filter[ir];
-    }
-
-    // for magnetic density
-    if (GlobalV::NSPIN == 2)
-    {
-        if (GlobalV::MIXING_GG0_MAG <= 0.0001 || GlobalV::MIXING_BETA_MAG <= 0.1)
-            return;
-        std::vector<double> drhor_mag_filter(this->rhopw->nrxx);
-        std::vector<std::complex<double>> drhog_mag(this->rhopw->npw);
-        // Note after this process some G which is higher than Gmax will be filtered.
-        // FT
-        this->rhopw->real2recip(drhor + this->rhopw->nrxx, drhog_mag.data());
-        const double fac_mag = GlobalV::MIXING_GG0_MAG;
-        const double gg0_mag = std::pow(fac_mag * 0.529177 / GlobalC::ucell.tpiba, 2);
-#ifdef _OPENMP
-#pragma omp parallel for schedule(static, 512)
-#endif
-        for (int ig = 0; ig < this->rhopw->npw; ig++)
-        {
-            double gg = this->rhopw->gg[ig];
-            double filter_g = std::max(gg / (gg + gg0_mag), GlobalV::MIXING_GG0_MIN / GlobalV::MIXING_BETA_MAG);
-            drhog_mag[ig] *= (1 - filter_g);
-        }
-        // inverse FT
-        this->rhopw->recip2real(drhog_mag.data(), drhor_mag_filter.data());
-        // value magnetism
-#ifdef _OPENMP
-#pragma omp parallel for schedule(static, 512)
-#endif
-        for (int ir = 0; ir < this->rhopw->nrxx; ir++)
-        {
-            drhor[ir + this->rhopw->nrxx] -= drhor_mag_filter[ir];
-        }
-    }
-}
-
 double Charge_Mixing::inner_product_recip(std::complex<double>* rho1, std::complex<double>* rho2)
 {
     std::complex<double>** rho1_2d = new std::complex<double>*[GlobalV::NSPIN];

From d56f3513c6b95b79697f1438f87d8d6c1edda0d3 Mon Sep 17 00:00:00 2001
From: weiqingzhou <weiqingzhou@whu.edu.cn>
Date: Wed, 20 Dec 2023 14:06:17 +0800
Subject: [PATCH 8/8] add corresponding docs of mixing_angle

---
 docs/advanced/input_files/input-main.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md
index 17cb135d77..031f33b692 100644
--- a/docs/advanced/input_files/input-main.md
+++ b/docs/advanced/input_files/input-main.md
@@ -77,6 +77,7 @@
     - [mixing\_gg0\_min](#mixing_gg0_min)
     - [mixing\_tau](#mixing_tau)
     - [mixing\_dftu](#mixing_dftu)
+    - [mixing\_angle](#mixing_angle)
     - [gamma\_only](#gamma_only)
     - [printe](#printe)
     - [scf\_nmax](#scf_nmax)
@@ -1014,6 +1015,17 @@ We recommend the following options:
 - **Description**: the minimum kerker coefficient 
 - **Default**: 0.1
 
+### mixing_angle
+
+- **Type**: Real
+- **Availability**: Only relevant for non-colinear calculations `nspin=4`.
+- **Description**: Normal broyden mixing can give the converged result for a given magnetic configuration. If one is not interested in the energies of a given magnetic configuration but wants to determine the ground state by relaxing the magnetic moments’ directions, one cannot rely on the standard Broyden mixing algorithm. To enhance the ability to find correct magnetic configuration for non-colinear calculations, ABACUS implements a promising mixing method proposed by J. Phys. Soc. Jpn. 82 (2013) 114706. Here, `mixing_angle` is the angle mixing parameter. In fact, only `mixing_angle=1.0` is implemented currently.
+  - **<=0**: Normal broyden mixing for $m_{x}, m_{y}, m_{z}$
+  - **>0**: Angle mixing for the modulus $|m|$ with `mixing_angle=1.0`
+- **Default**: -10.0
+
+Note: In new angle mixing, you should set `mixing_beta_mag >> mixing_beta`. The setup of `mixing_beta=0.2`, `mixing_beta_mag=1.0` usually works well.
+
 ### mixing_tau
 
 - **Type**: Boolean