diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md
index 75e4faaa03..f7eafed9cf 100644
--- a/docs/advanced/input_files/input-main.md
+++ b/docs/advanced/input_files/input-main.md
@@ -77,6 +77,7 @@
     - [mixing\_gg0\_min](#mixing_gg0_min)
     - [mixing\_tau](#mixing_tau)
     - [mixing\_dftu](#mixing_dftu)
+    - [mixing\_angle](#mixing_angle)
     - [gamma\_only](#gamma_only)
     - [printe](#printe)
     - [scf\_nmax](#scf_nmax)
@@ -1015,6 +1016,17 @@ We recommend the following options:
 - **Description**: the minimum kerker coefficient 
 - **Default**: 0.1
 
+### mixing_angle
+
+- **Type**: Real
+- **Availability**: Only relevant for non-colinear calculations `nspin=4`.
+- **Description**: Normal broyden mixing can give the converged result for a given magnetic configuration. If one is not interested in the energies of a given magnetic configuration but wants to determine the ground state by relaxing the magnetic moments’ directions, one cannot rely on the standard Broyden mixing algorithm. To enhance the ability to find correct magnetic configuration for non-colinear calculations, ABACUS implements a promising mixing method proposed by J. Phys. Soc. Jpn. 82 (2013) 114706. Here, `mixing_angle` is the angle mixing parameter. In fact, only `mixing_angle=1.0` is implemented currently.
+  - **<=0**: Normal broyden mixing for $m_{x}, m_{y}, m_{z}$
+  - **>0**: Angle mixing for the modulus $|m|$ with `mixing_angle=1.0`
+- **Default**: -10.0
+
+Note: In new angle mixing, you should set `mixing_beta_mag >> mixing_beta`. The setup of `mixing_beta=0.2`, `mixing_beta_mag=1.0` usually works well.
+
 ### mixing_tau
 
 - **Type**: Boolean
diff --git a/source/module_base/global_variable.cpp b/source/module_base/global_variable.cpp
index 9706e51a0d..8aae856c0d 100644
--- a/source/module_base/global_variable.cpp
+++ b/source/module_base/global_variable.cpp
@@ -252,6 +252,7 @@ double MIXING_GG0 = 1.00;
 double MIXING_BETA_MAG = 1.6;
 double MIXING_GG0_MAG = 1.00;
 double MIXING_GG0_MIN = 0.1;
+double MIXING_ANGLE = 0.0;
 bool MIXING_TAU = 0;
 
 //==========================================================
diff --git a/source/module_base/global_variable.h b/source/module_base/global_variable.h
index 2134eab8e6..c0a67f7b1c 100644
--- a/source/module_base/global_variable.h
+++ b/source/module_base/global_variable.h
@@ -281,6 +281,7 @@ extern bool MIXING_TAU;
 extern double MIXING_BETA_MAG;
 extern double MIXING_GG0_MAG;
 extern double MIXING_GG0_MIN;
+extern double MIXING_ANGLE;
 
 //==========================================================
 // device flags added by denghui
diff --git a/source/module_elecstate/module_charge/charge_mixing.cpp b/source/module_elecstate/module_charge/charge_mixing.cpp
index 102ee6e246..acece06a04 100755
--- a/source/module_elecstate/module_charge/charge_mixing.cpp
+++ b/source/module_elecstate/module_charge/charge_mixing.cpp
@@ -43,6 +43,10 @@ void Charge_Mixing::set_mixing(const std::string& mixing_mode_in,
         GlobalV::ofs_running<<"mixing_beta_mag: "<< this->mixing_beta_mag <<std::endl;
         GlobalV::ofs_running<<"mixing_gg0_mag: "<< GlobalV::MIXING_GG0_MAG <<std::endl;
     }
+    if (GlobalV::MIXING_ANGLE > 0)
+    {
+        GlobalV::ofs_running<<"mixing_angle: "<< GlobalV::MIXING_ANGLE <<std::endl;
+    }
     GlobalV::ofs_running<<"mixing_ndim: "<< this->mixing_ndim <<std::endl;
     GlobalV::ofs_running<<"----------- Double Check Mixing Parameters End ------------"<<std::endl;
 
@@ -74,14 +78,30 @@ void Charge_Mixing::set_mixing(const std::string& mixing_mode_in,
     }
 
     if (GlobalV::SCF_THR_TYPE == 1)
-    {
-        this->mixing->init_mixing_data(this->rho_mdata,
-                                       this->rhopw->npw * GlobalV::NSPIN,
-                                       sizeof(std::complex<double>));
+    {  
+        if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE > 0 )
+        {
+            this->mixing->init_mixing_data(this->rho_mdata,
+                                        this->rhopw->npw * 2,
+                                        sizeof(std::complex<double>));
+        }
+        else
+        {
+            this->mixing->init_mixing_data(this->rho_mdata,
+                                        this->rhopw->npw * GlobalV::NSPIN,
+                                        sizeof(std::complex<double>));
+        }
     }
     else
     {
-        this->mixing->init_mixing_data(this->rho_mdata, this->rhopw->nrxx * GlobalV::NSPIN, sizeof(double));
+        if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE > 0 )
+        {
+            this->mixing->init_mixing_data(this->rho_mdata, this->rhopw->nrxx * 2, sizeof(double));
+        }
+        else
+        {
+            this->mixing->init_mixing_data(this->rho_mdata, this->rhopw->nrxx * GlobalV::NSPIN, sizeof(double));
+        }
     }
 
 #ifdef USE_PAW
@@ -279,13 +299,23 @@ void Charge_Mixing::mix_rho_recip_new(Charge* chr)
 
     std::complex<double>* rhog_in = nullptr;
     std::complex<double>* rhog_out = nullptr;
-    
+
+    //  can choose inner_product_recip_new1 or inner_product_recip_new2
+    //  inner_product_recip_new1 is a simple sum
+    //  inner_product_recip_new2 is a hartree-like sum, unit is Ry
+    auto inner_product_new
+        = std::bind(&Charge_Mixing::inner_product_recip_new2, this, std::placeholders::_1, std::placeholders::_2);
+    auto inner_product_old
+        = std::bind(&Charge_Mixing::inner_product_recip, this, std::placeholders::_1, std::placeholders::_2);
+
     if (GlobalV::NSPIN == 1)
     {
         rhog_in = chr->rhog_save[0];
         rhog_out = chr->rhog[0];    
         auto screen = std::bind(&Charge_Mixing::Kerker_screen_recip_new, this, std::placeholders::_1);
         this->mixing->push_data(this->rho_mdata, rhog_in, rhog_out, screen, true);
+        this->mixing->cal_coef(this->rho_mdata, inner_product_old);
+        this->mixing->mix_data(this->rho_mdata, rhog_out);
     }
     else if (GlobalV::NSPIN == 2)
     {
@@ -314,11 +344,15 @@ void Charge_Mixing::mix_rho_recip_new(Charge* chr)
                   }
               };
         this->mixing->push_data(this->rho_mdata, rhog_in, rhog_out, screen, twobeta_mix, true);
+        this->mixing->cal_coef(this->rho_mdata, inner_product_new);
+        this->mixing->mix_data(this->rho_mdata, rhog_out);
+        // delete
+        chr->get_rhog_from_mag();
+        chr->destroy_rhog_mag();
     }
-    else if (GlobalV::NSPIN == 4)
+    else if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE <= 0)
     {
-        // I do not merge this part with nspin=2 because the method of nspin=2 is almost done,
-        // while nspin=4 is not finished yet. I will try more methods for nspin=4 in the future. 
+        // normal broyden mixing for {rho, mx, my, mz}
         rhog_in = chr->rhog_save[0];
         rhog_out = chr->rhog[0];
         const int npw = this->rhopw->npw;
@@ -342,36 +376,100 @@ void Charge_Mixing::mix_rho_recip_new(Charge* chr)
                   }
               };
         this->mixing->push_data(this->rho_mdata, rhog_in, rhog_out, screen, twobeta_mix, true);
-    }
-
-    //  can choose inner_product_recip_new1 or inner_product_recip_new2
-    //  inner_product_recip_new1 is a simple sum
-    //  inner_product_recip_new2 is a hartree-like sum, unit is Ry
-    auto inner_product_new
-        = std::bind(&Charge_Mixing::inner_product_recip_new2, this, std::placeholders::_1, std::placeholders::_2);
-    auto inner_product_old
-        = std::bind(&Charge_Mixing::inner_product_recip, this, std::placeholders::_1, std::placeholders::_2);
-    if (GlobalV::NSPIN == 2)
-    {
-        this->mixing->cal_coef(this->rho_mdata, inner_product_new);
-    }
-    else if (GlobalV::NSPIN == 1 || GlobalV::NSPIN == 4) // nspin=4 can use old inner_product
-    {
         this->mixing->cal_coef(this->rho_mdata, inner_product_old);
+        this->mixing->mix_data(this->rho_mdata, rhog_out);
     }
-
-    this->mixing->mix_data(this->rho_mdata, rhog_out);
-
-    if (GlobalV::NSPIN == 2)
+    else if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE > 0)
     {
-        chr->get_rhog_from_mag();
-        chr->destroy_rhog_mag();
+        // special broyden mixing for {rho, |m|} proposed by J. Phys. Soc. Jpn. 82 (2013) 114706
+        // here only consider the case of mixing_angle = 1, which mean only change |m| and keep angle fixed
+        // allocate memory for rho_magabs and rho_magabs_save
+        const int nrxx = this->rhopw->nrxx;
+        double* rho_magabs = new double[nrxx];
+        double* rho_magabs_save = new double[nrxx];
+        ModuleBase::GlobalFunc::ZEROS(rho_magabs, nrxx);
+        ModuleBase::GlobalFunc::ZEROS(rho_magabs_save, nrxx);
+        // calculate rho_magabs and rho_magabs_save
+        for (int ir = 0; ir < nrxx; ir++)
+        {
+            // |m| for rho
+            rho_magabs[ir] = std::sqrt(chr->rho[1][ir] * chr->rho[1][ir] + chr->rho[2][ir] * chr->rho[2][ir] + chr->rho[3][ir] * chr->rho[3][ir]);
+            // |m| for rho_save
+            rho_magabs_save[ir] = std::sqrt(chr->rho_save[1][ir] * chr->rho_save[1][ir] + chr->rho_save[2][ir] * chr->rho_save[2][ir] + chr->rho_save[3][ir] * chr->rho_save[3][ir]);
+        }
+        // allocate memory for rhog_magabs and rhog_magabs_save
+        const int npw = this->rhopw->npw;
+        std::complex<double>* rhog_magabs = new std::complex<double>[npw * 2];
+        std::complex<double>* rhog_magabs_save = new std::complex<double>[npw * 2];
+        ModuleBase::GlobalFunc::ZEROS(rhog_magabs, npw * 2);
+        ModuleBase::GlobalFunc::ZEROS(rhog_magabs_save, npw * 2);
+        // calculate rhog_magabs and rhog_magabs_save
+        for (int ig = 0; ig < npw; ig++)
+        {
+            rhog_magabs[ig] = chr->rhog[0][ig]; // rho
+            rhog_magabs_save[ig] = chr->rhog_save[0][ig]; // rho_save
+        }
+        // FT to get rhog_magabs and rhog_magabs_save
+        this->rhopw->real2recip(rho_magabs, rhog_magabs + this->rhopw->npw);
+        this->rhopw->real2recip(rho_magabs_save, rhog_magabs_save + this->rhopw->npw);
+        //
+        rhog_in = rhog_magabs_save;
+        rhog_out = rhog_magabs;
+        auto screen = std::bind(&Charge_Mixing::Kerker_screen_recip_new, this, std::placeholders::_1); // use old one
+        auto twobeta_mix
+            = [this, npw](std::complex<double>* out, const std::complex<double>* in, const std::complex<double>* sres) {
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static, 256)
+#endif
+                  for (int i = 0; i < npw; ++i)
+                  {
+                      out[i] = in[i] + this->mixing_beta * sres[i];
+                  }
+            // magnetism, |m|
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static, 256)
+#endif
+                  for (int i = npw; i < 2 * npw; ++i)
+                  {
+                      out[i] = in[i] + this->mixing_beta_mag * sres[i];
+                  }
+              };
+        this->mixing->push_data(this->rho_mdata, rhog_in, rhog_out, screen, twobeta_mix, true);
+        auto inner_product_tmp
+            = std::bind(&Charge_Mixing::inner_product_recip_new2, this, std::placeholders::_1, std::placeholders::_2);
+        this->mixing->cal_coef(this->rho_mdata, inner_product_tmp);
+        this->mixing->mix_data(this->rho_mdata, rhog_out);
+        // get new |m| in real space using FT
+        this->rhopw->recip2real(rhog_magabs + this->rhopw->npw, rho_magabs);
+        // use new |m| and angle to update {mx, my, mz}
+        for (int ig = 0; ig < npw; ig++)
+        {
+            chr->rhog[0][ig] = rhog_magabs[ig]; // rhog
+            double norm = std::sqrt(chr->rho[1][ig] * chr->rho[1][ig] + chr->rho[2][ig] * chr->rho[2][ig] + chr->rho[3][ig] * chr->rho[3][ig]);
+            if (abs(norm) < 1e-10) continue;
+            double rescale_tmp = rho_magabs[npw + ig] / norm; 
+            chr->rho[1][ig] *= rescale_tmp;
+            chr->rho[2][ig] *= rescale_tmp;
+            chr->rho[3][ig] *= rescale_tmp;
+        }
+        // delete
+        delete[] rhog_magabs;
+        delete[] rhog_magabs_save;
+        delete[] rho_magabs;
+        delete[] rho_magabs_save;
     }
 
     // rhog to rho
-    for (int is = 0; is < GlobalV::NSPIN; is++)
+    if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE > 0)
+    {
+        chr->rhopw->recip2real(chr->rhog[0], chr->rho[0]);
+    }
+    else
     {
-        chr->rhopw->recip2real(chr->rhog[is], chr->rho[is]);
+        for (int is = 0; is < GlobalV::NSPIN; is++)
+        {
+            chr->rhopw->recip2real(chr->rhog[is], chr->rho[is]);
+        }
     }
 
     // renormalize rho in R-space would induce a error in K-space
@@ -437,6 +535,10 @@ void Charge_Mixing::mix_rho_real(Charge* chr)
         rhor_out = chr->rho[0];
         auto screen = std::bind(&Charge_Mixing::Kerker_screen_real, this, std::placeholders::_1);
         this->mixing->push_data(this->rho_mdata, rhor_in, rhor_out, screen, true);    
+        auto inner_product
+            = std::bind(&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2);
+        this->mixing->cal_coef(this->rho_mdata, inner_product);
+        this->mixing->mix_data(this->rho_mdata, rhor_out);
     }
     else if (GlobalV::NSPIN == 2)
     {
@@ -465,11 +567,17 @@ void Charge_Mixing::mix_rho_real(Charge* chr)
             }
         };
         this->mixing->push_data(this->rho_mdata, rhor_in, rhor_out, screen, twobeta_mix, true);
+        auto inner_product
+            = std::bind(&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2);
+        this->mixing->cal_coef(this->rho_mdata, inner_product);
+        this->mixing->mix_data(this->rho_mdata, rhor_out);
+        // delete
+        chr->get_rho_from_mag();
+        chr->destroy_rho_mag();
     }
-    else if (GlobalV::NSPIN == 4)
+    else if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE <= 0)
     {
-        // I do not merge this part with nspin=2 because the method of nspin=2 is almost done,
-        // while nspin=4 is not finished yet. I will try more methods for nspin=4 in the future. 
+        // normal broyden mixing for {rho, mx, my, mz}
         rhor_in = chr->rho_save[0];
         rhor_out = chr->rho[0];
         const int nrxx = this->rhopw->nrxx;
@@ -493,17 +601,73 @@ void Charge_Mixing::mix_rho_real(Charge* chr)
             }
         };
         this->mixing->push_data(this->rho_mdata, rhor_in, rhor_out, screen, twobeta_mix, true);
+        auto inner_product
+            = std::bind(&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2);
+        this->mixing->cal_coef(this->rho_mdata, inner_product);
+        this->mixing->mix_data(this->rho_mdata, rhor_out);
     }
-    
-    auto inner_product
-        = std::bind(&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2);
-    this->mixing->cal_coef(this->rho_mdata, inner_product);
-    this->mixing->mix_data(this->rho_mdata, rhor_out);
-    if (GlobalV::NSPIN == 2)
+    else if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE > 0)
     {
-        chr->get_rho_from_mag();
-        chr->destroy_rho_mag();
+        // special broyden mixing for {rho, |m|} proposed by J. Phys. Soc. Jpn. 82 (2013) 114706
+        // here only consider the case of mixing_angle = 1, which mean only change |m| and keep angle fixed
+        const int nrxx = this->rhopw->nrxx;
+        // allocate memory for rho_magabs and rho_magabs_save
+        double* rho_magabs = new double[nrxx * 2];
+        double* rho_magabs_save = new double[nrxx * 2];
+        ModuleBase::GlobalFunc::ZEROS(rho_magabs, nrxx * 2);
+        ModuleBase::GlobalFunc::ZEROS(rho_magabs_save, nrxx * 2);
+        // calculate rho_magabs and rho_magabs_save
+        for (int ir = 0; ir < nrxx; ir++)
+        {
+            rho_magabs[ir] = chr->rho[0][ir]; // rho
+            rho_magabs_save[ir] = chr->rho_save[0][ir]; // rho_save
+            // |m| for rho
+            rho_magabs[nrxx + ir] = std::sqrt(chr->rho[1][ir] * chr->rho[1][ir] + chr->rho[2][ir] * chr->rho[2][ir] + chr->rho[3][ir] * chr->rho[3][ir]);
+            // |m| for rho_save
+            rho_magabs_save[nrxx + ir] = std::sqrt(chr->rho_save[1][ir] * chr->rho_save[1][ir] + chr->rho_save[2][ir] * chr->rho_save[2][ir] + chr->rho_save[3][ir] * chr->rho_save[3][ir]);
+        }
+        rhor_in = rho_magabs_save;
+        rhor_out = rho_magabs;
+        auto screen = std::bind(&Charge_Mixing::Kerker_screen_real, this, std::placeholders::_1);
+        auto twobeta_mix
+            = [this, nrxx](double* out, const double* in, const double* sres) {
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static, 256)
+#endif
+            for (int i = 0; i < nrxx; ++i)
+            {
+                out[i] = in[i] + this->mixing_beta * sres[i];
+            }
+            // magnetism, |m|
+#ifdef _OPENMP
+#pragma omp parallel for schedule(static, 256)
+#endif
+            for (int i = nrxx; i < 2 * nrxx; ++i)
+            {
+                out[i] = in[i] + this->mixing_beta_mag * sres[i];
+            }
+        };
+        this->mixing->push_data(this->rho_mdata, rhor_in, rhor_out, screen, twobeta_mix, true);
+        auto inner_product
+            = std::bind(&Charge_Mixing::inner_product_real, this, std::placeholders::_1, std::placeholders::_2);
+        this->mixing->cal_coef(this->rho_mdata, inner_product);
+        this->mixing->mix_data(this->rho_mdata, rhor_out);
+        // use new |m| and angle to update {mx, my, mz}
+        for (int ir = 0; ir < nrxx; ir++)
+        {
+            chr->rho[0][ir] = rho_magabs[ir]; // rho
+            double norm = std::sqrt(chr->rho[1][ir] * chr->rho[1][ir] + chr->rho[2][ir] * chr->rho[2][ir] + chr->rho[3][ir] * chr->rho[3][ir]);
+            if (norm < 1e-10) continue;
+            double rescale_tmp = rho_magabs[nrxx + ir] / norm; 
+            chr->rho[1][ir] *= rescale_tmp;
+            chr->rho[2][ir] *= rescale_tmp;
+            chr->rho[3][ir] *= rescale_tmp;
+        }
+        // delete
+        delete[] rho_magabs;
+        delete[] rho_magabs_save;
     }
+    
     chr->renormalize_rho();
     double *taur_out, *taur_in;
     if ((XC_Functional::get_func_type() == 3 || XC_Functional::get_func_type() == 5) && mixing_tau)
@@ -697,8 +861,12 @@ void Charge_Mixing::Kerker_screen_recip_new(std::complex<double>* drhog)
         return;
     double fac, gg0, amin;
 
+    // consider a resize for mixing_angle
+    int resize_tmp = 1;
+    if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE > 0) resize_tmp = 2;
+
     // implement Kerker for density and magnetization separately
-    for (int is = 0; is < GlobalV::NSPIN; ++is)
+    for (int is = 0; is < GlobalV::NSPIN / resize_tmp; ++is)
     {
         // new mixing method only support nspin=2 not nspin=4
         if (is >= 1)
@@ -742,9 +910,13 @@ void Charge_Mixing::Kerker_screen_real(double* drhor)
 {
     if (this->mixing_gg0 <= 0.0001 || this->mixing_beta <= 0.1)
         return;
-    std::vector<std::complex<double>> drhog(this->rhopw->npw * GlobalV::NSPIN);
-    std::vector<double> drhor_filter(this->rhopw->nrxx * GlobalV::NSPIN);
-    for (int is = 0; is < GlobalV::NSPIN; ++is)
+    // consider a resize for mixing_angle
+    int resize_tmp = 1;
+    if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE > 0) resize_tmp = 2;
+    //
+    std::vector<std::complex<double>> drhog(this->rhopw->npw * GlobalV::NSPIN / resize_tmp);
+    std::vector<double> drhor_filter(this->rhopw->nrxx * GlobalV::NSPIN / resize_tmp);
+    for (int is = 0; is < GlobalV::NSPIN / resize_tmp; ++is)
     {
         // Note after this process some G which is higher than Gmax will be filtered.
         // Thus we cannot use Kerker_screen_recip(drhog.data()) directly after it.
@@ -752,7 +924,7 @@ void Charge_Mixing::Kerker_screen_real(double* drhor)
     }
     // implement Kerker for density and magnetization separately
     double fac, gg0, amin;
-    for (int is = 0; is < GlobalV::NSPIN; is++)
+    for (int is = 0; is < GlobalV::NSPIN / resize_tmp; is++)
     {
 
         if (is >= 1)
@@ -763,6 +935,7 @@ void Charge_Mixing::Kerker_screen_real(double* drhor)
                 assert(is == 1); // make sure break works
 #endif
                 double is_mag = GlobalV::NSPIN - 1;
+                if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE > 0) is_mag = 1;
                 for (int ig = 0; ig < this->rhopw->npw * is_mag; ig++)
                 {
                     drhog[is * this->rhopw->npw + ig] = 0;
@@ -796,7 +969,7 @@ void Charge_Mixing::Kerker_screen_real(double* drhor)
         }
     }
     // inverse FT
-    for (int is = 0; is < GlobalV::NSPIN; ++is)
+    for (int is = 0; is < GlobalV::NSPIN / resize_tmp; ++is)
     {
         this->rhopw->recip2real(drhog.data() + is * this->rhopw->npw, drhor_filter.data() + is * this->rhopw->nrxx);
     }
@@ -804,79 +977,12 @@ void Charge_Mixing::Kerker_screen_real(double* drhor)
 #ifdef _OPENMP
 #pragma omp parallel for schedule(static, 512)
 #endif
-    for (int ir = 0; ir < this->rhopw->nrxx * GlobalV::NSPIN; ir++)
+    for (int ir = 0; ir < this->rhopw->nrxx * GlobalV::NSPIN / resize_tmp; ir++)
     {
         drhor[ir] -= drhor_filter[ir];
     }
 }
 
-// this test will be removed once new mixing method is finished
-void Charge_Mixing::Kerker_screen_real_test(double* drhor)
-{
-    // for total charge density
-    if (this->mixing_gg0 <= 0.0001 || this->mixing_beta <= 0.1)
-        return;
-    std::vector<double> drhor_filter(this->rhopw->nrxx);
-    std::vector<std::complex<double>> drhog(this->rhopw->npw);
-    // Note after this process some G which is higher than Gmax will be filtered.
-    // FT
-    this->rhopw->real2recip(drhor, drhog.data());
-    const double fac = this->mixing_gg0;
-    const double gg0 = std::pow(fac * 0.529177 / GlobalC::ucell.tpiba, 2);
-#ifdef _OPENMP
-#pragma omp parallel for schedule(static, 512)
-#endif
-    for (int ig = 0; ig < this->rhopw->npw; ig++)
-    {
-        double gg = this->rhopw->gg[ig];
-        double filter_g = std::max(gg / (gg + gg0), GlobalV::MIXING_GG0_MIN / this->mixing_beta);
-        drhog[ig] *= (1 - filter_g);
-    }
-    // inverse FT
-    this->rhopw->recip2real(drhog.data(), drhor_filter.data());
-
-#ifdef _OPENMP
-#pragma omp parallel for schedule(static, 512)
-#endif
-    for (int ir = 0; ir < this->rhopw->nrxx; ir++)
-    {
-        drhor[ir] -= drhor_filter[ir];
-    }
-
-    // for magnetic density
-    if (GlobalV::NSPIN == 2)
-    {
-        if (GlobalV::MIXING_GG0_MAG <= 0.0001 || GlobalV::MIXING_BETA_MAG <= 0.1)
-            return;
-        std::vector<double> drhor_mag_filter(this->rhopw->nrxx);
-        std::vector<std::complex<double>> drhog_mag(this->rhopw->npw);
-        // Note after this process some G which is higher than Gmax will be filtered.
-        // FT
-        this->rhopw->real2recip(drhor + this->rhopw->nrxx, drhog_mag.data());
-        const double fac_mag = GlobalV::MIXING_GG0_MAG;
-        const double gg0_mag = std::pow(fac_mag * 0.529177 / GlobalC::ucell.tpiba, 2);
-#ifdef _OPENMP
-#pragma omp parallel for schedule(static, 512)
-#endif
-        for (int ig = 0; ig < this->rhopw->npw; ig++)
-        {
-            double gg = this->rhopw->gg[ig];
-            double filter_g = std::max(gg / (gg + gg0_mag), GlobalV::MIXING_GG0_MIN / GlobalV::MIXING_BETA_MAG);
-            drhog_mag[ig] *= (1 - filter_g);
-        }
-        // inverse FT
-        this->rhopw->recip2real(drhog_mag.data(), drhor_mag_filter.data());
-        // value magnetism
-#ifdef _OPENMP
-#pragma omp parallel for schedule(static, 512)
-#endif
-        for (int ir = 0; ir < this->rhopw->nrxx; ir++)
-        {
-            drhor[ir + this->rhopw->nrxx] -= drhor_mag_filter[ir];
-        }
-    }
-}
-
 double Charge_Mixing::inner_product_recip(std::complex<double>* rho1, std::complex<double>* rho2)
 {
     std::complex<double>** rho1_2d = new std::complex<double>*[GlobalV::NSPIN];
@@ -896,10 +1002,13 @@ double Charge_Mixing::inner_product_recip(std::complex<double>* rho1, std::compl
 double Charge_Mixing::inner_product_recip_new1(std::complex<double>* rho1, std::complex<double>* rho2)
 {
     double rnorm = 0.0;
+    // consider a resize for mixing_angle
+    int resize_tmp = 1;
+    if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE > 0) resize_tmp = 2;
 #ifdef _OPENMP
 #pragma omp parallel for reduction(+ : rnorm)
 #endif
-    for (int ig = 0; ig < this->rhopw->npw * GlobalV::NSPIN; ++ig)
+    for (int ig = 0; ig < this->rhopw->npw * GlobalV::NSPIN / resize_tmp; ++ig)
     {
         rnorm += (conj(rho1[ig]) * rho2[ig]).real();
     }
@@ -917,6 +1026,8 @@ double Charge_Mixing::inner_product_recip_new2(std::complex<double>* rhog1, std:
 
     double sum = 0.0;
 
+    if (GlobalV::NSPIN==2)
+    {
 #ifdef _OPENMP
 #pragma omp parallel for reduction(+ : sum)
 #endif
@@ -958,7 +1069,58 @@ double Charge_Mixing::inner_product_recip_new2(std::complex<double>* rhog1, std:
         // std::cout << " sum=" << sum << " mag=" << mag << std::endl;
         sum2 += mag;
         sum += sum2;
-
+    }
+    else if (GlobalV::NSPIN==4 && GlobalV::MIXING_ANGLE > 0)
+    {
+        if (!GlobalV::DOMAG && !GlobalV::DOMAG_Z)
+        {
+#ifdef _OPENMP
+#pragma omp parallel for reduction(+ : sum)
+#endif
+            for (int ig = 0; ig < this->rhopw->npw; ++ig)
+            {
+                if (this->rhopw->gg[ig] < 1e-8)
+                    continue;
+                sum += (conj(rhog1[ig]) * rhog2[ig]).real() / this->rhopw->gg[ig];
+            }
+            sum *= fac;
+        }
+        else
+        {
+            // another part with magnetization
+#ifdef _OPENMP
+#pragma omp parallel for reduction(+ : sum)
+#endif
+            for (int ig = 0; ig < this->rhopw->npw; ig++)
+            {
+                if (ig == this->rhopw->ig_gge0)
+                    continue;
+                sum += (conj(rhog1[ig]) * rhog2[ig]).real() / this->rhopw->gg[ig];
+            }
+            sum *= fac;
+            const int ig0 = this->rhopw->ig_gge0;
+            if (ig0 > 0)
+            {
+                sum += fac2
+                       * ((conj(rhog1[ig0 + this->rhopw->npw]) * rhog2[ig0 + this->rhopw->npw]).real());
+            }
+            double fac3 = fac2;
+            if (GlobalV::GAMMA_ONLY_PW)
+            {
+                fac3 *= 2.0;
+            }
+#ifdef _OPENMP
+#pragma omp parallel for reduction(+ : sum)
+#endif
+            for (int ig = 0; ig < this->rhopw->npw; ig++)
+            {
+                if (ig == ig0)
+                    continue;
+                sum += fac3
+                       * ((conj(rhog1[ig + this->rhopw->npw]) * rhog2[ig + this->rhopw->npw]).real());
+            }
+        }
+    }
 #ifdef __MPI
     Parallel_Reduce::reduce_pool(sum);
 #endif
@@ -971,10 +1133,14 @@ double Charge_Mixing::inner_product_recip_new2(std::complex<double>* rhog1, std:
 double Charge_Mixing::inner_product_real(double* rho1, double* rho2)
 {
     double rnorm = 0.0;
+    // consider a resize for mixing_angle
+    int resize_tmp = 1;
+    if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE > 0) resize_tmp = 2;
+
 #ifdef _OPENMP
 #pragma omp parallel for reduction(+ : rnorm)
 #endif
-    for (int ir = 0; ir < this->rhopw->nrxx * GlobalV::NSPIN; ++ir)
+    for (int ir = 0; ir < this->rhopw->nrxx * GlobalV::NSPIN / resize_tmp; ++ir)
     {
         rnorm += rho1[ir] * rho2[ir];
     }
diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp
index e5baa87284..a1ddf64f9e 100644
--- a/source/module_io/input.cpp
+++ b/source/module_io/input.cpp
@@ -305,6 +305,7 @@ void Input::Default(void)
     mixing_beta_mag = -10.0; // only set when nspin == 2 || nspin == 4
     mixing_gg0_mag = 0.0; // defaultly exclude Kerker from mixing magnetic density
     mixing_gg0_min = 0.1; // defaultly minimum kerker coefficient
+    mixing_angle = -10.0; // defaultly close for npsin = 4
     mixing_tau = false;
     mixing_dftu = false;
     //----------------------------------------------------------
@@ -1256,6 +1257,10 @@ bool Input::Read(const std::string &fn)
         {
             read_value(ifs, mixing_gg0_min);
         }
+        else if (strcmp("mixing_angle", word) == 0)
+        {
+            read_value(ifs, mixing_angle);
+        }
         else if (strcmp("mixing_tau", word) == 0)
         {
             read_bool(ifs, mixing_tau);
@@ -3181,6 +3186,7 @@ void Input::Bcast()
     Parallel_Common::bcast_double(mixing_beta_mag);
     Parallel_Common::bcast_double(mixing_gg0_mag);
     Parallel_Common::bcast_double(mixing_gg0_min);
+    Parallel_Common::bcast_double(mixing_angle);
     Parallel_Common::bcast_bool(mixing_tau);
     Parallel_Common::bcast_bool(mixing_dftu);
 
diff --git a/source/module_io/input.h b/source/module_io/input.h
index 709fdf67dd..c1440628dd 100644
--- a/source/module_io/input.h
+++ b/source/module_io/input.h
@@ -235,6 +235,7 @@ class Input
     double mixing_beta_mag;
     double mixing_gg0_mag;
     double mixing_gg0_min;
+    double mixing_angle;
 
     bool mixing_tau; // whether to mix tau in mgga
     bool mixing_dftu; //whether to mix locale in DFT+U
diff --git a/source/module_io/input_conv.cpp b/source/module_io/input_conv.cpp
index 73a057e171..fbe85f79c0 100644
--- a/source/module_io/input_conv.cpp
+++ b/source/module_io/input_conv.cpp
@@ -752,6 +752,7 @@ void Input_Conv::Convert(void)
     GlobalV::MIXING_BETA_MAG = INPUT.mixing_beta_mag;
     GlobalV::MIXING_GG0_MAG = INPUT.mixing_gg0_mag;
     GlobalV::MIXING_GG0_MIN = INPUT.mixing_gg0_min;
+    GlobalV::MIXING_ANGLE = INPUT.mixing_angle;
     GlobalV::MIXING_TAU = INPUT.mixing_tau;
     
     ModuleBase::timer::tick("Input_Conv", "Convert");
diff --git a/source/module_io/parameter_pool.cpp b/source/module_io/parameter_pool.cpp
index b40f2c3f49..45d2262f94 100644
--- a/source/module_io/parameter_pool.cpp
+++ b/source/module_io/parameter_pool.cpp
@@ -847,6 +847,10 @@ bool input_parameters_set(std::map<std::string, InputParameter> input_parameters
     {
         INPUT.mixing_gg0_min = *static_cast<double*>(input_parameters["mixing_gg0_min"].get());
     }
+    else if (input_parameters.count("mixing_angle") != 0)
+    {
+        INPUT.mixing_angle = *static_cast<double*>(input_parameters["mixing_angle"].get());
+    }
     else if (input_parameters.count("mixing_tau") != 0)
     {
         INPUT.mixing_tau = *static_cast<bool*>(input_parameters["mixing_tau"].get());
diff --git a/source/module_io/write_input.cpp b/source/module_io/write_input.cpp
index c94931dc99..4dd0a7f125 100644
--- a/source/module_io/write_input.cpp
+++ b/source/module_io/write_input.cpp
@@ -250,6 +250,7 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_beta_mag", mixing_beta_mag, "mixing parameter for magnetic density");
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_gg0_mag", mixing_gg0_mag, "mixing parameter in kerker");
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_gg0_min", mixing_gg0_min, "the minimum kerker coefficient");
+    ModuleBase::GlobalFunc::OUTP(ofs, "mixing_angle", mixing_angle, "angle mixing parameter for non-colinear calculations");
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_tau", mixing_tau, "whether to mix tau in mGGA calculation");
     ModuleBase::GlobalFunc::OUTP(ofs, "mixing_dftu", mixing_dftu, "whether to mix locale in DFT+U calculation");