diff --git a/ABACUS.develop/see.txt b/ABACUS.develop/see.txt
new file mode 100644
index 0000000000..d569f4508f
--- /dev/null
+++ b/ABACUS.develop/see.txt
@@ -0,0 +1,17 @@
+diff -r source/src_pw/unitcell_pseudo.cpp /media/qianrui/work/LQR/test/abacus-develop/ABACUS.develop/source/src_pw/unitcell_pseudo.cpp
+1264,1265c1264
+< 	if ( CALCULATION!="scf-sto" && CALCULATION!="relax-sto" && CALCULATION!="md-sto" ) //qianrui 2021-2-20
+< 	{
+---
+> 	
+1291,1295c1290,1293
+< 		if(NBANDS < mag.get_neldw() )
+<         {
+<             WARNING_QUIT("unitcell","Too few spin down bands!");
+<         }
+< 	}
+---
+> 		if(NBANDS < mag.get_neldw() ) 
+> 		{
+> 			WARNING_QUIT("unitcell","Too few spin down bands!");
+> 		}
diff --git a/ABACUS.develop/source/Makefile.vars b/ABACUS.develop/source/Makefile.vars
index 83a160f5f2..4a9d6c2b19 100644
--- a/ABACUS.develop/source/Makefile.vars
+++ b/ABACUS.develop/source/Makefile.vars
@@ -28,7 +28,7 @@ ELPA_DIR = /home/mohan/1_Software/impi_elpa-16.05.005
 #ELPA_DIR = /opt/elpa/intel_2017_update4
 
 CEREAL_DIR = /home/mohan/1_Software/ABACUS_Github/cereal/cereal/
+#CEREAL_DIR = /home/qianrui/intelcompile/cereal/
 
 OBJ_DIR = obj
 NP      = 14
-
diff --git a/ABACUS.develop/source/input.cpp b/ABACUS.develop/source/input.cpp
index 89d6e0cf51..fe4a8a8b55 100644
--- a/ABACUS.develop/source/input.cpp
+++ b/ABACUS.develop/source/input.cpp
@@ -2384,6 +2384,14 @@ void Input::Check(void)
 */
                 nstep = 1;
 
+    }
+	else if (calculation == "scf-sto")  // qianrui 2021-2-20
+    {
+                if(mem_saver == 1)
+                {
+                        mem_saver = 0;
+                        AUTO_SET("mem_savre","0");
+                }
     }
     else if (calculation == "relax")  // pengfei 2014-10-13
     {
diff --git a/ABACUS.develop/source/run_pw.cpp b/ABACUS.develop/source/run_pw.cpp
index 2eaee9b228..bf4031e8f8 100644
--- a/ABACUS.develop/source/run_pw.cpp
+++ b/ABACUS.develop/source/run_pw.cpp
@@ -75,7 +75,14 @@ void Run_pw::plane_wave_line(void)
     //=====================================
     CHR.allocate(NSPIN, pw.nrxx, pw.ngmc);
     pot.allocate(pw.nrxx);
-    wf.allocate(kv.nks);
+    if ( NBANDS != 0 || (CALCULATION!="scf-sto" && CALCULATION!="relax-sto" && CALCULATION!="md-sto") )//qianrui add 
+	{
+        wf.allocate(kv.nks);
+    }
+    else
+    {
+        wf.npwx = wf.setupIndGk(pw, kv.nks);
+    }
 	UFFT.allocate();
 
     //=======================
@@ -116,7 +123,12 @@ void Run_pw::plane_wave_line(void)
     //================================
     // Initial start wave functions
     //================================
-   	wf.wfcinit();
+    if ( NBANDS != 0 || (CALCULATION!="scf-sto" && CALCULATION!="relax-sto" && CALCULATION!="md-sto") )//qianrui add 
+	{
+    	wf.wfcinit();
+    }
+    
+    
 
 	switch(exx_global.info.hybrid_type)				// Peize Lin add 2019-03-09
 	{
diff --git a/ABACUS.develop/source/src_global/global_function.h b/ABACUS.develop/source/src_global/global_function.h
index 74bddf3725..fc9835478b 100644
--- a/ABACUS.develop/source/src_global/global_function.h
+++ b/ABACUS.develop/source/src_global/global_function.h
@@ -184,7 +184,7 @@ void SCAN_END(ifstream &ifs, const string &TargetName);
 template<class T>
 static inline void DCOPY( const T &a, T &b, const int &dim)
 {
-    for (int i=0; i<dim; i++) b[i] = a[i];
+    for (int i=0; i<dim; ++i) b[i] = a[i];
 }
 
 void BLOCK_HERE( const string &description );
diff --git a/ABACUS.develop/source/src_pw/energy.cpp b/ABACUS.develop/source/src_pw/energy.cpp
index baa0b85cd2..71f6ed5b6e 100644
--- a/ABACUS.develop/source/src_pw/energy.cpp
+++ b/ABACUS.develop/source/src_pw/energy.cpp
@@ -87,6 +87,18 @@ void energy::calculate_etot(void)
 	}			
 
     //Quxin adds for DFT+U energy correction on 20201029
+
+	/*cout << resetiosflags(ios::scientific) << endl;
+	cout << setprecision(6) << endl;
+	cout << " eband=" << eband << endl;
+	cout << " deband=" << deband << endl;
+	cout << " etxc-etxcc=" << etxc-etxcc << endl;
+	cout << " ewld=" << ewld << endl;
+	cout << " ehart=" << ehart << endl;
+	cout << " demet=" << demet << endl;
+	cout << " descf=" << descf << endl;
+	cout << " efiled=" << Efield::etotefield << endl;
+	cout << " fermienergy= "<<ef<<endl;*/
     if(INPUT.dft_plus_u) 
 	{
 		this->etot += dftu.EU;																	  
diff --git a/ABACUS.develop/source/src_pw/pseudopot_cell_vnl.h b/ABACUS.develop/source/src_pw/pseudopot_cell_vnl.h
index 4f04c19aff..20cc80db72 100644
--- a/ABACUS.develop/source/src_pw/pseudopot_cell_vnl.h
+++ b/ABACUS.develop/source/src_pw/pseudopot_cell_vnl.h
@@ -29,6 +29,7 @@ class pseudopot_cell_vnl: public pseudopot_cell_vl
 	friend class Hamilt_PW;
 	friend class WF_atomic;
 	friend class wavefunc;
+	friend class Stochastic_hchi;
 
 	void init(const int ntype, const bool allocate_vkb=1);
 
diff --git a/ABACUS.develop/source/src_pw/sto_che.cpp b/ABACUS.develop/source/src_pw/sto_che.cpp
index a935432ce7..3b8caae50e 100644
--- a/ABACUS.develop/source/src_pw/sto_che.cpp
+++ b/ABACUS.develop/source/src_pw/sto_che.cpp
@@ -1,12 +1,155 @@
-#include "tools.h"
-#include "global.h"
 #include "sto_che.h" 
+#include "global.h"
 
 
 Stochastic_Chebychev::Stochastic_Chebychev()
 {
+    initplan = false;
+    initcoef = false;
+    getcoef = false;
+    getpolyval = false;
+    extend = 10;
+    norder = 5;
+    ccoef = new complex<double> [1];
+    coef = new double [1];
+    polyvalue = new complex<double> [1];
 }
 
 Stochastic_Chebychev::~Stochastic_Chebychev()
 {
+    if(initplan)
+    {
+        fftw_destroy_plan(plancoef);
+    }
+    delete [] coef;
+    delete [] ccoef;
+    delete [] polyvalue;
+}
+void Stochastic_Chebychev:: init()
+{
+    norder = STO_WF.nche_sto;
+    assert(norder > 5);
+    assert(extend >= 1);
+    if(norder != 0)
+    {
+        norder2 = 2 * norder * extend;
+        delete[] coef;
+        delete[] ccoef;
+        delete[] polyvalue;
+        ccoef = new complex<double> [norder2];
+        coef = new double [norder2];
+        polyvalue = new complex<double> [norder];
+        initcoef = true;
+    }
+    else
+    {
+        WARNING_QUIT("Stochastic_Chebychev", "The Chebychev expansion order should be at least one!");
+    }
+
+}
+    
+void Stochastic_Chebychev:: calcoef(double fun(double))
+{
+    if(!initcoef) WARNING_QUIT("Stochastic_Chebychev", "Please init coef first!");
+    for(int i = 0; i < norder2; ++i)
+    {
+        coef[i]=fun(cos((i+0.5)*TWO_PI/norder2));
+    }
+     if(!initplan)
+    {
+        initplan = true;
+        plancoef = fftw_plan_dft_r2c_1d(norder2, coef, (fftw_complex *) ccoef, FFTW_MEASURE);
+    }
+    fftw_execute(plancoef);
+    complex<double> ui(0,1);
+    for(int i = 0; i<norder; ++i)
+    {
+        if(i == 0)
+        {
+            coef[i] = real(exp(ui*i*PI/norder2) * ccoef[i]) / norder2;
+        }
+        else
+        {
+            coef[i] = real(exp(ui*i*PI/norder2) * ccoef[i]) * 2 / norder2;
+        }
+    }
+    getcoef = true;
+}
+
+void Stochastic_Chebychev:: recurs(double&tnp1, double& tn, double& tn_1, double &t)
+{   
+    tnp1 = 2*t*tn-tn_1;
+}
+
+complex<double> Stochastic_Chebychev:: calresult()
+{
+    if(!getcoef||!getpolyval) WARNING_QUIT("Stochastic_Chebychev", "Please calculate coef or polyval first!");
+    complex<double> result = 0;
+    for(int ior = 0; ior < norder; ++ior)
+    {
+        result += coef[ior] * polyvalue[ior];
+    }
+    return result;   
+}
+
+void Stochastic_Chebychev:: calresult(double &t, double& result)
+{
+    if(!getcoef) WARNING_QUIT("Stochastic_Chebychev", "Please calculate coef first!");
+    double tnp1, tn, tn_1;
+    tn_1 = 1;
+    tn = tn_1 * t;
+    //0- & 1-st order
+    result = coef[0] * tn_1 + coef[1] * tn;
+   
+    //more than 1-st orders
+    for(int ior = 2; ior < norder; ++ior)
+    {
+        recurs(tnp1, tn, tn_1, t);
+        result += coef[ior] * tnp1;
+        tn_1 = tn;
+        tn = tnp1; 
+    }
+    return;
+}
+
+
+void Stochastic_Chebychev:: calpolyval(void tfun(complex<double> *in, complex<double> *out), int& ndim, complex<double> *wavein)
+{
+
+    complex<double> *arraynp1, *arrayn, *arrayn_1;
+    arraynp1 = new complex<double> [ndim];
+    arrayn = new complex<double> [ndim];
+    arrayn_1 = new complex<double> [ndim];
+
+    DCOPY(wavein, arrayn_1, ndim);
+    tfun(arrayn_1, arrayn);
+
+    ZEROS(polyvalue,norder);
+    //0- & 1-st order
+    for(int i = 0; i < ndim; ++i) 
+    {
+        polyvalue[0] += conj(wavein[i]) * wavein[i];// 0-th order : <wavein | wavein> = ndim
+        polyvalue[1] += conj(wavein[i]) * arrayn[i];// 1-st order : <wavein | tfun | wavein>
+    }
+
+    //more than 1-st orders
+    for(int ior = 2; ior < norder; ++ior)
+    {
+        recurs(arraynp1, arrayn, arrayn_1, tfun, ndim);
+        for(int i = 0; i < ndim; ++i) // n-th order : <wavein | T_n(tfun) | wavein>
+        {
+            polyvalue[ior] += conj(wavein[i]) * arraynp1[i];
+        }
+        
+        complex<double>* tem = arrayn_1;
+        arrayn_1 = arrayn;
+        arrayn = arraynp1;
+        arraynp1 = tem; 
+    }
+
+    delete [] arraynp1;
+    delete [] arrayn;
+    delete [] arrayn_1;
+    getpolyval = true;
+    return;
 }
diff --git a/ABACUS.develop/source/src_pw/sto_che.h b/ABACUS.develop/source/src_pw/sto_che.h
index 811634189e..ed89500e2f 100644
--- a/ABACUS.develop/source/src_pw/sto_che.h
+++ b/ABACUS.develop/source/src_pw/sto_che.h
@@ -2,7 +2,7 @@
 #define INCLUDE_STO_CHEBYCHEV_H
 
 #include "tools.h"
-
+#include "../src_global/mymath.h"
 //----------------------------------------------
 // Chebychev Filtering
 //----------------------------------------------
@@ -15,12 +15,87 @@ class Stochastic_Chebychev
     // constructor and deconstructor
     Stochastic_Chebychev();
     ~Stochastic_Chebychev();
+    void init();
+    
+    void calcoef(double fun(double));
+    complex<double> calresult();
+    void calresult(double &t, double &result);
+    
+    template<class T>
+    void calresult(void fun(T *in, T *out), int& ndim, T *wavein, T *waveout);
+
+
+
+    void calpolyval(void fun(complex<double> *in, complex<double> *out), int& ndim, complex<double> *wavein);
+
     int norder;
+    int extend;
+    int norder2;  // 2 * norder
+    double* coef;  //[norder2] expansion coefficient of each order, only first norder coefficients are usefull
+    complex<double> *ccoef;  //[norder2] temporary complex expansion coefficient of each order, only first norder coefficients are usefull.
+    complex<double> *polyvalue; //
+    fftw_plan plancoef;
+    bool initplan, initcoef, getcoef, getpolyval;
 
 	private:
-
+    void recurs(double&tnp1, double &tn, double &tn_1, double& t); //tnp1: T_(n+1), tn: T_n, tn_1: T_(n-1)
+    template<class T>
+    void recurs(T *arraynp1, T* arrayn, T *arrayn_1, void fun(T *in,T *out), int& ndim);
 
 
 };
 
+template<class T>
+void Stochastic_Chebychev:: recurs(T *arraynp1, T* arrayn, T *arrayn_1, void fun(T *in,T *out), int& ndim)
+{
+    fun(arrayn,arraynp1);
+    for(int i = 0; i < ndim; ++i)
+    {
+        arraynp1[i]=2*arraynp1[i]-arrayn_1[i];
+    }
+}
+
+template<class T>
+void Stochastic_Chebychev:: calresult(void tfun(T *in, T *out), int &ndim, T *wavein, T *waveout)
+{
+    if(!getcoef) WARNING_QUIT("Stochastic_Chebychev", "Please calculate coef first!");
+
+    T *arraynp1, *arrayn, *arrayn_1;
+    arraynp1 = new T [ndim];
+    arrayn = new T [ndim];
+    arrayn_1 = new T [ndim];
+    DCOPY(wavein, arrayn_1, ndim);
+    tfun(arrayn_1, arrayn);
+    
+    //0- & 1-st order
+    for(int i = 0; i < ndim; ++i)
+    {
+        waveout[i] = coef[0] * arrayn_1[i] + coef[1] * arrayn[i];
+    }
+
+    
+
+    //more than 1-st orders
+    for(int ior = 2; ior < norder; ++ior)
+    {
+        recurs(arraynp1, arrayn, arrayn_1, tfun, ndim);
+        for(int i = 0; i < ndim; ++i)
+        {
+            waveout[i] += coef[ior] * arraynp1[i];
+        }
+        T * tem = arrayn_1;
+        arrayn_1 = arrayn;
+        arrayn = arraynp1;
+        arraynp1 = tem; 
+    }
+    delete [] arraynp1;
+    delete [] arrayn;
+    delete [] arrayn_1;
+    return;
+}
+
+
+
+
+
 #endif// Eelectrons_Chebychev
diff --git a/ABACUS.develop/source/src_pw/sto_elec.cpp b/ABACUS.develop/source/src_pw/sto_elec.cpp
index e365786f77..3e4d2ad74c 100644
--- a/ABACUS.develop/source/src_pw/sto_elec.cpp
+++ b/ABACUS.develop/source/src_pw/sto_elec.cpp
@@ -59,8 +59,9 @@ void Stochastic_Elec::scf_stochastic(const int &istep)
 
 	clock_t start,finish;
 	double duration = 0.0;	
-	
-    for (this->iter = 1;iter <= NITER;iter++)
+    //for (this->iter = 1;iter <= NITER;iter++)
+	STO_WF.init();
+	for (this->iter = 1;iter <= 20;iter++)
     {
 		ofs_running 
 		<< "\n PW-STOCHASTIC ALGO --------- ION=" << setw(4) << istep + 1
@@ -75,7 +76,7 @@ void Stochastic_Elec::scf_stochastic(const int &istep)
 		{
 			CHR.new_e_iteration = false;
 		}
-
+		
 		// record the start time.
         start=std::clock();
 		
@@ -84,7 +85,8 @@ void Stochastic_Elec::scf_stochastic(const int &istep)
         //this->update_ethr(iter);
         if(FINAL_SCF && iter==1) 
 		{
-			ETHR = 1.0e-2;
+			ETHR = 1.0e-12; // only for test
+			//ETHR = 1.0e-2;
 		}
         else 
 		{
@@ -106,8 +108,12 @@ void Stochastic_Elec::scf_stochastic(const int &istep)
 		
 		//(2) calculate band energy using cg or davidson method.
 		// output the new eigenvalues and wave functions.
-        this->c_bands(istep);
 
+		if(NBANDS > 0)
+		{
+			this->c_bands(istep+1);
+		}
+		
         if (check_stop_now()) return;
         
 		en.eband  = 0.0;
@@ -115,19 +121,37 @@ void Stochastic_Elec::scf_stochastic(const int &istep)
         en.ef     = 0.0;
         en.ef_up  = 0.0;
         en.ef_dw  = 0.0;
-        
-		//(3) calculate weights of each band.
-		Occupy::calculate_weights();	
 
-		//(4) save change density as previous charge,
+		//(3) save change density as previous charge,
 		// prepared fox mixing.
         CHR.save_rho_before_sum_band();
 
-		//(5) calculate new charge density according to
-		// new wave functions.
+		//(4) calculate fermi energy.
+		stoiter.init();
+		stoiter.test();
+		stoiter.itermu();
 		
-		// calculate the new eband here.
-        CHR.sum_band();
+
+		//(5) calculate new charge density 
+		// calculate KS rho.
+		if(NBANDS > 0)
+		{
+			CHR.sum_band();
+		}
+		else
+		{
+			for(int is=0; is<NSPIN; is++)
+			{
+				ZEROS(CHR.rho[is], pw.nrxx);
+			}
+			en.eband = 0.0;
+		}
+		
+        
+		// calculate stochastic rho
+		stoiter.sum_stoband();
+		
+
 
 		//(6) calculate the delta_harris energy 
 		// according to new charge density.
@@ -216,14 +240,9 @@ void Stochastic_Elec::scf_stochastic(const int &istep)
             //( vnew used later for scf correction to the forces )
 	    	pot.vnew = pot.vr - pot.vnew;
             en.descf = 0.0;
+
         }
 		
-		stringstream ssw;
-        ssw << global_out_dir << "WAVEFUNC";
-		
-		//qianrui add 2020-10-12
-		stringstream ssgk;
-		ssgk << global_out_dir << "GKK.dat";
             
 
 		// output for tmp.
@@ -234,14 +253,7 @@ void Stochastic_Elec::scf_stochastic(const int &istep)
 			CHR.write_rho( is, iter, ssc.str(), 3);//mohan add 2007-10-17
 		}
         
-		if(wf.out_wf!=0)
-		{
-			//WF_io::write_wfc( ssw.str(), wf.evc );
-			// mohan update 2011-02-21
-			//qianrui update 2020-10-17
-			WF_io::write_wfc2( ssw.str(), wf.evc ,pw.gcar );
-            //DONE(ofs_running,"write wave functions into file WAVEFUNC.dat");
-		}
+		
 
 		if(vext == 0) 
 		{
@@ -281,11 +293,12 @@ void Stochastic_Elec::scf_stochastic(const int &istep)
 			timer::tick("electrons","self_consistent",'D');
             return;
         }
-
+		
         //if ( imix >= 0 )  CHR.rho = CHR.rho_save;
         //ofs_running << "\n start next iterate for idum ";
+		
     } 
-
+	
 	timer::tick("Elec_Stochastic","scf_stochastic",'D');
     return;
 } // end electrons
diff --git a/ABACUS.develop/source/src_pw/sto_elec.h b/ABACUS.develop/source/src_pw/sto_elec.h
index 7ed66853bf..eb15a7e46b 100644
--- a/ABACUS.develop/source/src_pw/sto_elec.h
+++ b/ABACUS.develop/source/src_pw/sto_elec.h
@@ -4,6 +4,7 @@
 #include "tools.h"
 #include "threshold_elec.h"
 #include "sto_wf.h"
+#include "sto_iter.h"
 
 //----------------------------------------------
 // methods based on stochastic wave functions
@@ -25,8 +26,7 @@ class Stochastic_Elec: private Threshold_Elec
     void scf_stochastic(const int &istep);
 
 	private:
-
-	Stochastic_WF swf;
+    Stochastic_Iter stoiter;
 
     void c_bands(const int &istep);
 
diff --git a/ABACUS.develop/source/src_pw/sto_hchi.cpp b/ABACUS.develop/source/src_pw/sto_hchi.cpp
index 7b0777f8b8..f3ee168e98 100644
--- a/ABACUS.develop/source/src_pw/sto_hchi.cpp
+++ b/ABACUS.develop/source/src_pw/sto_hchi.cpp
@@ -2,37 +2,355 @@
 #include "global.h"
 #include "sto_hchi.h" 
 
-Stochastic_Hchi::Stochastic_Hchi()
+int Stochastic_hchi:: nrxx;
+int Stochastic_hchi:: nx,Stochastic_hchi::ny,Stochastic_hchi::nz;
+fftw_plan Stochastic_hchi:: pb, Stochastic_hchi::pf;
+double Stochastic_hchi:: Emin, Stochastic_hchi:: Emax;
+bool Stochastic_hchi:: initplan, Stochastic_hchi::ortho;
+complex<double>* Stochastic_hchi:: rp_chi, * Stochastic_hchi::rl_chi;
+int * Stochastic_hchi:: GRA_index;
+
+
+Stochastic_hchi::Stochastic_hchi()
 {
+	initplan = false;
+	ortho = false;
+	nrxx = 0;
+	rp_chi = new complex<double> [1];
+	rl_chi = new complex<double> [1];
+	GRA_index = new int [1];
 }
 
-Stochastic_Hchi::~Stochastic_Hchi()
+Stochastic_hchi::~Stochastic_hchi()
 {
+	if(initplan)
+	{
+		fftw_destroy_plan(pb);
+		fftw_destroy_plan(pf);
+	}
+	delete[] rp_chi;
+	delete[] rl_chi;
 }
 
-void Stochastic_Hchi:: Hchi()
+void Stochastic_hchi:: init()
 {
+	//wait for init--------------------------------------
+		//nrxx
+	//---------------------------------------------------
+	nrxx = pw.nrxx;
+	nx = pw.nx;
+	ny = pw.ny;
+	nz = pw.nz;
+    if(nrxx != 0)
+    {
+        delete[] rp_chi;
+        delete[] rl_chi;
+		delete[] GRA_index;
+        rp_chi = new complex<double> [nrxx];
+		rl_chi = new complex<double> [nrxx];
+		GRA_index = new int [wf.npw];
+		pf=fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *)rp_chi,(fftw_complex *)rp_chi, FFTW_FORWARD, FFTW_MEASURE);
+		pb=fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *)rl_chi,(fftw_complex *)rl_chi, FFTW_BACKWARD, FFTW_MEASURE);
+		initplan = true;
+    }
+    else
+    {
+        WARNING_QUIT("Stochastic_hchi", "Number of grids should be at least one!");
+    }
+
+}
+
+void Stochastic_hchi::get_GRA_index()
+{
+	int ix,iy,iz;
+	int ir;
+	ZEROS(GRA_index,wf.npw);
+	for(int ig = 0 ; ig < wf.npw; ++ig)
+	{
+		ix = floor(pw.gcar[wf.igk(0, ig)].x+0.1);
+		iy = floor(pw.gcar[wf.igk(0, ig)].y+0.1);
+		iz = floor(pw.gcar[wf.igk(0, ig)].z+0.1);
+		if(ix < 0) ix += nx;
+		if(iy < 0) iy += ny;
+		if(iz < 0) iz += nz;
+		ir = ix * ny * nz + iy * nz + iz;
+		GRA_index[ig] = ir;
+	}
+}
+
+void Stochastic_hchi::orthogonal_to_psi_real(complex<double> *wfin, complex<double> *wfout, int &ikk)
+{
+
+	TITLE("Stochastic_hchi","orthogonal_to_psi0");
+	if(!initplan) WARNING_QUIT("Stochastic_hchi", "Please init hchi first!");
+
+	DCOPY(wfin,rp_chi,nrxx);
+	fftw_execute(pf);
+	
+	complex<double> * chig = new complex<double> [wf.npw];
+	for(int ig = 0; ig < wf.npw; ++ig)
+	{
+		chig[ig] = rp_chi[GRA_index[ig]]; 
+	}
+
+	//orthogonal part
+	complex<double> sum;
+	for(int iksb = 0; iksb < NBANDS; ++iksb)
+	{
+		complex<double> *kswf = &wf.evc[ikk](iksb,0); 
+		sum=0;
+		for(int ig = 0; ig < wf.npw; ++ig)
+		{
+			sum += conj(kswf[ig]) * chig[ig];
+		}
+		for(int ig = 0; ig < wf.npw; ++ig)
+		{
+			chig[ig] -= sum*kswf[ig];
+		}
+	}
+
+	//test orthogonal in reciprocal space
+	//complex<double> overlap;
+	//for(int iksb = 0; iksb < NBANDS; ++iksb)
+	//{
+	//	complex<double> *kswf = &wf.evc[ikk](iksb,0); 
+	//	overlap=0;
+	//	for(int ig = 0; ig < wf.npw; ++ig)
+	//	{
+	//		overlap += conj(kswf[ig]) * chig[ig];
+	//	}
+	//	cout<<"OVERLAP "<<overlap<<endl;
+	//}
+
+	for(int ig = 0; ig < wf.npw; ++ig)
+	{
+		rp_chi[GRA_index[ig]] = chig[ig];
+	}
+	DCOPY(rp_chi,rl_chi,nrxx);
+
+	fftw_execute(pb);
+	for(int ir = 0; ir < nrxx ; ++ir)
+	{
+		wfout[ir] = rl_chi[ir] / nrxx;
+	}
+
+	/*//test orthogonal in real space
+	complex<double> overlap;
+	complex<double> * kswf = new complex<double> [nrxx];
+	fftw_plan pp=fftw_plan_dft_3d(pw.nx,pw.ny,pw.nz,(fftw_complex *)kswf,(fftw_complex *)kswf, FFTW_BACKWARD, FFTW_ESTIMATE);
+	for(int iksb = 0; iksb < NBANDS; ++iksb)
+	{
+		ZEROS(kswf,nrxx);
+		for(int ig = 0 ; ig < wf.npw; ++ig)
+		{
+			kswf[GRA_index[ig]] = wf.evc[ikk](iksb,ig);
+		}
+		fftw_execute(pp);
+		overlap=0;
+		for(int ir = 0; ir < nrxx; ++ir)
+		{
+			overlap += conj(kswf[ir]) * wfout[ir];
+		}
+		cout<<"OVERLAP "<<overlap<<endl;
+	}
+	delete [] kswf;*/
+
+	ortho = true;
+	delete[] chig;
+	return;
+}
+
+
+
+
+void Stochastic_hchi:: hchi_real(complex<double>*chi_in, complex<double> *hchi)
+{
+	
+	double*vr = pot.vrs1;  //vr= pot.vrs1 temporarily use cutoff vr.
+
+	//wait for init--------------------------------------	
+	double dk1,dk2,dk3;
+	dk1 = ucell.tpiba;
+	dk2 = ucell.tpiba;
+	dk3 = ucell.tpiba;
+		
+	//---------------------------------------------------
+	if(!initplan) WARNING_QUIT("Stochastic_hchi", "Please init hchi first!");
+
+	ZEROS(hchi,nrxx);
+	DCOPY(chi_in, rp_chi, nrxx);
+	fftw_execute(pf);
+
+	complex<double> * chig = new complex<double> [wf.npw];
+	ZEROS(chig,wf.npw);
+	for(int ig = 0; ig < wf.npw; ++ig)
+	{
+		chig[ig] = rp_chi[GRA_index[ig]]; 
+	}
+
+	//------------------------------------
+	//(1) the local potential.
+	//------------------------------------
+
+	if(VL_IN_H)
+	{
+		for(int ir = 0; ir < nrxx; ++ir)
+		{
+			hchi[ir] += chi_in[ir] * vr[ir] ;
+		}
+		
+	}
+	/*cout<<"HCHI-------------------------"<<endl;
+
+		for(int ir  = 63800 ; ir < 64000; ++ir)
+		{
+			cout<<chi_in[ir] * vr[ir]<<" ";
+		}
+		cout<<endl;*/
+	
+	
+	
 	//------------------------------------
-	//(1) the kinetical energy.
+	//(2) the kinetical energy.
 	//------------------------------------
 	if(T_IN_H)
 	{
+		Vector3<double> gg;
+		int gx,gy,gz;
+		for(int ig1 = 0, i = 0; ig1 < nx; ++ig1)
+		{
+			for(int ig2 = 0; ig2 < ny; ++ig2)
+			{
+				for(int ig3 = 0; ig3 < nz; ++ig3)
+				{
+					gx = ig1;
+					gy = ig2;
+					gz = ig3;
+					if(ig1 > nx/2) gx -= nx;
+					if(ig2 > ny/2) gy -= ny;
+					if(ig3 > nz/2) gz -= nz;
+					gg.set(gx*dk1, gy*dk2, gz*dk3);
+					rl_chi[i] = gg.norm2()*rp_chi[i];
+					++i;
+				}
+			}
 		
+		}
 	}
 
+	
+	
+
 	//------------------------------------
-	//(2) the local potential.
+	// (3) the nonlocal pseudopotential.
 	//------------------------------------
-	if(VL_IN_H)
+	if(VNL_IN_H)
 	{
+		if ( ppcell.nkb > 0)
+		{
+			complex<double> *becp = new complex<double>[ ppcell.nkb * NPOL ];
+			ZEROS(becp,ppcell.nkb * NPOL);
+
+			for (int i=0;i< ppcell.nkb;++i)
+			{
+				const complex<double>* p = &ppcell.vkb(i,0);
+				for (int ig=0; ig< wf.npw; ++ig)
+				{
+					if(NSPIN!=4) becp[i] += chig[ig] * conj( p[ig] );
+					else
+					{
+						//We didnot consider it temporarily.
+					}	
+				} 
+			}
+
+			//Parallel_Reduce::reduce_complex_double_pool( becp, ppcell.nkb * NPOL);
+			complex<double> * Ps = new complex<double> [ppcell.nkb * NPOL];
+			ZEROS( Ps, ppcell.nkb * NPOL );
+			int sum = 0;
+    		int iat = 0;
+    		// this function sum up each non-local pseudopotential located in each atom,
+    		// all we need to do is put the right Dij coefficient to each becp, which
+    		// is calculated before.
+    		for (int it=0; it<ucell.ntype; ++it)
+    		{
+    		    const int Nprojs = ucell.atoms[it].nh;
+    		    for (int ia=0; ia<ucell.atoms[it].na; ++ia)
+    		    {
+    		        // each atom has Nprojs, means this is with structure factor;
+    		        // each projector (each atom) must multiply coefficient
+    		        // with all the other projectors.
+    		        for (int ip=0; ip<Nprojs; ++ip)
+    		        {
+    		            for (int ip2=0; ip2<Nprojs; ++ip2)
+    		            {
+							if(NSPIN!=4)
+								Ps[sum+ip2] += ppcell.deeq(CURRENT_SPIN, iat, ip, ip2) * becp[sum+ip];
+    		            }// end ih
+    		        }//end jh
+					if(NSPIN!=4) sum += Nprojs;
+					++iat;
+    		    } //end na
+    		} //end nt
 
+			// use simple method.
+			ZEROS(chig, wf.npw);
+			if(NSPIN!=4)
+				for(int i=0; i<ppcell.nkb; ++i)
+				{
+					complex<double>* p = &ppcell.vkb(i,0);
+					for(int ig=0; ig< wf.npw; ++ig)
+					{
+						chig[ig] += Ps[i] * p[ig];
+					}
+				}
+			delete[] becp;
+			delete[] Ps;
+		}
+		for(int ig = 0; ig < wf.npw; ++ig)
+		{
+			rl_chi[GRA_index[ig]] += chig[ig];
+		}
 	}
-}
 
-void Stochastic_Hchi::orthogonal_to_psi()
-{
-	TITLE("Stochastic_Hchi","orthogonal_to_psi0");
+	
+	//------------------------------------
+	// (4) Conver (2) & (3) in Reciprocal space to Real one
+	//------------------------------------
+	fftw_execute(pb);
+	for(int i = 0; i < nrxx; ++i)
+	{
+		hchi[i] += rl_chi[i] / nrxx;
+	}
 
+	
+	double Ebar = (Emin + Emax)/2;
+	double DeltaE = (Emax - Emin)/2;
+	for(int i = 0; i < nrxx; ++i)
+	{
+		hchi[i] = (hchi[i] - Ebar * chi_in[i]) / DeltaE;
+	}
+	delete [] chig;
+	//test Emax & Emin
+	//------------------------------------------------------------
+	//double sum1 = 0;
+	//double sum2 = 0;
+	//for(int i = 0 ; i < nrxx; ++i)
+	//{
+	//	sum1 += norm(chi_in[i]);
+	//	sum2 += real(conj(chi_in[i]) * hchi[i]);
+	//}
+	//cout<<sum2 << " "<<sum1<<" "<<sum2/sum1<<"  dddd"<<endl;
+	//------------------------------------------------------------
 
+	//test hermit property
+	//------------------------------------------------------------
+	//complex<double> sum=0;
+	//for(int i = 0 ; i < nrxx; ++i)
+	//{
+	//	sum+=conj(chi_in[i]) * hchi[i];
+	//}
+	//cout<<sum<<" must be real numebr."<<endl; //sum must be a real number
+	//------------------------------------------------------------
 	return;
 }
diff --git a/ABACUS.develop/source/src_pw/sto_hchi.h b/ABACUS.develop/source/src_pw/sto_hchi.h
index 2b0572d834..671c4d7d30 100644
--- a/ABACUS.develop/source/src_pw/sto_hchi.h
+++ b/ABACUS.develop/source/src_pw/sto_hchi.h
@@ -3,34 +3,49 @@
 #include "tools.h"
 
 //-----------------------------------------------------
-// H * chi
+// h * chi
 // chi: stochastic wave functions
 //
-// H: the Hamiltonian matrix, which is decomposed 
-// into the electron kinetic, effective potential V(r),
+// h: the normalized Hamiltonian matrix, which equals to (H - E\bar) / \DeltaE
+// where H is decomposed into the electron kinetic, effective potential V(r),
 // and the non-local pseudopotentials.
 // The effective potential = Local pseudopotential +
 // Hartree potential + Exchange-correlation potential
 //------------------------------------------------------
 
-class Stochastic_Hchi
+class Stochastic_hchi
 {
 
 	public:
 
     // constructor and deconstructor
-    Stochastic_Hchi();
-    ~Stochastic_Hchi();
-	void Hchi();
+    Stochastic_hchi();
+    ~Stochastic_hchi();
+	static void init();
+	static int nrxx;
+	static int nx,ny,nz;
+	static fftw_plan pf,pb;
+	static double Emin, Emax;
+	static bool initplan,ortho;
+	static complex<double> *rp_chi,*rl_chi;
+
+	static int * GRA_index;
+	static void get_GRA_index();
+
+	static void orthogonal_to_psi_real(complex<double>* wfin, complex<double> *wfout, int& ikk); //wfin & wfout are wavefunctions in real space
+	static void hchi_real(complex<double> *wfin, complex<double> *wfout); //wfin & wfout are wavefunctions in real space
+	
+	static void orthogonal_to_psi_reciprocal(complex<double>* wfin, complex<double> *wfout, int& ikk); //wfin & wfout are wavefunctions in reciprocal space
+	static void hchi_reciprocal(complex<double> *wfin, complex<double> *wfout); //wfin & wfout are wavefunctions in reciprocal space
 	
 
 	private:
 
 	// chi should be orthogonal to psi (generated by diaganolization methods,
 	// such as CG)
-	void orthogonal_to_psi();
+	
 
 
 };
 
-#endif// Eelectrons_Hchi
+#endif// Eelectrons_hchi
diff --git a/ABACUS.develop/source/src_pw/sto_iter.cpp b/ABACUS.develop/source/src_pw/sto_iter.cpp
index 6cb3f891bf..a579886577 100644
--- a/ABACUS.develop/source/src_pw/sto_iter.cpp
+++ b/ABACUS.develop/source/src_pw/sto_iter.cpp
@@ -1,11 +1,482 @@
+#include "tools.h"
 #include "global.h"
-#include "sto_iter.h" 
+#include "sto_iter.h"
+#include "occupy.h" 
 
+double Stochastic_Iter:: mu;
+double Stochastic_Iter:: mu0;
+double Stochastic_Iter:: Emin;
+double Stochastic_Iter:: Emax;
 
 Stochastic_Iter::Stochastic_Iter()
 {
+    mu0 = 0;
+    spolyv = new double [1];
 }
 
 Stochastic_Iter::~Stochastic_Iter()
 {
 }
+void Stochastic_Iter:: init()
+{
+    nchip = STO_WF.nchip;
+    //wait for init
+    targetne = ucell.nelec;
+    stoche.init();
+    stohchi.init();
+    stohchi.get_GRA_index();
+    delete [] spolyv;
+    int norder = stoche.norder;
+    spolyv = new double [norder];
+
+}
+
+void Stochastic_Iter:: itermu()
+{
+    
+
+    int nkk=1;// We temporarily use gamma k point.
+    //orthogonal part
+    if(NBANDS > 0)
+    {
+        for(int ik = 0; ik < nkk; ++ik)
+        {
+            for(int ichi = 0; ichi < nchip; ++ichi)
+            {
+                complex<double> * p0 = &STO_WF.chi0[ik](ichi,0);
+                complex<double> * pchi = &STO_WF.chiortho[ik](ichi,0);
+                stohchi.orthogonal_to_psi_real(p0,pchi,ik);
+            }
+        }
+    }
+
+    //if(NBANDS > 0)
+    //{
+    //    Emin = wf.ekb[0][0];}
+    //Emax = pw.ecutwfc * 20-250;
+    Emax = 750;
+    Emin = -10;
+    Stochastic_hchi:: Emin = this->Emin;
+    Stochastic_hchi:: Emax = this->Emax;
+     
+    sumpolyval();
+    mu = mu0 - 2;
+    double ne1 = calne();
+    double mu1 = mu;
+    mu = mu0 + 2;
+    double ne2 = calne();
+    double mu2 = mu;
+    th_ne = 1e-6;
+    double Dne = th_ne + 1;
+    double ne3;
+    double mu3;
+    
+    mu = 0;
+    ne1 = calne();
+    //test the domin of mu
+    /*for(mu = -5; mu<5;mu+=0.2)
+    {
+        ne1 = calne();
+        cout<<"mu: "<<mu<<" ; ne: "<<ne1<<endl;
+    }*/
+    while(ne1 > targetne)
+    {
+        mu1 -= 2;
+        mu = mu1;
+        ne1 = calne();
+        cout<<"Reset mu1 from "<<mu1+2<<" to "<<mu1<<endl;
+    }
+    while(ne2 < targetne)
+    {
+        mu2 += 2;
+        mu = mu2;
+        ne2 = calne();
+        cout<<"Reset mu2 form "<<mu2-2<<" to "<<mu2<<endl;
+    }
+
+    int count = 0;
+    while(Dne > th_ne)
+    {
+        mu3 = (mu2 + mu1) / 2;
+        mu = mu3;
+        ne3 = calne();
+        if(ne3 < targetne)
+        {
+            ne1 = ne3;
+            mu1 = mu3;
+        }
+        else if(ne3 > targetne)
+        {
+            ne2 = ne3;
+            mu2 = mu3;
+        }
+        Dne = abs(targetne - ne3);
+        //cout<<setw(20)<<"targetne"<<setw(20)<<"ne"<<setw(20)<<"mu"<<setw(20)<<"Dne"<<endl;
+        //cout<<setw(20)<<targetne<<setw(20)<<ne3<<setw(20)<<mu3<<setw(20)<<Dne<<endl;
+        count++;
+        if(count > 100)
+        {
+            cout<<"Fermi energy cannot be converged."<<endl;
+        }
+    }
+    cout<<"Converge fermi energy = "<<mu<<" Ry in "<<count<<" steps."<<endl;
+
+    mu = mu0 = mu3;
+    
+    //Set wf.wg 
+    if(NBANDS > 0)
+    {
+        for(int ikk = 0; ikk < nkk; ++ikk)
+        {
+            double *en=wf.ekb[ikk];
+            for(int iksb = 0; iksb < NBANDS; ++iksb)
+            {
+                wf.wg(ikk,iksb) = fd(en[iksb])*kv.wk[ikk];
+            }
+        }
+    }
+}
+
+void Stochastic_Iter:: sumpolyval()
+{
+    int norder = stoche.norder;
+    //wait for init
+    int nkk=1;
+    
+    int nrxx = stohchi.nrxx;
+    ZEROS(spolyv, norder);
+    
+    complex<double> * pchi;
+    for(int ik = 0; ik < nkk; ++ik)
+    {
+        for(int ichi = 0; ichi < nchip; ++ichi)
+        {
+            if(NBANDS > 0)
+            {
+                pchi = &STO_WF.chiortho[ik](ichi,0);
+            }  
+            else
+            {
+                pchi = &STO_WF.chi0[ik](ichi,0);
+            }
+            stoche.calpolyval(stohchi.hchi_real, nrxx, pchi);
+            for(int ior = 0; ior < norder; ++ior)
+            {
+                spolyv[ior] += stoche.polyvalue[ior].real();
+            }
+        }
+    }
+
+    return;
+}
+
+
+double Stochastic_Iter::calne()
+{  
+    //wait for init
+    int nkk = 1;
+
+    stoche.calcoef(this->nfd);
+    int norder = stoche.norder;
+    double totne = 0;
+    KS_ne = 0;
+    for(int ikk = 0; ikk < nkk; ++ikk)
+    {
+        for(int ior = 0; ior < norder; ++ior)
+        {
+            totne += stoche.coef[ior] * spolyv[ior] * kv.wk[ikk];
+        }
+        //cout<<endl;
+        if(NBANDS > 0)
+        {
+            double *en=wf.ekb[ikk];
+            //number of electrons in KS orbitals
+            for(int iksb = 0; iksb < NBANDS; ++iksb)
+            {
+                KS_ne += fd(en[iksb]) * kv.wk[ikk];
+            }
+        }
+            
+    }
+    totne += KS_ne;
+    
+    return totne;
+}
+
+void Stochastic_Iter::sum_stoband()
+{  
+    //stoche.calcoef(this->nfd);
+    stoche.calcoef(this->nroot_fd);
+
+    int nkk=1;// We temporarily use gamma k point.
+    int nrxx = stohchi.nrxx;
+    
+    complex<double> * out = new complex<double> [nrxx];
+    complex<double> * hout = new complex<double> [nrxx];
+
+    double dr3 = nrxx / ucell.omega;
+    double Ebar = (Emin + Emax)/2;
+	double DeltaE = (Emax - Emin)/2;
+   
+    double out2, tmpne;
+    double sto_ne;
+    double sto_ekband=0;
+    cout<<"Eband 1 "<<en.eband<<endl;
+    complex<double> * pchi;
+    for(int ik = 0; ik < nkk; ++ik)
+    {
+        sto_ekband =0;
+        double *rho = CHR.rho[ik];
+        tmpne = 0;
+        for(int ichi = 0; ichi < nchip; ++ichi)
+        {
+            if(NBANDS > 0)
+            {
+                pchi = &STO_WF.chiortho[ik](ichi,0);
+            }  
+            else
+            {
+                pchi = &STO_WF.chi0[ik](ichi,0);
+            }   
+            stoche.calresult(stohchi.hchi_real, nrxx, pchi, out);
+            stohchi.hchi_real(out,hout);
+            for(int ir = 0; ir < nrxx; ++ir)
+            {
+                out2 = norm(out[ir]);
+                tmpne = out2 * kv.wk[ik];
+                sto_ekband += real(conj(out[ir]) * hout[ir]) * DeltaE + Ebar * out2;
+                rho[ir] += tmpne * dr3; 
+                sto_ne += tmpne;
+                //rho[ir] += real(conj(pchi[ir]) * out[ir]) * kv.wk[ik] * nrxx / ucell.omega;
+            }
+        }
+        en.eband += sto_ekband * kv.wk[ik];
+    }
+    cout<<"Eband 2 "<<en.eband<<endl;
+    cout<<"Renormalize rho from ne = "<<sto_ne+KS_ne<<" to targetne = "<<targetne<<endl;
+    double factor = targetne / (sto_ne + KS_ne);
+    for(int ik = 0; ik < nkk; ++ik)
+    {
+        for(int ir = 0; ir < nrxx ; ++ir)
+        {
+            CHR.rho[ik][ir] *= factor;
+        }
+    }
+    
+
+
+    /*double sum = 0;
+	for(int ir = 0; ir < pw.nrxx; ++ir )
+	{
+		sum += CHR.rho[0][ir];
+	}
+	cout<<"Total nele: "<<sum*ucell.omega/pw.nrxx<<endl;*/
+    
+    delete [] out;
+    delete [] hout;
+    return;
+}
+
+double Stochastic_Iter:: root_fd(double e)
+{
+    double e_mu = (e - mu) / Occupy::gaussian_parameter ;
+    if(e_mu > 200)
+        return 0;
+    else
+        return 1 / sqrt(1 + exp(e_mu));
+}
+
+double Stochastic_Iter:: nroot_fd(double e)
+{
+    double Ebar = (Emin + Emax)/2;
+	double DeltaE = (Emax - Emin)/2;
+    double ne_mu = (e * DeltaE + Ebar - mu) / Occupy::gaussian_parameter ;
+    if(ne_mu > 200)
+        return 0;
+    else
+        return 1 / sqrt(1 + exp(ne_mu));
+}
+
+double Stochastic_Iter:: fd(double e)
+{
+    double e_mu = (e - mu) / Occupy::gaussian_parameter ;
+    if(e_mu > 100)
+        return 0;
+    else
+        return 1 / (1 + exp(e_mu));
+}
+
+double Stochastic_Iter:: nfd(double e)
+{
+    double Ebar = (Emin + Emax)/2;
+	double DeltaE = (Emax - Emin)/2;
+    double ne_mu = (e * DeltaE + Ebar - mu) / Occupy::gaussian_parameter ;
+    if(ne_mu > 100)
+        return 0;
+    else
+        return 1 / (1 + exp(ne_mu));
+}
+ void Stochastic_Iter:: test()
+ {
+     //=====================test============================
+    /*
+    complex<double> *in = new complex<double> [pw.nrxx];
+    
+    complex<double> *chig1 = new complex<double> [wf.npw];
+    complex<double> *chig2 = new complex<double> [wf.npw];
+    ZEROS(in,pw.nrxx);
+    ZEROS(in2,pw.nrxx);*/
+
+    //---------------------------------------------------
+    /*//test hermit property of  hchi matrix
+    Emin = -1;
+    Emax = 1;
+    Stochastic_hchi:: Emin = this -> Emin;
+    Stochastic_hchi:: Emax = this -> Emax;
+    complex<double> *out = new complex<double> [pw.nrxx];
+    complex<double> *in2 = new complex<double> [pw.nrxx];
+    cout<<"------------------------------------"<<endl;
+    complex<double> cij,cji;
+    double dc;
+    for(int i = 0 ; i < 300 ; ++i)
+    {
+        if( i % 10  == 0)
+            cout<<"We are testing "<<i+1 <<" rows..."<<endl;
+        for(int j = i+1; j < 300 ; ++j)
+        {
+            in2[j] = 1;
+            stohchi.hchi_real(in2,out);
+            cij = out[i];
+            in2[j] = 0;
+            in2[i] = 1;
+            stohchi.hchi_real(in2,out);
+            cji = out[j];
+            in2[i] = 0;
+            dc = abs(conj(cij)-cji);
+            if(dc > 1e-6)
+            {
+                cout<<"(i,j) = ("<<i+1<<" , "<<j+1<<") ; cij = "<<cij<<" ; cji = "<<cji<<endl;
+            }
+
+        }
+    }
+    cout<<"------------------------------------"<<endl;
+    delete[] out;
+    delete[] in2;*/
+    //---------------------------------------------------
+    
+    //-------------------------------------------------------
+    /*//compare hchi and h_psi
+    Emin = -1;
+    Emax = 1;
+    Stochastic_hchi:: Emin = this -> Emin;
+    Stochastic_hchi:: Emax = this -> Emax;
+    int * GRA_index = stohchi.GRA_index;
+    ZEROS(in,pw.nrxx);
+    complex<double> *kswf = &wf.evc[0](0,0);
+    for ( int ig = 0 ; ig< wf.npw; ++ig)
+    {
+        in[GRA_index[ig]] = kswf [ig];
+    }
+    fftw_plan pp=fftw_plan_dft_3d(pw.nx,pw.ny,pw.nz,(fftw_complex *)in,(fftw_complex *)in2, FFTW_BACKWARD, FFTW_ESTIMATE);
+    fftw_execute(pp);
+    stohchi.hchi_real(in2,out);
+    fftw_plan pp2=fftw_plan_dft_3d(pw.nx,pw.ny,pw.nz,(fftw_complex *)out,(fftw_complex *)out, FFTW_FORWARD, FFTW_ESTIMATE);
+    fftw_execute(pp2);
+    for(int i  = 0; i <wf.npw;++i)
+    {
+        chig1[i] = out[GRA_index[i]] / pw.nrxx; 
+    }
+
+    hm.hpw.h_psi( kswf , chig2);
+    for(int i = 0; i<wf.npw;++i)
+    {
+        if(i % 100 == 0)
+        cout<<kswf[i]<<" "<<in[GRA_index[i]]<<" "<<chig1[i]<<" "<<chig2[i]<<endl;
+    }
+    cout<<endl;*/
+    //---------------------------------------------------
+    
+    //---------------------------------------------------
+    //compare eigen energy
+    /*
+    int * GRA_index = stohchi.GRA_index;
+    complex<double> *chigout = new complex<double> [wf.npw];
+    complex<double> *wave = new complex<double> [pw.nrxx];
+    complex<double> *waveout = new complex<double> [pw.nrxx];
+    Emax = 1000;
+    Emin = 0;
+    Stochastic_hchi:: Emin = this->Emin;
+    Stochastic_hchi:: Emax = this->Emax;
+    double Ebar = (Emax + Emin)/2;
+    double DeltaE = (Emax - Emin)/2;
+    fftw_plan pp=fftw_plan_dft_3d(pw.nx,pw.ny,pw.nz,(fftw_complex *)wave,(fftw_complex *)wave, FFTW_BACKWARD, FFTW_ESTIMATE);
+    for(int ib = 0 ; ib < NBANDS ; ++ib)
+    {
+        complex<double> *kswf = &wf.evc[0](ib,0);
+        hm.hpw.h_psi( kswf , chigout);
+        double energy = 0;
+        double norm1 =0;
+        ZEROS(wave,pw.nrxx);
+        for(int ig = 0 ; ig < wf.npw ; ++ig)
+        {
+            energy += real(conj(kswf[ig]) * chigout[ig]);
+            norm1 += norm (kswf[ig]);
+            wave[GRA_index[ig]] = kswf[ig];
+        }
+        fftw_execute(pp);
+        stohchi.hchi_real(wave,waveout);
+        double energy2 = 0;
+        double norm2 =0;
+        for(int ir = 0 ; ir < pw.nrxx ; ++ir)
+        {
+            energy2 += real(conj(wave[ir]) * waveout[ir]) * DeltaE + Ebar * norm(wave[ir]);
+            norm2 += norm(wave[ir]);
+        }
+
+
+        cout<<"BAND "<<ib+1<<" "<<energy/norm1<<"  "<<energy2/norm2<<endl;
+    }
+    delete []chigout;
+    delete []wave;
+    delete []waveout;*/
+    //---------------------------------------------------
+
+    //---------------------------------------------------
+    //test ne
+    /*
+    int * GRA_index = stohchi.GRA_index;
+    complex<double> *wave = new complex<double> [pw.nrxx];
+    complex<double> *waveout = new complex<double> [pw.nrxx];
+    Emax = 750;
+    Emin = -100;
+    Stochastic_hchi:: Emin = this->Emin;
+    Stochastic_hchi:: Emax = this->Emax;
+    mu = en.ef;
+    stoche.calcoef(this->nfd);
+    fftw_plan pp=fftw_plan_dft_3d(pw.nx,pw.ny,pw.nz,(fftw_complex *)wave,(fftw_complex *)wave, FFTW_BACKWARD, FFTW_ESTIMATE);
+    for(int ib = 0 ; ib < NBANDS ; ++ib)
+    {
+        ZEROS(wave,pw.nrxx);
+        complex<double> *kswf = &wf.evc[0](ib,0);
+        for(int ig = 0 ; ig < wf.npw ; ++ig)
+        {
+            wave[GRA_index[ig]] = kswf[ig];
+        }
+        fftw_execute(pp);
+        double ne =0;
+        double norm1 = 0;
+        stoche.calresult(stohchi.hchi_real, pw.nrxx, wave, waveout);
+        for(int ir = 0 ; ir < pw.nrxx ; ++ir)
+        {   
+            ne += real(conj(wave[ir]) * waveout[ir]);
+            norm1 += norm(wave[ir]);
+        }
+        cout<<"Ne of Band "<<ib+1<<" is "<<ne/norm1 * kv.wk[0]<<endl;
+    }
+    delete []wave;
+    delete []waveout;*/
+
+
+    //=====================================================
+ }
diff --git a/ABACUS.develop/source/src_pw/sto_iter.h b/ABACUS.develop/source/src_pw/sto_iter.h
index 6b3be8d327..54cc52d012 100644
--- a/ABACUS.develop/source/src_pw/sto_iter.h
+++ b/ABACUS.develop/source/src_pw/sto_iter.h
@@ -2,6 +2,8 @@
 #define INCLUDE_STO_ITER_H
 
 #include "tools.h"
+#include "sto_che.h"
+#include "sto_hchi.h"
 
 //----------------------------------------------
 // Solve for the new electron density and iterate 
@@ -18,12 +20,35 @@ class Stochastic_Iter
 
     // constructor and deconstructor
     Stochastic_Iter();
-
     ~Stochastic_Iter();
 
-	double mu; // chemical potential
+    void init();
+    
+    void sum_stoband();
+    double calne();
+    double caldnedmu();
+    void itermu();
+    void sumpolyval();
+    void test(); //only for test
+    Stochastic_Chebychev stoche;
+    Stochastic_hchi stohchi;
+
+
+	static double mu, mu0; // chemical potential; unit in Ry
+    static double Emin, Emax; // unit in Ry
+    double targetne;
+    double * spolyv;
 
 	private:
+    
+    double nchip;
+    double th_ne;
+    double KS_ne;
+
+    static double root_fd(double e);
+    static double fd(double e);
+    static double nroot_fd(double e);
+    static double nfd(double e);
 
 
 };
diff --git a/ABACUS.develop/source/src_pw/sto_wf.cpp b/ABACUS.develop/source/src_pw/sto_wf.cpp
index daf6903d34..8b39c10ef5 100644
--- a/ABACUS.develop/source/src_pw/sto_wf.cpp
+++ b/ABACUS.develop/source/src_pw/sto_wf.cpp
@@ -4,21 +4,25 @@
 
 Stochastic_WF::Stochastic_WF()
 {
-    chi  = new ComplexMatrix[1];
+    chiortho  = new ComplexMatrix[1];
     chi0  = new ComplexMatrix[1];
 }
 
 Stochastic_WF::~Stochastic_WF()
 { 
-    delete[] chi;
+    delete[] chiortho;
     delete[] chi0;
 }
 
 void Stochastic_WF::init()
 {
     //wait for init
-    int nrxx;
-    int nx,ny,nz;
+    int nrxx = pw.nrxx;
+    int nx = pw.nx,ny = pw.ny,nz = pw.nz;
+
+    //distribute nchi for each process
+    nchip = int(nchi/NPROC_IN_POOL);
+    if(RANK_IN_POOL < nchi%NPROC_IN_POOL) ++nchip;
 
     complex<double> ui(0,1);
     
@@ -26,18 +30,24 @@ void Stochastic_WF::init()
     //We temporarily init one group of orbitals for all k points.
     //This save memories.
     chi0 = new ComplexMatrix[1]; 
-    chi0[0].create(nchip,nrxx,0);
+    chi0[0].create(nchip,nrxx,false);
     //init with random number
+    
+    srand((unsigned)time(NULL));
     for(int i=0; i<chi0[0].size; ++i)
     {
-        chi0[0].c[i]=exp(2*PI*rand()*ui);
+        chi0[0].c[i]=exp(2*PI*rand()/double(RAND_MAX)*ui) / sqrt(double(nchi));
     }
+    
 
-    delete[] chi;
+    delete[] chiortho;
     int nkk = 1; // We temporarily use gamma k point.
-    chi = new ComplexMatrix[nkk];
+    chiortho = new ComplexMatrix[1];
+    if(NBANDS > 0)
+    {
+        chiortho[0].create(nchip,nrxx,false);
+    }
     return;
 }
 
 
-
diff --git a/ABACUS.develop/source/src_pw/sto_wf.h b/ABACUS.develop/source/src_pw/sto_wf.h
index 8f73faeeb3..184478ef9d 100644
--- a/ABACUS.develop/source/src_pw/sto_wf.h
+++ b/ABACUS.develop/source/src_pw/sto_wf.h
@@ -19,14 +19,12 @@ class Stochastic_WF
 
 
 	void init();
-	void calculate_chi();
-
 
 	// ComplexMatrix may not be a best filetype to store the electronic wave functions
     ComplexMatrix* chi0;  	// origin stochastic wavefunctions in real space
-	ComplexMatrix* chi;	 	// stochastic wavefunctions after orthogonalized with KS wavefunctions
+	ComplexMatrix* chiortho;	// stochastic wavefunctions after in reciprocal space orthogonalized with KS wavefunctions
 	Stochastic_Chebychev sto_che;
-	int nchi; 				// Total number of stochatic obitals
+	int nchi; 				// Total number of stochatic obitals; unit in a_0^(3/2)
 	int nchip; 				// The number of stochatic obitals in current process 
 	int nche_sto;			// number of orders for Chebyshev expansion 
 
diff --git a/ABACUS.develop/source/src_pw/threshold_elec.cpp b/ABACUS.develop/source/src_pw/threshold_elec.cpp
index a89dec9988..7233fa6eee 100644
--- a/ABACUS.develop/source/src_pw/threshold_elec.cpp
+++ b/ABACUS.develop/source/src_pw/threshold_elec.cpp
@@ -40,7 +40,7 @@ void Threshold_Elec::set_ethr(void) const
     //================
     // self consistent
     //================
-    else if(CALCULATION=="scf" || CALCULATION=="md" || CALCULATION=="relax")
+    else if(CALCULATION=="scf" || CALCULATION=="md" || CALCULATION=="relax" || CALCULATION=="scf-sto")//qianrui 2021-2-20
     {
         if (ETHR == 0.0)
         {
diff --git a/ABACUS.develop/source/src_pw/unitcell_pseudo.cpp b/ABACUS.develop/source/src_pw/unitcell_pseudo.cpp
index c74fedd0b1..d069261835 100644
--- a/ABACUS.develop/source/src_pw/unitcell_pseudo.cpp
+++ b/ABACUS.develop/source/src_pw/unitcell_pseudo.cpp
@@ -1261,7 +1261,8 @@ void UnitCell_pseudo::cal_nelec(void)
 		}
 	}
 	
-	
+	if ( CALCULATION!="scf-sto" && CALCULATION!="relax-sto" && CALCULATION!="md-sto" ) //qianrui 2021-2-20
+	{
 	if(NBANDS == 0)
 	{
 		if(NSPIN == 1)
@@ -1287,10 +1288,11 @@ void UnitCell_pseudo::cal_nelec(void)
 			OUT(ofs_running,"nelup",mag.get_nelup());
 			WARNING_QUIT("unitcell","Too few spin up bands!");
 		}
-		if(NBANDS < mag.get_neldw() ) 
-		{
-			WARNING_QUIT("unitcell","Too few spin down bands!");
-		}
+		if(NBANDS < mag.get_neldw() )
+        {
+            WARNING_QUIT("unitcell","Too few spin down bands!");
+        }
+	}
 	}
 
 	// mohan update 2021-02-19