From 5056f2cfe972e51b229d780673921a55207f44b6 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Wed, 28 Aug 2019 14:08:15 +0800
Subject: [PATCH 01/38] add ewald, passed ener and force test

---
 source/lib/include/Ewald.h | 221 +++++++++++++++++++++++++++++++++++++
 1 file changed, 221 insertions(+)
 create mode 100644 source/lib/include/Ewald.h
diff --git a/source/lib/include/Ewald.h b/source/lib/include/Ewald.h
new file mode 100644
index 0000000000..b6c60f584f
--- /dev/null
+++ b/source/lib/include/Ewald.h
@@ -0,0 +1,221 @@
+#pragma once
+
+#include<algorithm>
+#include<cassert>
+
+#include "SimulationRegion.h"
+
+const double ElectrostaticConvertion = 14.39964535475696995031;
+
+template <typename VALUETYPE>
+struct EwaldParameters 
+{
+  VALUETYPE rcut = 6.0;
+  VALUETYPE beta = 2;
+  VALUETYPE spacing = 4;
+};
+
+
+
+template<typename VALUETYPE> 
+VALUETYPE
+dir_err_esti(const VALUETYPE & test_q,
+	     const VALUETYPE & c2,
+	     const VALUETYPE & nn,
+	     const EwaldParameters<VALUETYPE> & param) 
+{
+  const VALUETYPE & rcut = param.rcut;
+  const VALUETYPE & beta = param.beta;
+  const VALUETYPE rho_q2 = c2/nn;  
+  VALUETYPE sum = 2 * test_q 
+      * sqrt (rho_q2 / rcut)
+      * exp (- beta*beta*rcut*rcut) * ElectrostaticConvertion;
+  return sum;
+}
+
+template<typename VALUETYPE> 
+VALUETYPE
+rec_err_esti(const VALUETYPE & test_q,
+	     const VALUETYPE & c2,
+	     const VALUETYPE & nn,
+	     const EwaldParameters<VALUETYPE>&	param,
+	     const SimulationRegion<double>&	region) 
+{
+  const VALUETYPE & beta = param.beta;
+  vector<int> KK;
+  cmpt_k(KK, region, param);
+  const double * rec_box = region.getRecBoxTensor();
+  double sum = 0;
+  int BD[3];
+  for (int dd = 0; dd < 3; ++dd){
+    BD[dd] = KK[dd]/2 + 10;
+  }
+  int mm[3];
+  for (mm[0] = -BD[0]; mm[0] <= BD[0]; ++mm[0]){
+    for (mm[1] = -BD[1]; mm[1] <= BD[1]; ++mm[1]){
+      for (mm[2] = -BD[2]; mm[2] <= BD[2]; ++mm[2]){
+        if (mm[0] >= - int(KK[0])/2 && mm[0] <= int(KK[0])/2 &&
+            mm[1] >= - int(KK[1])/2 && mm[1] <= int(KK[1])/2 &&
+            mm[2] >= - int(KK[2])/2 && mm[2] <= int(KK[2])/2) continue;
+	VALUETYPE rm[3] = {0,0,0};	  
+	for (int dd = 0; dd < 3; ++dd){
+	  rm[0] += mm[dd] * rec_box[dd*3+0];
+	  rm[1] += mm[dd] * rec_box[dd*3+1];
+	  rm[2] += mm[dd] * rec_box[dd*3+2];
+	}
+	VALUETYPE mm2 = rm[0] * rm[0] + rm[1] * rm[1] + rm[2] * rm[2];
+        sum += exp (-2 * M_PI * M_PI / beta / beta * mm2) / mm2;
+      }
+    }
+  }
+  VALUETYPE vol = region.getVolume();
+  // cout << sqrt(sum) << " " << KK[0] << " " << rec_box[0] << " " << c2 << " " << vol << endl;
+  sum = test_q * 2 * sqrt(sum) * sqrt(c2) / vol * ElectrostaticConvertion;
+  return sum;
+}
+
+template <typename VALUETYPE>
+void
+cmpt_k(vector<int> & KK,
+       const SimulationRegion<VALUETYPE>&	region, 
+       const EwaldParameters<VALUETYPE>&	param)
+{
+  const double * boxt_ = region.getBoxTensor();
+  VALUETYPE boxt[9];
+  for (int dd = 0; dd < 9; ++dd){
+    boxt[dd] = static_cast<VALUETYPE>(boxt_[dd]);
+  }  
+  KK.resize(3);
+  for (int dd = 0; dd < 3; ++dd){
+    VALUETYPE ll = sqrt(MathUtilities::dot<VALUETYPE>(boxt+dd*3, boxt+dd*3));
+    KK[dd] = ll / param.spacing;
+    // KK[dd] should be large enough 
+    if (KK[dd] * param.spacing < ll) KK[dd] += 1;
+    assert(KK[dd] * param.spacing >= ll);
+    // KK[dd] should be even
+    if ((KK[dd] / 2) * 2 != KK[dd]) KK[dd] += 1;
+    assert((KK[dd] / 2) * 2 == KK[dd]);
+  }
+}
+       
+
+// compute the reciprocal part of the Ewald sum.
+// outputs: energy force virial
+// inputs: coordinates charges region
+template <typename VALUETYPE>
+void 
+EwaldReciprocal(VALUETYPE &			ener, 
+		vector<VALUETYPE> &		force,
+		vector<VALUETYPE> &		virial,
+		const vector<VALUETYPE>&	coord,
+		const vector<VALUETYPE>&	charge,
+		const SimulationRegion<double>& region, 
+		const EwaldParameters<VALUETYPE>&	param)
+{
+  // natoms
+  int natoms = charge.size();
+  // init returns
+  force.resize(natoms * 3);  
+  virial.resize(9);
+  ener = 0;
+  fill(force.begin(), force.end(), static_cast<VALUETYPE>(0));
+  fill(virial.begin(), virial.end(), static_cast<VALUETYPE>(0));
+  
+  vector<int> KK(3);
+  int totK = 1;
+  cmpt_k<VALUETYPE>(KK, region, param);
+  for (int dd = 0; dd < 3; ++dd){
+    totK *= (KK[dd]+1);
+  }  
+  
+  // compute the sq
+  VALUETYPE * sqr = new VALUETYPE[totK];
+  VALUETYPE * sqi = new VALUETYPE[totK];
+  for (int ii = 0; ii < totK; ++ii){
+    sqr[ii] = static_cast<VALUETYPE>(0);
+    sqi[ii] = static_cast<VALUETYPE>(0);
+  }
+  // firstly loop over particles then loop over m
+  int mm[3];
+  for (int ii = 0; ii < natoms; ++ii){
+    double ir[3];
+    region.phys2Inter(ir, &coord[ii*3]);
+    double mr[3];
+    int mc = 0;
+    for (mm[0] = -KK[0]/2; mm[0] <= KK[0]/2; ++mm[0]){
+      mr[0] = ir[0] * mm[0];
+      for (mm[1] = -KK[1]/2; mm[1] <= KK[1]/2; ++mm[1]){
+	mr[1] = ir[1] * mm[1];
+	for (mm[2] = -KK[2]/2; mm[2] <= KK[2]/2; ++mm[2]){
+	  if (mm[0] == 0 && mm[1] == 0 && mm[2] == 0) continue;
+	  mr[2] = ir[2] * mm[2];
+	  double mdotr = 2. * M_PI * (mr[0]+mr[1]+mr[2]);
+	  sqr[mc] += charge[ii] * cos(mdotr);
+	  sqi[mc] += charge[ii] * sin(mdotr);
+	  ++mc;
+	}
+      }
+    }
+  }
+  VALUETYPE rec_box[9];
+  const double * rec_box_ = region.getRecBoxTensor();
+  for (int ii = 0; ii < 9; ++ii){
+    rec_box[ii] = static_cast<VALUETYPE>(rec_box_[ii]);
+  }
+  // calculate ener, force and virial
+  // firstly loop over particles then loop over m
+  int mc = 0;
+  for (mm[0] = -KK[0]/2; mm[0] <= KK[0]/2; ++mm[0]){
+    for (mm[1] = -KK[1]/2; mm[1] <= KK[1]/2; ++mm[1]){
+      for (mm[2] = -KK[2]/2; mm[2] <= KK[2]/2; ++mm[2]){
+	if (mm[0] == 0 && mm[1] == 0 && mm[2] == 0) continue;
+	// \bm m and \vert m \vert^2
+	VALUETYPE rm[3] = {0,0,0};	  
+	for (int dd = 0; dd < 3; ++dd){
+	  rm[0] += mm[dd] * rec_box[dd*3+0];
+	  rm[1] += mm[dd] * rec_box[dd*3+1];
+	  rm[2] += mm[dd] * rec_box[dd*3+2];
+	}
+	VALUETYPE mm2 = rm[0] * rm[0] + rm[1] * rm[1] + rm[2] * rm[2];
+	// energy
+	VALUETYPE expmm2 = exp(- M_PI * M_PI * mm2 / (param.beta * param.beta)) / mm2;
+	VALUETYPE eincr = expmm2 * (sqr[mc] * sqr[mc] + sqi[mc] * sqi[mc]);
+	ener += eincr;
+	// virial
+	VALUETYPE vpref = -2. * (1. + M_PI * M_PI * mm2 / (param.beta * param.beta)) / mm2;
+	for (int dd0 = 0; dd0 < 3; ++dd0){
+	  for (int dd1 = 0; dd1 < 3; ++dd1){	    
+	    VALUETYPE tmp = vpref * rm[dd0] * rm[dd1];
+	    if (dd0 == dd1) tmp += 1;
+	    virial[dd0*3+dd1] += eincr * tmp;
+	  }
+	}
+	// force
+	for (int ii = 0; ii < natoms; ++ii){
+	  VALUETYPE mdotr = - 2. * M_PI * (coord[ii*3+0]*rm[0] + coord[ii*3+1]*rm[1] + coord[ii*3+2]*rm[2]);
+	  VALUETYPE tmpr = charge[ii] * cos(mdotr);
+	  VALUETYPE tmpi = charge[ii] * sin(mdotr);
+	  VALUETYPE cc = 4. * M_PI * (tmpr * sqi[mc] + tmpi * sqr[mc]) * expmm2;
+	  force[ii*3+0] -= rm[0] * cc;
+	  force[ii*3+1] -= rm[1] * cc;
+	  force[ii*3+2] -= rm[2] * cc;
+	}	  
+	++mc;
+      }
+    }
+  }
+  VALUETYPE vol = static_cast<VALUETYPE>(region.getVolume());
+  ener /= 2 * M_PI * vol;
+  ener *= ElectrostaticConvertion;
+  for (int ii = 0; ii < 3*natoms; ++ii){
+    force[ii] /= 2 * M_PI * vol;
+    force[ii] *= ElectrostaticConvertion;
+  }  
+  for (int ii = 0; ii < 3*3; ++ii){
+    virial[ii] /= 2 * M_PI * vol;
+    virial[ii] *= ElectrostaticConvertion;
+  }  
+  delete[]sqr;
+  delete[]sqi;
+}
+

From eb4ca51a95779041ca5ae03b62b7e6b7f811de03 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Wed, 16 Oct 2019 22:37:01 +0800
Subject: [PATCH 02/38] add description for the printed data

---
 source/lib/include/Ewald.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/source/lib/include/Ewald.h b/source/lib/include/Ewald.h
index b6c60f584f..0fd110c1f0 100644
--- a/source/lib/include/Ewald.h
+++ b/source/lib/include/Ewald.h
@@ -15,8 +15,6 @@ struct EwaldParameters
   VALUETYPE spacing = 4;
 };
 
-
-
 template<typename VALUETYPE> 
 VALUETYPE
 dir_err_esti(const VALUETYPE & test_q,
@@ -69,7 +67,11 @@ rec_err_esti(const VALUETYPE & test_q,
     }
   }
   VALUETYPE vol = region.getVolume();
-  // cout << sqrt(sum) << " " << KK[0] << " " << rec_box[0] << " " << c2 << " " << vol << endl;
+  // cout << "sum: " << sqrt(sum) 
+  //      << " KK: " << KK[0] 
+  //      << " rbox: " << rec_box[0] 
+  //      << " c2: " << c2 
+  //      << " vol: " << vol << endl;
   sum = test_q * 2 * sqrt(sum) * sqrt(c2) / vol * ElectrostaticConvertion;
   return sum;
 }

From c98dd918c8e48d73762b8f9aeee33fc82e6e4821 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Thu, 17 Oct 2019 21:07:26 +0800
Subject: [PATCH 03/38] implement op ewald_recp, tested force

---
 source/op/CMakeLists.txt    |   2 +-
 source/op/ewald_recp.cc     | 158 ++++++++++++++++++++++++++++++++++++
 source/tests/test_ewald.py  | 100 +++++++++++++++++++++++
 source/train/CMakeLists.txt |   2 +-
 source/train/EwaldRecp.py   |  22 +++++
 5 files changed, 282 insertions(+), 2 deletions(-)
 create mode 100644 source/op/ewald_recp.cc
 create mode 100644 source/tests/test_ewald.py
 create mode 100644 source/train/EwaldRecp.py

diff --git a/source/op/CMakeLists.txt b/source/op/CMakeLists.txt
index 58051ff45b..6ad4d1b929 100644
--- a/source/op/CMakeLists.txt
+++ b/source/op/CMakeLists.txt
@@ -3,7 +3,7 @@
 set(OP_LIB ${PROJECT_SOURCE_DIR}/lib/src/SimulationRegion.cpp ${PROJECT_SOURCE_DIR}/lib/src/NeighborList.cpp)
 
 set (OP_CXX_FLAG -D_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI} )
-file(GLOB OP_SRC prod_force.cc prod_virial.cc descrpt.cc descrpt_se_a.cc descrpt_se_r.cc tab_inter.cc prod_force_se_a.cc prod_virial_se_a.cc prod_force_se_r.cc prod_virial_se_r.cc soft_min.cc soft_min_force.cc soft_min_virial.cc )
+file(GLOB OP_SRC prod_force.cc prod_virial.cc descrpt.cc descrpt_se_a.cc descrpt_se_r.cc tab_inter.cc prod_force_se_a.cc prod_virial_se_a.cc prod_force_se_r.cc prod_virial_se_r.cc soft_min.cc soft_min_force.cc soft_min_virial.cc ewald_recp.cc)
 file(GLOB OP_GRADS_SRC prod_force_grad.cc prod_force_se_a_grad.cc prod_force_se_r_grad.cc prod_virial_grad.cc prod_virial_se_a_grad.cc prod_virial_se_r_grad.cc soft_min_force_grad.cc soft_min_virial_grad.cc )
 file(GLOB OP_PY *.py)
 
diff --git a/source/op/ewald_recp.cc b/source/op/ewald_recp.cc
new file mode 100644
index 0000000000..d2d04601a0
--- /dev/null
+++ b/source/op/ewald_recp.cc
@@ -0,0 +1,158 @@
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include <iostream>
+
+#include "Ewald.h"
+
+typedef double boxtensor_t ;
+
+using namespace tensorflow;
+using namespace std;
+
+#ifdef HIGH_PREC
+typedef double VALUETYPE ;
+#else 
+typedef float  VALUETYPE ;
+#endif
+
+#ifdef HIGH_PREC
+REGISTER_OP("EwaldRecp")
+.Input("coord: double")
+.Input("charge: double")
+.Input("natoms: int32")
+.Input("box: double")
+.Attr("ewald_beta: float")
+.Attr("ewald_h: float")
+.Output("energy: double")
+.Output("force: double")
+.Output("virial: double");
+#else
+REGISTER_OP("EwaldRecp")
+.Input("coord: float")
+.Input("charge: float")
+.Input("natoms: int32")
+.Input("box: float")
+.Attr("ewald_beta: float")
+.Attr("ewald_h: float")
+.Output("energy: float")
+.Output("force: float")
+.Output("virial: float");
+#endif
+
+class EwaldRecpOp : public OpKernel {
+public:
+  explicit EwaldRecpOp(OpKernelConstruction* context) : OpKernel(context) {
+    float beta, spacing;
+    OP_REQUIRES_OK(context, context->GetAttr("ewald_beta", &(beta)));
+    OP_REQUIRES_OK(context, context->GetAttr("ewald_h", &(spacing)));
+    ep.beta = beta;
+    ep.spacing = spacing;
+  }
+
+  void Compute(OpKernelContext* context) override {
+    // Grab the input tensor
+    int cc = 0;
+    const Tensor& coord_tensor	= context->input(cc++);
+    const Tensor& charge_tensor	= context->input(cc++);
+    const Tensor& natoms_tensor	= context->input(cc++);
+    const Tensor& box_tensor	= context->input(cc++);
+
+    // set size of the sample
+    OP_REQUIRES (context, (coord_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of coord should be 2"));
+    OP_REQUIRES (context, (charge_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of type should be 2"));
+    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) == 1),	errors::InvalidArgument ("size of natoms should be 1"));
+    OP_REQUIRES (context, (box_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of box should be 2"));
+    auto natoms	= natoms_tensor.flat<int>();
+    int nloc = natoms(0);
+    int nsamples = coord_tensor.shape().dim_size(0);
+
+    // check the sizes
+    OP_REQUIRES (context, (nsamples == coord_tensor.shape().dim_size(0)),	errors::InvalidArgument ("number of samples should match"));
+    OP_REQUIRES (context, (nsamples == charge_tensor.shape().dim_size(0)),	errors::InvalidArgument ("number of samples should match"));
+    OP_REQUIRES (context, (nsamples == box_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of samples should match"));
+
+    OP_REQUIRES (context, (nloc * 3 == coord_tensor.shape().dim_size(1)),	errors::InvalidArgument ("number of atoms should match"));
+    OP_REQUIRES (context, (nloc == charge_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of atoms should match"));
+    OP_REQUIRES (context, (9 == box_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of box should be 9"));
+
+    // Create an output tensor
+    TensorShape energy_shape ;
+    energy_shape.AddDim (nsamples);
+    TensorShape force_shape ;
+    force_shape.AddDim (nsamples);
+    force_shape.AddDim (nloc * 3);
+    TensorShape virial_shape ;
+    virial_shape.AddDim (nsamples);
+    virial_shape.AddDim (9);
+
+    cc = 0;
+    Tensor* energy_tensor = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(cc++, energy_shape, &energy_tensor));
+    Tensor* force_tensor = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(cc++, force_shape, &force_tensor));
+    Tensor* virial_tensor = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(cc++, virial_shape, &virial_tensor));
+    
+    auto coord	= coord_tensor	.matrix<VALUETYPE>();
+    auto charge	= charge_tensor	.matrix<VALUETYPE>();
+    auto box	= box_tensor	.matrix<VALUETYPE>();
+    auto energy	= energy_tensor	->flat<VALUETYPE>();
+    auto force	= force_tensor	->matrix<VALUETYPE>();
+    auto virial	= virial_tensor	->matrix<VALUETYPE>();
+
+    for (int kk = 0; kk < nsamples; ++kk){
+      // set region
+      boxtensor_t boxt [9] = {0};
+      for (int dd = 0; dd < 9; ++dd) {
+	boxt[dd] = box(kk, dd);
+      }
+      SimulationRegion<boxtensor_t > region;
+      region.reinitBox (boxt);
+
+      // set & normalize coord
+      vector<boxtensor_t > d_coord3_ (nloc*3);
+      for (int ii = 0; ii < nloc; ++ii){
+	for (int dd = 0; dd < 3; ++dd){
+	  d_coord3_[ii*3+dd] = coord(kk, ii*3+dd);
+	}
+	double inter[3];
+	region.phys2Inter (inter, &d_coord3_[3*ii]);
+	for (int dd = 0; dd < 3; ++dd){
+	  if      (inter[dd] < 0 ) inter[dd] += 1.;
+	  else if (inter[dd] >= 1) inter[dd] -= 1.;
+	}
+      }
+      vector<VALUETYPE > d_coord3 (nloc*3);
+      for (int ii = 0; ii < nloc * 3; ++ii) {
+	d_coord3[ii] = d_coord3_[ii];
+      }
+
+      // set charge
+      vector<VALUETYPE > d_charge (nloc);
+      for (int ii = 0; ii < nloc; ++ii) d_charge[ii] = charge(kk, ii);
+
+      // prepare outputs vectors
+      VALUETYPE d_ener;
+      vector<VALUETYPE> d_force(nloc*3);
+      vector<VALUETYPE> d_virial(9);
+
+      // compute
+      EwaldReciprocal(d_ener, d_force, d_virial, d_coord3, d_charge, region, ep);
+
+      // copy output
+      energy(kk) = d_ener;
+      for (int ii = 0; ii < nloc * 3; ++ii){
+	force(kk, ii) = d_force[ii];
+      }
+      for (int ii = 0; ii < 9; ++ii){
+	virial(kk, ii) = d_virial[ii];
+      }
+    }
+  }
+private:
+  EwaldParameters<VALUETYPE> ep;
+};
+
+REGISTER_KERNEL_BUILDER(Name("EwaldRecp").Device(DEVICE_CPU), EwaldRecpOp);
+
diff --git a/source/tests/test_ewald.py b/source/tests/test_ewald.py
new file mode 100644
index 0000000000..202fb19755
--- /dev/null
+++ b/source/tests/test_ewald.py
@@ -0,0 +1,100 @@
+import os,sys,platform
+import numpy as np
+import unittest
+from deepmd.env import tf
+
+from deepmd.RunOptions import global_tf_float_precision
+from deepmd.RunOptions import global_np_float_precision
+from deepmd.RunOptions import global_ener_float_precision
+from deepmd.EwaldRecp import op_module
+
+if global_np_float_precision == np.float32 :
+    global_default_fv_hh = 1e-2
+    global_default_dw_hh = 1e-2
+    global_default_places = 3
+else :
+    global_default_fv_hh = 1e-6
+    global_default_dw_hh = 1e-4
+    global_default_places = 5
+
+
+class TestEwaldRecp (unittest.TestCase) :
+    def setUp(self):
+        boxl = 6
+        box_pert = 1
+        self.natoms = 16
+        self.nframes = 2
+        self.ewald_h = 1
+        self.ewald_beta = 1
+        self.dbox = []
+        self.dcoord = []
+        self.dcharge = []
+        for ii in range(self.nframes):
+            # box
+            box = np.ones([3,3]) * boxl
+            box += np.random.random([3,3]) * box_pert
+            self.dbox.append(0.5 * (box + box.T))
+            # scaled 
+            self.coord = np.random.random([self.natoms, 3])
+            # real coords
+            self.dcoord.append(np.matmul(self.coord, box))
+            # charge
+            dcharge = np.random.random([self.natoms])
+            dcharge -= np.average(dcharge)
+            assert(np.abs(np.sum(self.dcharge) - 0) < 1e-12)
+            self.dcharge.append(dcharge)
+        self.dbox = np.array(self.dbox).reshape([self.nframes, 9])
+        self.dcoord = np.array(self.dcoord).reshape([self.nframes, 3*self.natoms])
+        self.dcharge = np.array(self.dcharge).reshape([self.nframes, self.natoms])
+        # place holders
+        self.coord      = tf.placeholder(global_tf_float_precision, [None, self.natoms * 3], name='t_coord')
+        self.charge     = tf.placeholder(global_tf_float_precision, [None, self.natoms], name='t_charge')
+        self.box        = tf.placeholder(global_tf_float_precision, [None, 9], name='t_box')
+        self.nloc    = tf.placeholder(tf.int32, [1], name = "t_nloc")        
+
+    def test_force(self):
+        hh = 1e-4
+        places = 5
+        sess = tf.Session()
+        t_energy, t_force, t_virial \
+            = op_module.ewald_recp(self.coord, self.charge, self.nloc, self.box, 
+                                   ewald_h = self.ewald_h,
+                                   ewald_beta = self.ewald_beta)
+        [force] = sess.run([t_force], 
+                           feed_dict = {
+                               self.coord:  self.dcoord,
+                               self.charge: self.dcharge,
+                               self.box:    self.dbox,
+                               self.nloc:   [self.natoms],
+                           })
+        for idx in range(self.natoms):
+            for dd in range(3):
+                dcoordp = np.copy(self.dcoord)
+                dcoordm = np.copy(self.dcoord)
+                dcoordp[:,idx*3+dd] = self.dcoord[:,idx*3+dd] + hh
+                dcoordm[:,idx*3+dd] = self.dcoord[:,idx*3+dd] - hh
+                energyp = sess.run([t_energy], 
+                                   feed_dict = {
+                                       self.coord:  dcoordp,
+                                       self.charge: self.dcharge,
+                                       self.box:    self.dbox,
+                                       self.nloc:   [self.natoms],
+                                   })                                
+                energym = sess.run([t_energy], 
+                                   feed_dict = {
+                                       self.coord:  dcoordm,
+                                       self.charge: self.dcharge,
+                                       self.box:    self.dbox,
+                                       self.nloc:   [self.natoms],
+                                   })
+                c_force = -(energyp[0] - energym[0]) / (2*hh)
+                for ff in range(self.nframes):
+                    self.assertAlmostEqual(c_force[ff], force[ff,idx*3+dd], 
+                                           places = places,
+                                           msg = "frame %d force component [%d,%d] failed" % (ff, idx, dd))
+
+
+                
+
+
+
diff --git a/source/train/CMakeLists.txt b/source/train/CMakeLists.txt
index 83f15df4c3..7b990d3df9 100644
--- a/source/train/CMakeLists.txt
+++ b/source/train/CMakeLists.txt
@@ -2,7 +2,7 @@
 
 configure_file("RunOptions.py.in" "${CMAKE_CURRENT_BINARY_DIR}/RunOptions.py" @ONLY)
 
-file(GLOB LIB_PY main.py common.py env.py compat.py Network.py Deep*.py Data.py DataSystem.py Model*.py Descrpt*.py Fitting.py Loss.py LearningRate.py Trainer.py TabInter.py ${CMAKE_CURRENT_BINARY_DIR}/RunOptions.py)
+file(GLOB LIB_PY main.py common.py env.py compat.py Network.py Deep*.py Data.py DataSystem.py Model*.py Descrpt*.py Fitting.py Loss.py LearningRate.py Trainer.py TabInter.py EwaldRecp.py ${CMAKE_CURRENT_BINARY_DIR}/RunOptions.py)
 
 file(GLOB CLS_PY  Local.py Slurm.py)
 
diff --git a/source/train/EwaldRecp.py b/source/train/EwaldRecp.py
new file mode 100644
index 0000000000..abbeba3608
--- /dev/null
+++ b/source/train/EwaldRecp.py
@@ -0,0 +1,22 @@
+import platform
+import os
+import numpy as np
+from deepmd.env import tf
+from deepmd.common import ClassArg
+from deepmd.RunOptions import global_tf_float_precision
+from deepmd.RunOptions import global_np_float_precision
+from deepmd.RunOptions import global_ener_float_precision
+from deepmd.RunOptions import global_cvt_2_tf_float
+from deepmd.RunOptions import global_cvt_2_ener_float
+
+if platform.system() == "Windows":
+    ext = "dll"
+elif platform.system() == "Darwin":
+    ext = "dylib"
+else:
+    ext = "so"
+
+module_path = os.path.dirname(os.path.realpath(__file__)) + "/"
+assert (os.path.isfile (module_path  + "libop_abi.{}".format(ext) )), "op module does not exist"
+op_module = tf.load_op_library(module_path + "libop_abi.{}".format(ext))
+

From d32926879716413e64d87f99167752bb1a83821c Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Fri, 18 Oct 2019 13:24:49 +0800
Subject: [PATCH 04/38] fix bug in ewald, tested virial

---
 source/lib/include/Ewald.h | 10 ++--
 source/tests/test_ewald.py | 96 +++++++++++++++++++++++++++++++++++---
 2 files changed, 95 insertions(+), 11 deletions(-)

diff --git a/source/lib/include/Ewald.h b/source/lib/include/Ewald.h
index 0fd110c1f0..f02b8600fd 100644
--- a/source/lib/include/Ewald.h
+++ b/source/lib/include/Ewald.h
@@ -99,7 +99,6 @@ cmpt_k(vector<int> & KK,
     assert((KK[dd] / 2) * 2 == KK[dd]);
   }
 }
-       
 
 // compute the reciprocal part of the Ewald sum.
 // outputs: energy force virial
@@ -174,9 +173,12 @@ EwaldReciprocal(VALUETYPE &			ener,
 	// \bm m and \vert m \vert^2
 	VALUETYPE rm[3] = {0,0,0};	  
 	for (int dd = 0; dd < 3; ++dd){
-	  rm[0] += mm[dd] * rec_box[dd*3+0];
-	  rm[1] += mm[dd] * rec_box[dd*3+1];
-	  rm[2] += mm[dd] * rec_box[dd*3+2];
+	  rm[0] += mm[dd] * rec_box[0*3+dd];
+	  rm[1] += mm[dd] * rec_box[1*3+dd];
+	  rm[2] += mm[dd] * rec_box[2*3+dd];
+	  // rm[0] += mm[dd] * rec_box[dd*3+0];
+	  // rm[1] += mm[dd] * rec_box[dd*3+1];
+	  // rm[2] += mm[dd] * rec_box[dd*3+2];
 	}
 	VALUETYPE mm2 = rm[0] * rm[0] + rm[1] * rm[1] + rm[2] * rm[2];
 	// energy
diff --git a/source/tests/test_ewald.py b/source/tests/test_ewald.py
index 202fb19755..3c7f96bd77 100644
--- a/source/tests/test_ewald.py
+++ b/source/tests/test_ewald.py
@@ -20,30 +20,36 @@
 
 class TestEwaldRecp (unittest.TestCase) :
     def setUp(self):
-        boxl = 6
-        box_pert = 1
+        boxl = 4.5 # NOTICE grid should not change before and after box pert...
+        box_pert = 0.2
         self.natoms = 16
         self.nframes = 2
         self.ewald_h = 1
         self.ewald_beta = 1
         self.dbox = []
         self.dcoord = []
+        self.rcoord = []
         self.dcharge = []
         for ii in range(self.nframes):
             # box
-            box = np.ones([3,3]) * boxl
+            box = np.eye(3) * boxl
+            box[1][1] += 1
+            box[2][2] += 2
             box += np.random.random([3,3]) * box_pert
-            self.dbox.append(0.5 * (box + box.T))
+            box = 0.5 * (box + box.T)
+            self.dbox.append(box)
             # scaled 
-            self.coord = np.random.random([self.natoms, 3])
+            coord = np.random.random([self.natoms, 3])
+            self.rcoord.append(coord)
             # real coords
-            self.dcoord.append(np.matmul(self.coord, box))
+            self.dcoord.append(np.matmul(coord, box))
             # charge
             dcharge = np.random.random([self.natoms])
             dcharge -= np.average(dcharge)
             assert(np.abs(np.sum(self.dcharge) - 0) < 1e-12)
             self.dcharge.append(dcharge)
         self.dbox = np.array(self.dbox).reshape([self.nframes, 9])
+        self.rcoord = np.array(self.rcoord).reshape([self.nframes, 3*self.natoms])
         self.dcoord = np.array(self.dcoord).reshape([self.nframes, 3*self.natoms])
         self.dcharge = np.array(self.dcharge).reshape([self.nframes, self.natoms])
         # place holders
@@ -54,7 +60,7 @@ def setUp(self):
 
     def test_force(self):
         hh = 1e-4
-        places = 5
+        places = 4
         sess = tf.Session()
         t_energy, t_force, t_virial \
             = op_module.ewald_recp(self.coord, self.charge, self.nloc, self.box, 
@@ -94,6 +100,82 @@ def test_force(self):
                                            msg = "frame %d force component [%d,%d] failed" % (ff, idx, dd))
 
 
+    def test_virial(self):
+        hh = 1e-4
+        places = 5
+        sess = tf.Session()
+        t_energy, t_force, t_virial \
+            = op_module.ewald_recp(self.coord, self.charge, self.nloc, self.box, 
+                                   ewald_h = self.ewald_h,
+                                   ewald_beta = self.ewald_beta)
+        [virial] = sess.run([t_virial], 
+                           feed_dict = {
+                               self.coord:  self.dcoord,
+                               self.charge: self.dcharge,
+                               self.box:    self.dbox,
+                               self.nloc:   [self.natoms],
+                           })
+
+        from scipy.stats import ortho_group
+
+        
+
+        self.dbox3 = np.reshape(self.dbox, [self.nframes, 3,3])
+        self.drbox3 = np.linalg.inv(self.dbox3)
+        # print(np.matmul(self.dbox3, self.drbox3))
+        # print(np.matmul(self.drbox3, self.dbox3))
+        self.dcoord3 = np.reshape(self.dcoord, [self.nframes, self.natoms, 3])
+        self.rcoord3 = np.matmul(self.dcoord3, self.drbox3)
+        # print(np.linalg.norm(self.dcoord - np.matmul(self.rcoord3, self.dbox3).reshape([self.nframes,-1])))
+        # print(np.matmul(self.dcoord3, self.drbox3))
+        # print('check rcoord ', np.linalg.norm(self.rcoord3 - self.rcoord.reshape([self.nframes, self.natoms, 3])))
+
+        num_deriv = np.zeros([self.nframes,3,3])
+        for ii in range(3):
+            for jj in range(3):
+                dbox3p = np.copy(self.dbox3)
+                dbox3m = np.copy(self.dbox3)
+                dbox3p[:,ii,jj] = self.dbox3[:,ii,jj] + hh
+                dbox3m[:,ii,jj] = self.dbox3[:,ii,jj] - hh
+                dboxp = np.reshape(dbox3p, [-1,9])
+                dboxm = np.reshape(dbox3m, [-1,9])
+                dcoord = self.dcoord
+                dcoord3p = np.matmul(self.rcoord3, dbox3p)
+                dcoord3m = np.matmul(self.rcoord3, dbox3m)
+                dcoordp = np.reshape(dcoord3p, [self.nframes,-1])
+                dcoordm = np.reshape(dcoord3m, [self.nframes,-1])
+                energyp = sess.run([t_energy],
+                                   feed_dict = {
+                                       self.coord:  dcoordp,
+                                       self.charge: self.dcharge,
+                                       self.box:    dboxp,
+                                       self.nloc:   [self.natoms],
+                                   })
+                energym = sess.run([t_energy], 
+                                   feed_dict = {
+                                       self.coord:  dcoordm,
+                                       self.charge: self.dcharge,
+                                       self.box:    dboxm,
+                                       self.nloc:   [self.natoms],
+                                   })
+                num_deriv[:,ii,jj] = -(energyp[0] - energym[0]) / (2.*hh)
+        dbox3t = np.transpose(self.dbox3, [0,2,1])
+        t_esti = np.matmul(num_deriv, dbox3t)
+        # # t_esti = np.matmul(num_deriv, self.dbox3)
+        # print(num_deriv[0])
+        # print(t_esti[0])
+        # # print(0.5 * (t_esti[0] + t_esti[0].T))
+        # print(virial[0].reshape([3,3]))
+        # # print(0.5 * (t_esti[0] + t_esti[0].T) - virial[0].reshape([3,3]))
+        # print(0.5 * (t_esti[0] + t_esti[0]) - virial[0].reshape([3,3]))
+        # print(0.5 * (t_esti[0] + t_esti[0].T) - virial[0].reshape([3,3]))        
+        for ff in range(self.nframes):
+            for ii in range(3):
+                for jj in range(3):                
+                    self.assertAlmostEqual(t_esti[ff][ii][jj], virial[ff,ii*3+jj], 
+                                           places = places,
+                                           msg = "frame %d forcvirial component [%d,%d] failed" % (ff, ii, jj))
+            
                 
 
 

From fdad3ca1b6a6015b87d2547e904950876e2b0537 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Wed, 23 Oct 2019 15:55:40 +0800
Subject: [PATCH 05/38] add test for ewald_recp python interface. add test for
 data type sel

---
 source/tests/test_deepmd_data.py | 56 ++++++++++++++++++++++++++++++++
 source/tests/test_ewald.py       | 35 ++++++++++++++++++++
 source/train/EwaldRecp.py        | 42 ++++++++++++++++++++++++
 3 files changed, 133 insertions(+)

diff --git a/source/tests/test_deepmd_data.py b/source/tests/test_deepmd_data.py
index 6db3017d00..005ab26893 100644
--- a/source/tests/test_deepmd_data.py
+++ b/source/tests/test_deepmd_data.py
@@ -10,6 +10,62 @@
 else:
     places = 12
 
+class TestDataTypeSel(unittest.TestCase):
+    def setUp(self):
+        self.data_name = 'test_data'
+        os.makedirs(self.data_name, exist_ok = True)
+        os.makedirs(os.path.join(self.data_name,'set.foo'), exist_ok = True)
+        np.savetxt(os.path.join(self.data_name, 'type.raw'), 
+                   np.array([0, 1, 1, 0, 1, 1]), 
+                   fmt = '%d')
+        self.nframes = 3
+        self.natoms = 6
+        # coord
+        path = os.path.join(self.data_name, 'set.foo', 'coord.npy')
+        self.coord = np.random.random([self.nframes, self.natoms, 3])
+        np.save(path, np.reshape(self.coord, [self.nframes, -1]))
+        self.coord = self.coord[:,[0,3,1,2,4,5],:]
+        self.coord = self.coord.reshape([self.nframes, -1])
+        # box
+        path = os.path.join(self.data_name, 'set.foo', 'box.npy')
+        self.box = np.random.random([self.nframes, 9])
+        np.save(path, self.box)
+        # value
+        path = os.path.join(self.data_name, 'set.foo', 'value_1.npy')
+        self.value_1 = np.arange(self.nframes * 2)
+        self.value_1 = np.reshape(self.value_1, [self.nframes, 2])
+        np.save(path, self.value_1)
+        # value
+        path = os.path.join(self.data_name, 'set.foo', 'value_2.npy')
+        self.value_2 = np.arange(self.nframes * 4)
+        self.value_2 = np.reshape(self.value_2, [self.nframes, 4])
+        np.save(path, self.value_2)
+
+    def tearDown(self) :
+        shutil.rmtree(self.data_name)
+
+    def test_load_set_1(self) :
+        dd = DeepmdData(self.data_name)\
+             .add('value_1', 1, atomic=True, must=True, type_sel = [0])
+        data = dd._load_set(os.path.join(self.data_name, 'set.foo'))
+        self.assertEqual(data['value_1'].shape, (self.nframes, 2))
+        for ii in range(self.nframes):
+            for jj in range(2):
+                self.assertAlmostEqual(data['value_1'][ii][jj],
+                                       self.value_1[ii][jj])
+                
+
+    def test_load_set_2(self) :
+        dd = DeepmdData(self.data_name)\
+             .add('value_2', 1, atomic=True, must=True, type_sel = [1])
+        data = dd._load_set(os.path.join(self.data_name, 'set.foo'))
+        self.assertEqual(data['value_2'].shape, (self.nframes, 4))
+        for ii in range(self.nframes):
+            for jj in range(4):
+                self.assertAlmostEqual(data['value_2'][ii][jj],
+                                       self.value_2[ii][jj])                
+
+
 class TestData (unittest.TestCase) :
     def setUp (self) :
         self.data_name = 'test_data'
diff --git a/source/tests/test_ewald.py b/source/tests/test_ewald.py
index 3c7f96bd77..4e8586aa3c 100644
--- a/source/tests/test_ewald.py
+++ b/source/tests/test_ewald.py
@@ -7,6 +7,7 @@
 from deepmd.RunOptions import global_np_float_precision
 from deepmd.RunOptions import global_ener_float_precision
 from deepmd.EwaldRecp import op_module
+from deepmd.EwaldRecp import EwaldRecp
 
 if global_np_float_precision == np.float32 :
     global_default_fv_hh = 1e-2
@@ -58,6 +59,40 @@ def setUp(self):
         self.box        = tf.placeholder(global_tf_float_precision, [None, 9], name='t_box')
         self.nloc    = tf.placeholder(tf.int32, [1], name = "t_nloc")        
 
+    def test_py_interface(self):
+        """Check that EwaldRecp.eval reproduces the raw ewald_recp op for energy, force and virial."""
+        places = 4
+        sess = tf.Session()
+        t_energy, t_force, t_virial \
+            = op_module.ewald_recp(self.coord, self.charge, self.nloc, self.box, 
+                                   ewald_h = self.ewald_h,
+                                   ewald_beta = self.ewald_beta)
+        [e, f, v] = sess.run([t_energy, t_force, t_virial],  # reference: op evaluated directly
+                           feed_dict = {
+                               self.coord:  self.dcoord,
+                               self.charge: self.dcharge,
+                               self.box:    self.dbox,
+                               self.nloc:   [self.natoms],
+                           })
+        er = EwaldRecp(self.ewald_h, self.ewald_beta)
+        e1, f1, v1 = er.eval(self.dcoord, self.dcharge, self.dbox)  # same inputs through the python wrapper
+        for ff in range(self.nframes):
+            self.assertAlmostEqual(e[ff], e1[ff], 
+                                   places = places,
+                                   msg = "frame %d energy failed" % (ff))
+            for idx in range(self.natoms):
+                for dd in range(3):
+                    self.assertAlmostEqual(f[ff, idx*3+dd], f1[ff,idx*3+dd], 
+                                           places = places,
+                                           msg = "frame %d force component [%d,%d] failed" % (ff, idx, dd))
+            for d0 in range(3):
+                for d1 in range(3):
+                    self.assertAlmostEqual(v[ff, d0*3+d1], v1[ff,d0*3+d1],  # compare against the wrapper's virial, not v itself
+                                           places = places,
+                                           msg = "frame %d virial component [%d,%d] failed" % (ff, d0, d1))
+
+
+
     def test_force(self):
         hh = 1e-4
         places = 4
diff --git a/source/train/EwaldRecp.py b/source/train/EwaldRecp.py
index abbeba3608..20217d0428 100644
--- a/source/train/EwaldRecp.py
+++ b/source/train/EwaldRecp.py
@@ -20,3 +20,45 @@
 assert (os.path.isfile (module_path  + "libop_abi.{}".format(ext) )), "op module does not exist"
 op_module = tf.load_op_library(module_path + "libop_abi.{}".format(ext))
 
+class EwaldRecp () :  # numpy-in / numpy-out wrapper around op_module.ewald_recp
+    def __init__(self, 
+                 hh,  # passed to the op as ewald_h
+                 beta):  # passed to the op as ewald_beta
+        self.hh = hh
+        self.beta = beta
+        self.sess = tf.Session()  # one session, reused by every eval() call
+
+    def eval(self,  # run the op once; returns (energy, force, virial) as produced by sess.run
+             coord,  # reshaped below to [nframes, 3*natoms]
+             charge,  # must be 2-D: its shape supplies (nframes, natoms)
+             box) :  # reshaped below to [nframes, 9]
+        coord = np.array(coord)
+        charge = np.array(charge)
+        box = np.array(box)
+        nframes = charge.shape[0]
+        natoms = charge.shape[1]
+        coord = np.reshape(coord, [nframes, 3*natoms])
+        box = np.reshape(box, [nframes, 9])
+        # place holders -- NOTE: these and the op below are created anew on every eval() call
+        t_coord      = tf.placeholder(global_tf_float_precision, [None, natoms * 3], name='t_coord')
+        t_charge     = tf.placeholder(global_tf_float_precision, [None, natoms], name='t_charge')
+        t_box        = tf.placeholder(global_tf_float_precision, [None, 9], name='t_box')
+        t_nloc    = tf.placeholder(tf.int32, [1], name = "t_nloc")
+        
+        t_energy, t_force, t_virial \
+            = op_module.ewald_recp(t_coord, t_charge, t_nloc, t_box, 
+                                   ewald_h = self.hh,
+                                   ewald_beta = self.beta)
+
+        [energy, force, virial] \
+            = self.sess.run([t_energy, t_force, t_virial], 
+                            feed_dict = {
+                                t_coord:  coord,
+                                t_charge: charge,
+                                t_box:    box,
+                                t_nloc:   [natoms],  # natoms is fed as nloc
+                            })
+
+        return energy, force, virial  # the three outputs of the op, in this order
+             
+             

From 8b729fe56d4bb695f6485101c047698176736470 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Sun, 27 Oct 2019 17:33:54 +0800
Subject: [PATCH 06/38] fix bug of wrong index in prod virial. more strict
 virial test

---
 source/lib/include/Ewald.h           |   1 +
 source/op/prod_virial.cc             |   6 +-
 source/op/prod_virial_grad.cc        |   6 +-
 source/op/prod_virial_se_a.cc        |   2 +-
 source/op/prod_virial_se_a_grad.cc   |   2 +-
 source/tests/common.py               | 146 ++++++++++++++++++---------
 source/tests/test_descrpt_nonsmth.py |  55 ++++------
 source/tests/test_descrpt_se_ar.py   |  17 ++--
 source/tests/test_descrpt_se_r.py    |  17 ++--
 source/tests/test_descrpt_smooth.py  |  17 ++--
 source/tests/test_ewald.py           |   2 +-
 source/tests/test_tab_nonsmth.py     |  19 ++--
 source/tests/test_tab_smooth.py      |  19 ++--
 13 files changed, 185 insertions(+), 124 deletions(-)

diff --git a/source/lib/include/Ewald.h b/source/lib/include/Ewald.h
index f02b8600fd..e6fc837386 100644
--- a/source/lib/include/Ewald.h
+++ b/source/lib/include/Ewald.h
@@ -5,6 +5,7 @@
 
 #include "SimulationRegion.h"
 
+// Coulomb constant times e^2 in eV*Angstrom: 8.988e9 / pc.electron_volt / pc.angstrom * (1.602e-19)**2
 const double ElectrostaticConvertion = 14.39964535475696995031;
 
 template <typename VALUETYPE>
diff --git a/source/op/prod_virial.cc b/source/op/prod_virial.cc
index df061aa12f..55b0b4b60d 100644
--- a/source/op/prod_virial.cc
+++ b/source/op/prod_virial.cc
@@ -147,7 +147,7 @@ class ProdVirialOp : public OpKernel {
 	      VALUETYPE pref = -1.0 * net_deriv (net_iter + i_idx * ndescrpt + aa);
 	      for (int dd0 = 0; dd0 < 3; ++dd0){
 		for (int dd1 = 0; dd1 < 3; ++dd1){
-		  VALUETYPE tmp_v = pref * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd0) *  in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 3 + dd1);
+		  VALUETYPE tmp_v = pref * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) *  in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 3 + dd0);
 		  virial (virial_iter + dd0 * 3 + dd1) += tmp_v;
 		  atom_virial (atom_virial_iter + j_idx * 9 + dd0 * 3 + dd1) += tmp_v;
 		}
@@ -159,7 +159,7 @@ class ProdVirialOp : public OpKernel {
 	      VALUETYPE pref = -1.0 * net_deriv (net_iter + i_idx * ndescrpt + aa);
 	      for (int dd0 = 0; dd0 < 3; ++dd0){
 		for (int dd1 = 0; dd1 < 3; ++dd1){
-		  VALUETYPE tmp_v = pref * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd0) *  in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 6 + dd1);
+		  VALUETYPE tmp_v = pref * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) *  in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 6 + dd0);
 		  virial (virial_iter + dd0 * 3 + dd1) += tmp_v;
 		  atom_virial (atom_virial_iter + j_idx * 9 + dd0 * 3 + dd1) += tmp_v;
 		}
@@ -173,7 +173,7 @@ class ProdVirialOp : public OpKernel {
 	      VALUETYPE pref = -1.0 * net_deriv (net_iter + i_idx * ndescrpt + aa);
 	      for (int dd0 = 0; dd0 < 3; ++dd0){
 		for (int dd1 = 0; dd1 < 3; ++dd1){
-		  VALUETYPE tmp_v = pref * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd0) *  in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 9 + dd1);
+		  VALUETYPE tmp_v = pref * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) *  in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 9 + dd0);
 		  virial (virial_iter + dd0 * 3 + dd1) += tmp_v;
 		  atom_virial (atom_virial_iter + j_idx * 9 + dd0 * 3 + dd1) += tmp_v;
 		}
diff --git a/source/op/prod_virial_grad.cc b/source/op/prod_virial_grad.cc
index aaa0e75b14..5257467029 100644
--- a/source/op/prod_virial_grad.cc
+++ b/source/op/prod_virial_grad.cc
@@ -152,7 +152,7 @@ class ProdVirialGradOp : public OpKernel
 	      for (int dd0 = 0; dd0 < 3; ++dd0){
 		for (int dd1 = 0; dd1 < 3; ++dd1){
 		  grad_net (grad_net_iter + i_idx * ndescrpt + aa) += 
-		      -1.0 * grad (grad_iter + dd0 * 3 + dd1) * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd0) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 3 + dd1);
+		      -1.0 * grad (grad_iter + dd0 * 3 + dd1) * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 3 + dd0);
 		}
 	      }
 	    }
@@ -162,7 +162,7 @@ class ProdVirialGradOp : public OpKernel
 	      for (int dd0 = 0; dd0 < 3; ++dd0){
 		for (int dd1 = 0; dd1 < 3; ++dd1){
 		  grad_net (grad_net_iter + i_idx * ndescrpt + aa) += 
-		      -1.0 * grad (grad_iter + dd0 * 3 + dd1) * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd0) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 6 + dd1);
+		      -1.0 * grad (grad_iter + dd0 * 3 + dd1) * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 6 + dd0);
 		}
 	      }
 	    }
@@ -174,7 +174,7 @@ class ProdVirialGradOp : public OpKernel
 	      for (int dd0 = 0; dd0 < 3; ++dd0){
 		for (int dd1 = 0; dd1 < 3; ++dd1){
 		  grad_net (grad_net_iter + i_idx * ndescrpt + aa) += 
-		      -1.0 * grad (grad_iter + dd0 * 3 + dd1) * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd0) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 9 + dd1);
+		      -1.0 * grad (grad_iter + dd0 * 3 + dd1) * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 9 + dd0);
 		}
 	      }
 	    }
diff --git a/source/op/prod_virial_se_a.cc b/source/op/prod_virial_se_a.cc
index 89077750af..2f71d37505 100644
--- a/source/op/prod_virial_se_a.cc
+++ b/source/op/prod_virial_se_a.cc
@@ -134,7 +134,7 @@ class ProdVirialSeAOp : public OpKernel {
 	    VALUETYPE pref = -1.0 * net_deriv (net_iter + i_idx * ndescrpt + aa);
 	    for (int dd0 = 0; dd0 < 3; ++dd0){
 	      for (int dd1 = 0; dd1 < 3; ++dd1){
-		VALUETYPE tmp_v = pref * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd0) *  in_deriv (in_iter + i_idx * ndescrpt * 3 + aa * 3 + dd1);
+		VALUETYPE tmp_v = pref * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) *  in_deriv (in_iter + i_idx * ndescrpt * 3 + aa * 3 + dd0);
 		virial (virial_iter + dd0 * 3 + dd1) -= tmp_v;
 		atom_virial (atom_virial_iter + j_idx * 9 + dd0 * 3 + dd1) -= tmp_v;
 	      }
diff --git a/source/op/prod_virial_se_a_grad.cc b/source/op/prod_virial_se_a_grad.cc
index 0d19a1c19a..660f652566 100644
--- a/source/op/prod_virial_se_a_grad.cc
+++ b/source/op/prod_virial_se_a_grad.cc
@@ -137,7 +137,7 @@ class ProdVirialSeAGradOp : public OpKernel
 	    for (int dd0 = 0; dd0 < 3; ++dd0){
 	      for (int dd1 = 0; dd1 < 3; ++dd1){
 		grad_net (grad_net_iter + i_idx * ndescrpt + aa) -= 
-		    -1.0 * grad (grad_iter + dd0 * 3 + dd1) * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd0) * in_deriv (in_iter + i_idx * ndescrpt * 3 + aa * 3 + dd1);
+		    -1.0 * grad (grad_iter + dd0 * 3 + dd1) * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) * in_deriv (in_iter + i_idx * ndescrpt * 3 + aa * 3 + dd0);
 	      }
 	    }
 	  }
diff --git a/source/tests/common.py b/source/tests/common.py
index 812a238c4a..e9ad53dbf2 100644
--- a/source/tests/common.py
+++ b/source/tests/common.py
@@ -11,7 +11,7 @@
     global_default_dw_hh = 1e-2
     global_default_places = 3
 else :
-    global_default_fv_hh = 1e-6
+    global_default_fv_hh = 1e-5
     global_default_dw_hh = 1e-4
     global_default_places = 5
 
@@ -38,23 +38,30 @@ def __init__ (self,
                   seed = 1) :
         coord = [[0.0, 0.0, 0.1], [1.1, 0.0, 0.1], [0.0, 1.1, 0.1], 
                  [4.0, 0.0, 0.0], [5.1, 0.0, 0.0], [4.0, 1.1, 0.0]]
+        self.nframes = 1
         self.coord = np.array(coord)
+        self.coord = self._copy_nframes(self.coord)
         np.random.seed(seed)
         self.coord += rand_pert * np.random.random(self.coord.shape)
         self.fparam = np.array([[0.1, 0.2]])
         self.aparam = np.tile(self.fparam, [1, 6])
+        self.fparam = self._copy_nframes(self.fparam)
+        self.aparam = self._copy_nframes(self.aparam)
         self.atype = np.array([0, 1, 1, 0, 1, 1], dtype = int)
         self.cell = 20 * np.eye(3)
-        self.nframes = 1
+        self.cell = self._copy_nframes(self.cell)
         self.coord = self.coord.reshape([self.nframes, -1])
         self.cell = self.cell.reshape([self.nframes, -1])
         self.natoms = len(self.atype)        
         self.idx_map = np.lexsort ((np.arange(self.natoms), self.atype))
-        self.coord = self.coord.reshape([1, -1, 3])
+        self.coord = self.coord.reshape([self.nframes, -1, 3])
         self.coord = self.coord[:,self.idx_map,:]
-        self.coord = self.coord.reshape([1, -1])        
+        self.coord = self.coord.reshape([self.nframes, -1])        
         self.atype = self.atype[self.idx_map]
-        self.datype = np.tile(self.atype, [self.nframes,1])
+        self.datype = self._copy_nframes(self.atype)
+
+    def _copy_nframes(self, xx):  # helper used by __init__ to replicate data for self.nframes frames
+        return np.tile(xx, [self.nframes, 1])  # np.tile with reps [self.nframes, 1]
         
     def get_data(self) :
         return self.coord, self.cell, self.datype
@@ -68,39 +75,80 @@ def get_natoms (self) :
     def get_ntypes(self) :
         return max(self.atype) + 1
 
+    # def get_test_box_data (self,
+    #                        hh) :
+    #     coord0_, box0_, type0_ = self.get_data()
+    #     coord0 = coord0_[0]
+    #     box0 = box0_[0]
+    #     type0 = type0_[0]
+    #     nc = np.array( [coord0, coord0*(1+hh), coord0*(1-hh)] )
+    #     nb = np.array( [box0, box0*(1+hh), box0*(1-hh)] )
+    #     nt = np.array( [type0, type0, type0] )
+    #     for dd in range(3) :
+    #         tmpc = np.copy (coord0)
+    #         tmpb = np.copy (box0)
+    #         tmpc = np.reshape(tmpc, [-1, 3])
+    #         tmpc [:,dd] *= (1+hh)
+    #         tmpc = np.reshape(tmpc, [-1])
+    #         tmpb = np.reshape(tmpb, [-1, 3])
+    #         tmpb [dd,:] *= (1+hh)
+    #         tmpb = np.reshape(tmpb, [-1])
+    #         nc = np.append (nc, [tmpc], axis = 0)
+    #         nb = np.append (nb, [tmpb], axis = 0)
+    #         nt = np.append (nt, [type0], axis = 0)
+    #         tmpc = np.copy (coord0)
+    #         tmpb = np.copy (box0)
+    #         tmpc = np.reshape(tmpc, [-1, 3])
+    #         tmpc [:,dd] *= (1-hh)
+    #         tmpc = np.reshape(tmpc, [-1])
+    #         tmpb = np.reshape(tmpb, [-1, 3])
+    #         tmpb [dd,:] *= (1-hh)
+    #         tmpb = np.reshape(tmpb, [-1])
+    #         nc = np.append (nc, [tmpc], axis = 0)
+    #         nb = np.append (nb, [tmpb], axis = 0)
+    #         nt = np.append (nt, [type0], axis = 0)
+    #     return nc, nb, nt
+
     def get_test_box_data (self,
-                           hh) :
+                           hh,  # finite-difference step added to / subtracted from one box component
+                           rand_pert = 0.1) :  # amplitude of the random perturbation added to the box
         coord0_, box0_, type0_ = self.get_data()
-        coord0 = coord0_[0]
-        box0 = box0_[0]
-        type0 = type0_[0]
-        nc = np.array( [coord0, coord0*(1+hh), coord0*(1-hh)] )
-        nb = np.array( [box0, box0*(1+hh), box0*(1-hh)] )
-        nt = np.array( [type0, type0, type0] )
-        for dd in range(3) :
-            tmpc = np.copy (coord0)
-            tmpb = np.copy (box0)
-            tmpc = np.reshape(tmpc, [-1, 3])
-            tmpc [:,dd] *= (1+hh)
-            tmpc = np.reshape(tmpc, [-1])
-            tmpb = np.reshape(tmpb, [-1, 3])
-            tmpb [dd,:] *= (1+hh)
-            tmpb = np.reshape(tmpb, [-1])
-            nc = np.append (nc, [tmpc], axis = 0)
-            nb = np.append (nb, [tmpb], axis = 0)
-            nt = np.append (nt, [type0], axis = 0)
-            tmpc = np.copy (coord0)
-            tmpb = np.copy (box0)
-            tmpc = np.reshape(tmpc, [-1, 3])
-            tmpc [:,dd] *= (1-hh)
-            tmpc = np.reshape(tmpc, [-1])
-            tmpb = np.reshape(tmpb, [-1, 3])
-            tmpb [dd,:] *= (1-hh)
-            tmpb = np.reshape(tmpb, [-1])
-            nc = np.append (nc, [tmpc], axis = 0)
-            nb = np.append (nb, [tmpb], axis = 0)
-            nt = np.append (nt, [type0], axis = 0)
-        return nc, nb, nt
+        coord = coord0_[0]  # only the first frame is used
+        box = box0_[0]
+        box += rand_pert * np.random.random(box.shape)  # NOTE(review): `+=` on box0_[0] looks in-place, so the array from get_data() is perturbed too -- confirm intended
+        atype = type0_[0]
+        nframes = 1
+        natoms = coord.size // 3
+        box3 = np.reshape(box, [nframes, 3,3])
+        rbox3 = np.linalg.inv(box3)  # inverse of the perturbed box
+        coord3 = np.reshape(coord, [nframes, natoms, 3])
+        rcoord3 = np.matmul(coord3, rbox3)  # coordinates relative to the box; held fixed while the box is varied
+        
+        all_coord = [coord.reshape([nframes, natoms*3])]  # frame 0: reference (no +/-hh shift)
+        all_box = [box.reshape([nframes,9])]
+        all_atype = [atype]
+        for ii in range(3):  # then one (+hh, -hh) pair of frames per box component (ii,jj)
+            for jj in range(3):
+                box3p = np.copy(box3)
+                box3m = np.copy(box3)
+                box3p[:,ii,jj] = box3[:,ii,jj] + hh
+                box3m[:,ii,jj] = box3[:,ii,jj] - hh
+                boxp = np.reshape(box3p, [-1,9])
+                boxm = np.reshape(box3m, [-1,9])
+                coord3p = np.matmul(rcoord3, box3p)  # rebuild coordinates in the shifted box
+                coord3m = np.matmul(rcoord3, box3m)
+                coordp = np.reshape(coord3p, [nframes,-1])
+                coordm = np.reshape(coord3m, [nframes,-1])
+                all_coord.append(coordp)  # frame 1+(ii*3+jj)*2 is the +hh one ...
+                all_coord.append(coordm)  # ... followed by the -hh one
+                all_box.append(boxp)
+                all_box.append(boxm)
+                all_atype.append(atype)
+                all_atype.append(atype)
+        all_coord = np.reshape(all_coord, [-1, natoms * 3])
+        all_box = np.reshape(all_box, [-1, 9])
+        all_atype = np.reshape(all_atype, [-1, natoms])        
+        return all_coord, all_box, all_atype  # 1 + 2*9 = 19 frames each
 
 
 def force_test (inter, 
@@ -178,16 +226,22 @@ def virial_test (inter,
                               inter.type:      dtype,
                               inter.tnatoms:   inter.natoms}
         )
-    # check
-    ana_vir3 = (virial[0][0] + virial[0][4] + virial[0][8])/3. / comp_vol(dbox[0])
-    num_vir3 = -(energy[1] - energy[2]) / (comp_vol(dbox[1]) - comp_vol(dbox[2]))
-    testCase.assertAlmostEqual(ana_vir3, num_vir3, places=places)
-    vir_idx = [0, 4, 8]
-    for dd in range (3) :
-        ana_v = (virial[0][vir_idx[dd]] / comp_vol(dbox[0]))
-        idx = 2 * (dd+1) + 1
-        num_v = ( -(energy[idx] - energy[idx+1]) / (comp_vol(dbox[idx]) - comp_vol(dbox[idx+1])) )
-        testCase.assertAlmostEqual(ana_v, num_v, places=places)
+    ana_vir = virial[0].reshape([3,3])
+    num_vir = np.zeros([3,3])
+    for ii in range(3):
+        for jj in range(3):
+            ep = energy[1+(ii*3+jj)*2+0]
+            em = energy[1+(ii*3+jj)*2+1]
+            num_vir[ii][jj] = -(ep - em) / (2.*hh)
+    num_vir = np.transpose(num_vir, [1,0])    
+    box3 = dbox[0].reshape([3,3])
+    num_vir = np.matmul(num_vir, box3)
+    for ii in range(3):
+        for jj in range(3):
+            testCase.assertAlmostEqual(ana_vir[ii][jj], num_vir[ii][jj],
+                                       places=places, 
+                                       msg = 'virial component %d %d ' % (ii,jj))
+    
 
 
 def force_dw_test (inter, 
diff --git a/source/tests/test_descrpt_nonsmth.py b/source/tests/test_descrpt_nonsmth.py
index 0414607b58..91a8915ef8 100644
--- a/source/tests/test_descrpt_nonsmth.py
+++ b/source/tests/test_descrpt_nonsmth.py
@@ -24,9 +24,9 @@
 from deepmd.RunOptions import global_ener_float_precision
 
 class Inter():
-    def __init__ (self,
-                  data,
-                  comp = 0) :
+    def setUp (self,
+               data,
+               comp = 0) :
         self.sess = tf.Session()
         self.data = data
         self.natoms = self.data.get_natoms()
@@ -77,12 +77,13 @@ def comp_ef (self,
                  tnatoms,
                  name,
                  reuse = None) :
+        t_default_mesh = tf.constant(self.default_mesh)
         descrpt, descrpt_deriv, rij, nlist, axis, rot_mat \
             = op_module.descrpt (dcoord, 
                                  dtype,
                                  tnatoms,
                                  dbox, 
-                                 tf.constant(self.default_mesh),
+                                 t_default_mesh,
                                  self.t_avg,
                                  self.t_std,
                                  rcut_a = self.rcut_a, 
@@ -153,49 +154,29 @@ def comp_v_dw (self,
 
 
 class TestNonSmooth(Inter, unittest.TestCase):
-    def __init__ (self, *args, **kwargs):
+    # def __init__ (self, *args, **kwargs):
+    #     self.places = 5
+    #     data = Data()
+    #     Inter.__init__(self, data)
+    #     unittest.TestCase.__init__(self, *args, **kwargs)
+    #     self.controller = object()
+
+    def setUp(self):  # unittest fixture hook, run before each test method
         self.places = 5
         data = Data()
-        Inter.__init__(self, data)
-        unittest.TestCase.__init__(self, *args, **kwargs)
-        self.controller = object()
+        Inter.setUp(self, data)  # Inter.setUp opens the tf.Session and stores the data
 
     def test_force (self) :
-        force_test(self, self)
-        # t_energy, t_force, t_virial \
-        #     = self.comp_ef (self.coord, self.box, self.type, self.tnatoms, name = "test")
-        # self.sess.run (tf.global_variables_initializer())
-        # dcoord, dbox, dtype = self.data.get_data ()
-        # hh = 1e-6
-        # dcoordp = np.copy(dcoord)
-        # dcoordm = np.copy(dcoord)
-        # dcoordp[0,0] = dcoord[0,0] + hh
-        # dcoordm[0,0] = dcoord[0,0] - hh
-        # [axis0, nlist0, d0] = self.sess.run ([self.axis, self.nlist, self.descrpt], 
-        #                                  feed_dict = {
-        #                                      self.coord:     dcoordp,
-        #                                      self.box:       dbox,
-        #                                      self.type:      dtype,
-        #                                      self.tnatoms:   self.natoms}
-        # )
-        # [axis1, nlist1, d1] = self.sess.run ([self.axis, self.nlist, self.descrpt], 
-        #                                  feed_dict = {
-        #                                      self.coord:     dcoordm,
-        #                                      self.box:       dbox,
-        #                                      self.type:      dtype,
-        #                                      self.tnatoms:   self.natoms}
-        # )
-        # print((nlist0 - nlist1))
-        # print((axis0 - axis1))
+        force_test(self, self, suffix = '_se')
 
     def test_virial (self) :
-        virial_test(self, self)
+        virial_test(self, self, suffix = '_se')
 
     def test_force_dw (self) :
-        force_dw_test(self, self)
+        force_dw_test(self, self, suffix = '_se')
 
     def test_virial_dw (self) :
-        virial_dw_test(self, self)
+        virial_dw_test(self, self, suffix = '_se')
 
 
 if __name__ == '__main__':
diff --git a/source/tests/test_descrpt_se_ar.py b/source/tests/test_descrpt_se_ar.py
index 823bbb67ec..651724ae7a 100644
--- a/source/tests/test_descrpt_se_ar.py
+++ b/source/tests/test_descrpt_se_ar.py
@@ -26,8 +26,8 @@
 from deepmd.RunOptions import global_ener_float_precision
 
 class Inter():
-    def __init__ (self, 
-                  data) :
+    def setUp (self, 
+               data) :
         self.sess = tf.Session()
         self.data = data
         self.natoms = self.data.get_natoms()
@@ -99,11 +99,16 @@ def comp_ef (self,
 
 
 class TestDescrptAR(Inter, unittest.TestCase):
-    def __init__ (self, *args, **kwargs):
+    # def __init__ (self, *args, **kwargs):
+    #     data = Data()
+    #     Inter.__init__(self, data)
+    #     unittest.TestCase.__init__(self, *args, **kwargs)
+    #     self.controller = object()
+
+    def setUp(self):  # unittest fixture hook, run before each test method
+        self.places = 5
         data = Data()
-        Inter.__init__(self, data)
-        unittest.TestCase.__init__(self, *args, **kwargs)
-        self.controller = object()
+        Inter.setUp(self, data)  # Inter.setUp opens the tf.Session and stores the data
 
     def test_force (self) :
         force_test(self, self, suffix = '_se_ar')
diff --git a/source/tests/test_descrpt_se_r.py b/source/tests/test_descrpt_se_r.py
index 83d6dd1f47..ece2fb229c 100644
--- a/source/tests/test_descrpt_se_r.py
+++ b/source/tests/test_descrpt_se_r.py
@@ -26,8 +26,8 @@
 from deepmd.RunOptions import global_ener_float_precision
 
 class Inter():
-    def __init__ (self, 
-                  data) :
+    def setUp (self, 
+               data) :
         self.sess = tf.Session()
         self.data = data
         self.natoms = self.data.get_natoms()
@@ -137,11 +137,16 @@ def comp_v_dw (self,
 
 
 class TestSmooth(Inter, unittest.TestCase):
-    def __init__ (self, *args, **kwargs):
+    # def __init__ (self, *args, **kwargs):
+    #     data = Data()
+    #     Inter.__init__(self, data)
+    #     unittest.TestCase.__init__(self, *args, **kwargs)
+    #     self.controller = object()
+
+    def setUp(self):  # unittest fixture hook, run before each test method
+        self.places = 5
         data = Data()
-        Inter.__init__(self, data)
-        unittest.TestCase.__init__(self, *args, **kwargs)
-        self.controller = object()
+        Inter.setUp(self, data)  # Inter.setUp opens the tf.Session and stores the data
 
     def test_force (self) :
         force_test(self, self, suffix = '_se_r')
diff --git a/source/tests/test_descrpt_smooth.py b/source/tests/test_descrpt_smooth.py
index f718a5925d..415f56a9aa 100644
--- a/source/tests/test_descrpt_smooth.py
+++ b/source/tests/test_descrpt_smooth.py
@@ -26,8 +26,8 @@
 from deepmd.RunOptions import global_ener_float_precision
 
 class Inter():
-    def __init__ (self, 
-                  data) :
+    def setUp (self, 
+               data) :
         self.sess = tf.Session()
         self.data = data
         self.natoms = self.data.get_natoms()
@@ -148,11 +148,16 @@ def comp_v_dw (self,
 
 
 class TestSmooth(Inter, unittest.TestCase):
-    def __init__ (self, *args, **kwargs):
+    # def __init__ (self, *args, **kwargs):
+    #     data = Data()
+    #     Inter.__init__(self, data)
+    #     unittest.TestCase.__init__(self, *args, **kwargs)
+    #     self.controller = object()
+
+    def setUp(self):  # unittest fixture hook, run before each test method
+        self.places = 5
         data = Data()
-        Inter.__init__(self, data)
-        unittest.TestCase.__init__(self, *args, **kwargs)
-        self.controller = object()
+        Inter.setUp(self, data)  # Inter.setUp opens the tf.Session and stores the data
 
     def test_force (self) :
         force_test(self, self, suffix = '_smth')
diff --git a/source/tests/test_ewald.py b/source/tests/test_ewald.py
index 4e8586aa3c..b6aee7d6b0 100644
--- a/source/tests/test_ewald.py
+++ b/source/tests/test_ewald.py
@@ -209,7 +209,7 @@ def test_virial(self):
                 for jj in range(3):                
                     self.assertAlmostEqual(t_esti[ff][ii][jj], virial[ff,ii*3+jj], 
                                            places = places,
-                                           msg = "frame %d forcvirial component [%d,%d] failed" % (ff, ii, jj))
+                                           msg = "frame %d virial component [%d,%d] failed" % (ff, ii, jj))
             
                 
 
diff --git a/source/tests/test_tab_nonsmth.py b/source/tests/test_tab_nonsmth.py
index ef77792071..007d41cd63 100644
--- a/source/tests/test_tab_nonsmth.py
+++ b/source/tests/test_tab_nonsmth.py
@@ -35,10 +35,10 @@ def _make_tab(ntype) :
 
 
 class IntplInter(Inter):
-    def __init__ (self, 
-                  data) :
+    def setUp (self, 
+               data) :
         # tabulated
-        Inter.__init__(self, data)
+        Inter.setUp(self, data)
         _make_tab(data.get_ntypes())
         self.srtab = TabInter('tab.xvg')
         self.smin_alpha = 0.3
@@ -153,12 +153,17 @@ def comp_interpl_ef (self,
     
 
 class TestTabNonSmooth(IntplInter, unittest.TestCase):
-    def __init__ (self, *args, **kwargs):
+    # def __init__ (self, *args, **kwargs):
+    #     self.places = 5
+    #     data = Data()
+    #     IntplInter.__init__(self, data)
+    #     unittest.TestCase.__init__(self, *args, **kwargs)
+    #     self.controller = object()
+
+    def setUp(self):  # unittest fixture hook, run before each test method
         self.places = 5
         data = Data()
-        IntplInter.__init__(self, data)
-        unittest.TestCase.__init__(self, *args, **kwargs)
-        self.controller = object()
+        IntplInter.setUp(self, data)  # IntplInter.setUp calls Inter.setUp, then writes and loads the table
 
     def test_force (self) :
         force_test(self, self, places=5, suffix = '_tab')
diff --git a/source/tests/test_tab_smooth.py b/source/tests/test_tab_smooth.py
index 28219cd504..6d34aa06d1 100644
--- a/source/tests/test_tab_smooth.py
+++ b/source/tests/test_tab_smooth.py
@@ -35,10 +35,10 @@ def _make_tab(ntype) :
 
 
 class IntplInter(Inter):
-    def __init__ (self, 
-                  data) :
+    def setUp (self, 
+               data) :
         # tabulated
-        Inter.__init__(self, data)
+        Inter.setUp(self, data)
         _make_tab(data.get_ntypes())
         self.srtab = TabInter('tab.xvg')
         self.smin_alpha = 0.3
@@ -151,12 +151,17 @@ def comp_ef (self,
     
 
 class TestTabSmooth(IntplInter, unittest.TestCase):
-    def __init__ (self, *args, **kwargs):
+    # def __init__ (self, *args, **kwargs):
+    #     self.places = 5
+    #     data = Data()
+    #     IntplInter.__init__(self, data)
+    #     unittest.TestCase.__init__(self, *args, **kwargs)
+    #     self.controller = object()
+
+    def setUp(self):  # unittest fixture hook, run before each test method
         self.places = 5
         data = Data()
-        IntplInter.__init__(self, data)
-        unittest.TestCase.__init__(self, *args, **kwargs)
-        self.controller = object()
+        IntplInter.setUp(self, data)  # IntplInter.setUp calls Inter.setUp, then writes and loads the table
 
     def test_force (self) :
         force_test(self, self, places=5, suffix = '_tab_smth')

From c23adeeda7c879f596be98fb8825b12a07b856eb Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Mon, 28 Oct 2019 11:12:53 +0800
Subject: [PATCH 07/38] implement data modifier, subtract long-range
 interaction from data

---
 source/scripts/freeze.py           |   2 +-
 source/tests/test_data_modifier.py | 157 ++++++++++++++++
 source/tests/test_sel_idx.py       |  20 ++
 source/train/CMakeLists.txt        |   2 +-
 source/train/Data.py               |  14 +-
 source/train/DataModifier.py       | 293 +++++++++++++++++++++++++++++
 source/train/DataSystem.py         |   7 +-
 source/train/DeepEval.py           |  22 +--
 source/train/DescrptSeA.py         |  10 +
 source/train/common.py             |  11 ++
 source/train/train.py              |  27 ++-
 11 files changed, 542 insertions(+), 23 deletions(-)
 create mode 100644 source/tests/test_data_modifier.py
 create mode 100644 source/tests/test_sel_idx.py
 create mode 100644 source/train/DataModifier.py

diff --git a/source/scripts/freeze.py b/source/scripts/freeze.py
index 62a7f4559f..06f1df86c1 100755
--- a/source/scripts/freeze.py
+++ b/source/scripts/freeze.py
@@ -41,7 +41,7 @@ def _make_node_names(model_type = None) :
     elif model_type == 'wfc':
         nodes = "o_wfc,descrpt_attr/rcut,descrpt_attr/ntypes,model_attr/tmap,model_attr/sel_type,model_attr/model_type"
     elif model_type == 'dipole':
-        nodes = "o_dipole,descrpt_attr/rcut,descrpt_attr/ntypes,model_attr/tmap,model_attr/sel_type,model_attr/model_type"
+        nodes = "o_dipole,o_rmat,o_rmat_deriv,o_nlist,o_rij,descrpt_attr/rcut,descrpt_attr/ntypes,descrpt_attr/sel,descrpt_attr/ndescrpt,model_attr/tmap,model_attr/sel_type,model_attr/model_type"
     elif model_type == 'polar':
         nodes = "o_polar,descrpt_attr/rcut,descrpt_attr/ntypes,model_attr/tmap,model_attr/sel_type,model_attr/model_type"
     else:
diff --git a/source/tests/test_data_modifier.py b/source/tests/test_data_modifier.py
new file mode 100644
index 0000000000..dd41a27863
--- /dev/null
+++ b/source/tests/test_data_modifier.py
@@ -0,0 +1,157 @@
+import os,sys,platform,json
+import numpy as np
+import unittest
+from deepmd.env import tf
+
+from deepmd.common import j_must_have, data_requirement
+from deepmd.RunOptions import RunOptions
+from deepmd.Trainer import NNPTrainer
+from deepmd.DataSystem import DeepmdDataSystem
+from deepmd.RunOptions import global_tf_float_precision
+from deepmd.RunOptions import global_np_float_precision
+from deepmd.RunOptions import global_ener_float_precision
+from deepmd.EwaldRecp import EwaldRecp
+from deepmd.DataModifier import DipoleChargeModifier
+
+from common import Data
+
+if global_np_float_precision == np.float32 :
+    global_default_fv_hh = 1e-2
+    global_default_dw_hh = 1e-2
+    global_default_places = 3
+else :
+    global_default_fv_hh = 1e-6
+    global_default_dw_hh = 1e-4
+    global_default_places = 5
+
+modifier_datapath = 'data_modifier'
+
+class Args() :
+    INPUT = os.path.join(modifier_datapath, 'dipole.json')
+    restart = None
+    init_model = None
+    inter_threads = 0
+
+class TestDataModifier (unittest.TestCase) :
+
+    def setUp(self):
+        # with tf.variable_scope('load', reuse = False) :
+        tf.reset_default_graph()        
+        self._setUp()
+
+    def tearDown(self):
+        tf.reset_default_graph()        
+
+    def _setUp(self):
+        args = Args()
+        run_opt = RunOptions(args, False)
+        with open (args.INPUT, 'r') as fp:
+           jdata = json.load (fp)
+
+        # init model
+        model = NNPTrainer (jdata, run_opt = run_opt)
+        rcut = model.model.get_rcut()
+
+        # init data system
+        systems = j_must_have(jdata['training'], 'systems')
+        set_pfx = j_must_have(jdata['training'], 'set_prefix')
+        batch_size = j_must_have(jdata['training'], 'batch_size')
+        test_size = j_must_have(jdata['training'], 'numb_test')    
+        data = DeepmdDataSystem(systems, 
+                                batch_size, 
+                                test_size, 
+                                rcut, 
+                                set_prefix=set_pfx, 
+                                run_opt=run_opt)
+        data.add_dict(data_requirement)
+
+        # clear the default graph
+        tf.reset_default_graph()
+
+        # build the model with stats from the first system
+        model.build (data)
+        
+        # freeze the graph
+        with tf.Session() as sess:
+            init_op = tf.global_variables_initializer()
+            sess.run(init_op)
+            graph = tf.get_default_graph()
+            input_graph_def = graph.as_graph_def()
+            nodes = "o_dipole,o_rmat,o_rmat_deriv,o_nlist,o_rij,descrpt_attr/rcut,descrpt_attr/ntypes,descrpt_attr/sel,descrpt_attr/ndescrpt,model_attr/tmap,model_attr/sel_type,model_attr/model_type"
+            output_graph_def = tf.graph_util.convert_variables_to_constants(
+                sess,
+                input_graph_def,
+                nodes.split(",") 
+            )
+            output_graph = os.path.join(modifier_datapath, 'dipole.pb')
+            with tf.gfile.GFile(output_graph, "wb") as f:
+                f.write(output_graph_def.SerializeToString())
+
+    def test_fv(self):
+        # with tf.variable_scope('load', reuse = False) :
+        self._test_fv()
+            
+    def _test_fv (self):
+        dcm = DipoleChargeModifier(os.path.join(modifier_datapath, "dipole.pb"),
+                                   [-8],
+                                   [6, 1],
+                                   1,
+                                   0.25)
+        data = Data()
+        coord, box, atype = data.get_data()
+
+        ve, vf, vv = dcm.eval_modify(coord, box, atype)
+
+        hh = global_default_fv_hh
+        hh=1e-4
+        places = global_default_places
+        places=1
+        nframes = coord.shape[0]
+        ndof = coord.shape[1]
+        natoms = ndof // 3
+        for ii in range(ndof):
+            coordp = np.copy(coord)
+            coordm = np.copy(coord)
+            coordp[:,ii] += hh
+            coordm[:,ii] -= hh
+            ep, _, __ = dcm.eval_modify(coordp, box, atype, eval_fv = False)
+            em, _, __ = dcm.eval_modify(coordm, box, atype, eval_fv = False)
+            num_f = -(ep - em) / (2.*hh)
+            for ff in range(nframes):
+                self.assertAlmostEqual(vf[ff,ii], num_f[ff], 
+                                       places = places,
+                                       msg = 'frame %d dof %d does not match' % (ff, ii))
+
+        box3 = np.reshape(box, [nframes, 3,3])
+        rbox3 = np.linalg.inv(box3)
+        coord3 = np.reshape(coord, [nframes, natoms, 3])
+        rcoord3 = np.matmul(coord3, rbox3)
+        num_deriv = np.zeros([nframes,3,3])
+        for ii in range(3):
+            for jj in range(3):
+                box3p = np.copy(box3)
+                box3m = np.copy(box3)
+                box3p[:,ii,jj] = box3[:,ii,jj] + hh
+                box3m[:,ii,jj] = box3[:,ii,jj] - hh
+                boxp = np.reshape(box3p, [-1,9])
+                boxm = np.reshape(box3m, [-1,9])
+                coord3p = np.matmul(rcoord3, box3p)
+                coord3m = np.matmul(rcoord3, box3m)
+                coordp = np.reshape(coord3p, [nframes,-1])
+                coordm = np.reshape(coord3m, [nframes,-1])
+                ep, _, __ = dcm.eval_modify(coordp, boxp, atype, eval_fv = False)
+                em, _, __ = dcm.eval_modify(coordm, boxm, atype, eval_fv = False)
+                num_deriv[:,ii,jj] = -(ep - em) / (2.*hh)
+        # box3t = np.transpose(box3, [0,2,1])
+        # t_esti = np.matmul(num_deriv, box3t)
+        num_deriv = np.transpose(num_deriv, [0,2,1])
+        t_esti = np.matmul(num_deriv, box3)
+
+        print(t_esti, '\n', vv.reshape([-1, 3, 3]))
+        for ff in range(nframes):
+            for ii in range(3):
+                for jj in range(3):                
+                    self.assertAlmostEqual(t_esti[ff][ii][jj], vv[ff,ii*3+jj], 
+                                           places = places,
+                                           msg = "frame %d virial component [%d,%d] failed" % (ff, ii, jj))
+            
diff --git a/source/tests/test_sel_idx.py b/source/tests/test_sel_idx.py
new file mode 100644
index 0000000000..47ef9c8496
--- /dev/null
+++ b/source/tests/test_sel_idx.py
@@ -0,0 +1,20 @@
+import os,sys
+import numpy as np
+import unittest
+
+from deepmd.common import select_idx_map
+
+class TestSelIdx (unittest.TestCase) :
+    """Unit test for deepmd.common.select_idx_map."""
+
+    def test_add (self) :
+        # selected atoms are expected grouped by ascending type, original order kept within a type
+        atom_type = np.array([0,1,2,2,1,0], dtype = int)
+        type_sel = np.array([1,0], dtype = int)
+        idx_map = select_idx_map(atom_type, type_sel)
+        new_atom_type = atom_type[idx_map]
+        self.assertEqual(list(idx_map), [0, 5, 1, 4])
+        self.assertEqual(list(new_atom_type), [0, 0, 1, 1])
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/source/train/CMakeLists.txt b/source/train/CMakeLists.txt
index 7b990d3df9..6f507d0607 100644
--- a/source/train/CMakeLists.txt
+++ b/source/train/CMakeLists.txt
@@ -2,7 +2,7 @@
 
 configure_file("RunOptions.py.in" "${CMAKE_CURRENT_BINARY_DIR}/RunOptions.py" @ONLY)
 
-file(GLOB LIB_PY main.py common.py env.py compat.py Network.py Deep*.py Data.py DataSystem.py Model*.py Descrpt*.py Fitting.py Loss.py LearningRate.py Trainer.py TabInter.py EwaldRecp.py ${CMAKE_CURRENT_BINARY_DIR}/RunOptions.py)
+file(GLOB LIB_PY main.py common.py env.py compat.py Network.py Deep*.py Data.py DataSystem.py Model*.py Descrpt*.py Fitting.py Loss.py LearningRate.py Trainer.py TabInter.py EwaldRecp.py DataModifier.py ${CMAKE_CURRENT_BINARY_DIR}/RunOptions.py)
 
 file(GLOB CLS_PY  Local.py Slurm.py)
 
diff --git a/source/train/Data.py b/source/train/Data.py
index 399a13712b..7445f5ec1c 100644
--- a/source/train/Data.py
+++ b/source/train/Data.py
@@ -14,7 +14,8 @@ def __init__ (self,
                   sys_path, 
                   set_prefix = 'set',
                   shuffle_test = True, 
-                  type_map = None) :
+                  type_map = None, 
+                  modifier = None) :
         self.dirs = glob.glob (os.path.join(sys_path, set_prefix + ".*"))
         self.dirs.sort()
         # load atom type
@@ -46,6 +47,8 @@ def __init__ (self,
         self.set_count = 0
         self.iterator = 0
         self.shuffle_test = shuffle_test
+        # set modifier
+        self.modifier = modifier
 
 
     def add(self, 
@@ -139,7 +142,7 @@ def get_numb_set (self) :
         return len (self.train_dirs)
 
     def get_numb_batch (self, batch_size, set_idx) :
-        data = self._load_set(self.train_dirs[set_idx])
+        data = self._load_set(self.train_dirs[set_idx], modify = False)
         return data["coord"].shape[0] // batch_size
 
     def get_sys_numb_batch (self, batch_size) :
@@ -237,7 +240,7 @@ def _shuffle_data (self,
                 ret[kk] = data[kk]
         return ret, idx
 
-    def _load_set(self, set_name) :
+    def _load_set(self, set_name, modify = True) :
         ret = {}
         # get nframes
         path = os.path.join(set_name, "coord.npy")
@@ -269,7 +272,10 @@ def _load_set(self, set_name) :
                 data['find_'+kk] = data['find_'+k_in]
                 tmp_in = data[k_in].astype(global_ener_float_precision)
                 data[kk] = np.sum(np.reshape(tmp_in, [nframes, self.natoms, ndof]), axis = 1)
-                
+
+        if modify and self.modifier is not None:
+            self.modifier.modify(data)
+
         return data
 
 
diff --git a/source/train/DataModifier.py b/source/train/DataModifier.py
new file mode 100644
index 0000000000..165a1abc71
--- /dev/null
+++ b/source/train/DataModifier.py
@@ -0,0 +1,293 @@
+import os,platform
+import numpy as np
+from deepmd import DeepDipole
+from deepmd.env import tf
+from deepmd.common import select_idx_map
+from deepmd.EwaldRecp import EwaldRecp
+from deepmd.RunOptions import global_tf_float_precision
+from deepmd.RunOptions import global_np_float_precision
+from deepmd.RunOptions import global_ener_float_precision
+from deepmd.RunOptions import global_cvt_2_tf_float
+from deepmd.RunOptions import global_cvt_2_ener_float
+
+if platform.system() == "Windows":
+    ext = "dll"
+elif platform.system() == "Darwin":
+    ext = "dylib"
+else:
+    ext = "so"
+
+module_path = os.path.dirname(os.path.realpath(__file__)) + "/"
+assert (os.path.isfile (module_path  + "libop_abi.{}".format(ext) )), "op module does not exist"
+op_module = tf.load_op_library(module_path + "libop_abi.{}".format(ext))
+
+
+class DipoleChargeModifier(DeepDipole):
+    def __init__(self, 
+                 model_name, 
+                 model_charge_map,
+                 sys_charge_map, 
+                 ewald_h = 1, 
+                 ewald_beta = 1):
+        DeepDipole.__init__(self, model_name)
+        self.er = EwaldRecp(ewald_h, ewald_beta)
+        self.model_charge_map = model_charge_map
+        self.sys_charge_map = sys_charge_map
+        self.sel_type = list(self.get_sel_type())
+        # dimension of dipole
+        self.ext_dim = 3
+        self.t_ndesc  = self.graph.get_tensor_by_name ('load/descrpt_attr/ndescrpt:0')
+        self.t_sela  = self.graph.get_tensor_by_name ('load/descrpt_attr/sel:0')
+        [self.ndescrpt, self.sel_a] = self.sess.run([self.t_ndesc, self.t_sela])
+        self.sel_r = [ 0 for ii in range(len(self.sel_a)) ]
+        self.nnei_a = np.cumsum(self.sel_a)[-1]
+        self.nnei_r = np.cumsum(self.sel_r)[-1]
+        self.nnei = self.nnei_a + self.nnei_r
+        self.ndescrpt_a = self.nnei_a * 4
+        self.ndescrpt_r = self.nnei_r * 1
+        assert(self.ndescrpt == self.ndescrpt_a + self.ndescrpt_r)
+        self.force = None
+        self.ntypes = len(self.sel_a)
+
+    def build_fv_graph(self):
+        with self.graph.as_default():
+            return self._build_fv_graph_inner()        
+
+    def _build_fv_graph_inner(self):
+        """Build and return the force, virial and atom_virial tensors driven by the `t_ef` placeholder."""
+        self.t_ef = tf.placeholder(global_tf_float_precision, [None], name = 't_ef')
+        # -1 leaves these sizes to be resolved by tf.reshape / tf.slice at run time:
+        # nf ~ nframes, nfxnas ~ nframes x natoms_sel, nfxna ~ nframes x natoms
+        nf = -1
+        nfxnas = -1
+        nfxna = -1
+        self.t_box_reshape = tf.reshape(self.t_box, [-1, 9])
+        t_nframes = tf.shape(self.t_box_reshape)[0]
+        # (nframes x natoms_sel) x 1 x 3
+        self.t_ef_reshape = tf.reshape(self.t_ef, [nfxnas, 1, 3])
+        # (nframes x natoms) x ndescrpt
+        self.descrpt = self.graph.get_tensor_by_name('load/o_rmat:0')
+        self.descrpt_deriv = self.graph.get_tensor_by_name('load/o_rmat_deriv:0')
+        self.nlist = self.graph.get_tensor_by_name('load/o_nlist:0')
+        self.rij = self.graph.get_tensor_by_name('load/o_rij:0')
+        # self.descrpt_reshape = tf.reshape(self.descrpt, [nf, 192 * self.ndescrpt])
+        # self.descrpt_deriv = tf.reshape(self.descrpt_deriv, [nf, 192 * self.ndescrpt * 3])
+
+        # nframes x (natoms_sel x 3)
+        self.t_tensor_reshpe = tf.reshape(self.t_tensor, [t_nframes, -1])
+        # nframes x (natoms x 3)
+        self.t_tensor_reshpe = self._enrich(self.t_tensor_reshpe, dof = 3)
+        # (nframes x natoms) x 3
+        self.t_tensor_reshpe = tf.reshape(self.t_tensor_reshpe, [nfxna, 3])
+        # (nframes x natoms) x 1
+        self.t_dipole_x = tf.slice(self.t_tensor_reshpe, [0, 0], [nfxna, 1])
+        self.t_dipole_y = tf.slice(self.t_tensor_reshpe, [0, 1], [nfxna, 1])
+        self.t_dipole_z = tf.slice(self.t_tensor_reshpe, [0, 2], [nfxna, 1])
+        self.t_dipole_z = tf.reshape(self.t_dipole_z, [nfxna, 1])
+        # (nframes x natoms) x ndescrpt
+        [self.t_dipole_x_d] = tf.gradients(self.t_dipole_x, self.descrpt)
+        [self.t_dipole_y_d] = tf.gradients(self.t_dipole_y, self.descrpt)
+        [self.t_dipole_z_d] = tf.gradients(self.t_dipole_z, self.descrpt)
+        # nframes x (natoms x ndescrpt)
+        self.t_dipole_x_d = tf.reshape(self.t_dipole_x_d, [-1, self.t_natoms[0] * self.ndescrpt])
+        self.t_dipole_y_d = tf.reshape(self.t_dipole_y_d, [-1, self.t_natoms[0] * self.ndescrpt])
+        self.t_dipole_z_d = tf.reshape(self.t_dipole_z_d, [-1, self.t_natoms[0] * self.ndescrpt])
+        # nframes x (natoms_sel x ndescrpt)
+        self.t_dipole_x_d = self._slice_descrpt_deriv(self.t_dipole_x_d)
+        self.t_dipole_y_d = self._slice_descrpt_deriv(self.t_dipole_y_d)
+        self.t_dipole_z_d = self._slice_descrpt_deriv(self.t_dipole_z_d)
+        # (nframes x natoms_sel) x ndescrpt
+        self.t_dipole_x_d = tf.reshape(self.t_dipole_x_d, [nfxnas, self.ndescrpt])
+        self.t_dipole_y_d = tf.reshape(self.t_dipole_y_d, [nfxnas, self.ndescrpt])
+        self.t_dipole_z_d = tf.reshape(self.t_dipole_z_d, [nfxnas, self.ndescrpt])
+        # (nframes x natoms_sel) x 3 x ndescrpt
+        self.t_dipole_d = tf.concat([self.t_dipole_x_d, self.t_dipole_y_d, self.t_dipole_z_d], axis = 1)
+        self.t_dipole_d = tf.reshape(self.t_dipole_d, [nfxnas, 3*self.ndescrpt])
+        # (nframes x natoms_sel) x 3 x ndescrpt
+        self.t_dipole_d = tf.reshape(self.t_dipole_d, [-1, 3, self.ndescrpt])
+        # (nframes x natoms_sel) x 1 x ndescrpt
+        self.t_ef_d = tf.matmul(self.t_ef_reshape, self.t_dipole_d)
+        # nframes x (natoms_sel x ndescrpt)
+        self.t_ef_d = tf.reshape(self.t_ef_d, [t_nframes, -1])
+        # nframes x (natoms x ndescrpt)
+        self.t_ef_d = self._enrich(self.t_ef_d, dof = self.ndescrpt)
+        self.t_ef_d = tf.reshape(self.t_ef_d, [nf, self.t_natoms[0] * self.ndescrpt])
+        # t_ef_d is force (with -1), prod_forc takes deriv, so we need the opposite
+        self.t_ef_d_oppo = -self.t_ef_d
+        
+        force = op_module.prod_force_se_a(self.t_ef_d_oppo,
+                                          self.descrpt_deriv, 
+                                          self.nlist, 
+                                          self.t_natoms,
+                                          n_a_sel = self.nnei_a,
+                                          n_r_sel = self.nnei_r)
+        virial, atom_virial \
+            = op_module.prod_virial_se_a (self.t_ef_d_oppo,
+                                          self.descrpt_deriv,
+                                          self.rij,
+                                          self.nlist,
+                                          self.t_natoms,
+                                          n_a_sel = self.nnei_a,
+                                          n_r_sel = self.nnei_r)
+        return force, virial, atom_virial
+
+    def _enrich(self, dipole, dof = 3):
+        coll = []                
+        sel_start_idx = 0
+        for type_i in range(self.ntypes):
+            if type_i in self.sel_type:
+                di = tf.slice(dipole, 
+                              [ 0, sel_start_idx           * dof],
+                              [-1, self.t_natoms[2+type_i] * dof])
+                sel_start_idx += self.t_natoms[2+type_i]
+            else:
+                di = tf.zeros([tf.shape(dipole)[0], self.t_natoms[2+type_i] * dof],
+                              dtype = global_tf_float_precision)
+            coll.append(di)
+        return tf.concat(coll, axis = 1)
+
+    def _slice_descrpt_deriv(self, deriv):
+        coll = []
+        start_idx = 0        
+        for type_i in range(self.ntypes):
+            if type_i in self.sel_type:
+                di = tf.slice(deriv, 
+                              [ 0, start_idx               * self.ndescrpt],
+                              [-1, self.t_natoms[2+type_i] * self.ndescrpt])
+                coll.append(di)
+                start_idx += self.t_natoms[2+type_i]
+        return tf.concat(coll, axis = 1)        
+
+
+    def eval_modify(self, coord, box, atype, eval_fv = True):
+        natoms = coord.shape[1] // 3
+        nframes = coord.shape[0]
+        box = np.reshape(box, [nframes, 9])
+        atype = np.reshape(atype, [nframes, natoms])
+        sel_idx_map = select_idx_map(atype[0], self.sel_type)
+        nsel = len(sel_idx_map)
+        # setup charge
+        charge = np.zeros([natoms])
+        for ii in range(natoms):
+            charge[ii] = self.sys_charge_map[atype[0][ii]]
+        charge = np.tile(charge, [nframes, 1])
+
+        # add wfcc
+        all_coord, all_charge, dipole = self._extend_system(coord, box, atype, charge)
+        
+        # print('compute er')
+        tot_e, all_f, all_v = self.er.eval(all_coord, all_charge, box)
+        # print('finish  er')
+
+        tot_f = None
+        tot_v = None
+        if self.force is None:
+            self.force, self.virial, self.av = self.build_fv_graph()
+        if eval_fv:
+            # compute f
+            ext_f = all_f[:,natoms*3:]
+            corr_f, corr_v, corr_av = self.eval_fv(coord, box, atype[0], ext_f)
+            tot_f = all_f[:,:natoms*3] + corr_f
+            for ii in range(nsel):            
+                orig_idx = sel_idx_map[ii]            
+                tot_f[:,orig_idx*3:orig_idx*3+3] += ext_f[:,ii*3:ii*3+3]                
+            # compute v
+            dipole3 = np.reshape(dipole, [nframes, nsel, 3])
+            ext_f3 = np.reshape(ext_f, [nframes, nsel, 3])
+            ext_f3 = np.transpose(ext_f3, [0, 2, 1])
+            # fd_corr_v = -np.matmul(ext_f3, dipole3).T.reshape([nframes, 9])
+            # fd_corr_v = -np.matmul(ext_f3, dipole3)
+            # fd_corr_v = np.transpose(fd_corr_v, [0, 2, 1]).reshape([nframes, 9])
+            fd_corr_v = -np.matmul(ext_f3, dipole3).reshape([nframes, 9])
+            # print(all_v, '\n', corr_v, '\n', fd_corr_v)
+            tot_v = all_v + corr_v + fd_corr_v
+
+        return tot_e, tot_f, tot_v
+
+
+    def eval_fv(self, coords, cells, atom_types, ext_f) :
+        # reshape the inputs 
+        cells = np.reshape(cells, [-1, 9])
+        nframes = cells.shape[0]
+        coords = np.reshape(coords, [nframes, -1])
+        natoms = coords.shape[1] // 3
+
+        # sort inputs
+        coords, atom_types, imap, sel_at, sel_imap = self.sort_input(coords, atom_types, sel_atoms = self.get_sel_type())
+
+        # make natoms_vec and default_mesh
+        natoms_vec = self.make_natoms_vec(atom_types)
+        assert(natoms_vec[0] == natoms)
+        default_mesh = self.make_default_mesh(cells)
+
+        # evaluate
+        tensor = []
+        feed_dict_test = {}
+        feed_dict_test[self.t_natoms] = natoms_vec
+        feed_dict_test[self.t_type  ] = np.tile(atom_types, [nframes, 1]).reshape([-1])
+        feed_dict_test[self.t_coord ] = coords.reshape([-1])
+        feed_dict_test[self.t_box   ] = cells.reshape([-1])
+        feed_dict_test[self.t_mesh  ] = default_mesh.reshape([-1])
+        feed_dict_test[self.t_ef    ] = ext_f.reshape([-1])
+        # print(self.sess.run(tf.shape(self.t_tensor), feed_dict = feed_dict_test))
+        fout, vout, avout \
+            = self.sess.run([self.force, self.virial, self.av],
+                            feed_dict = feed_dict_test)
+        # print('fout: ', fout.shape, fout)
+        return fout, vout, avout
+
+
+    def _extend_system(self, coord, box, atype, charge):
+        natoms = coord.shape[1] // 3
+        nframes = coord.shape[0]
+        # sel atoms and setup ref coord
+        sel_idx_map = select_idx_map(atype[0], self.sel_type)
+        nsel = len(sel_idx_map)
+        coord3 = coord.reshape([nframes, natoms, 3])
+        ref_coord = coord3[:,sel_idx_map,:]
+        ref_coord = np.reshape(ref_coord, [nframes, nsel * 3])
+        
+        dipole = self.eval(coord, box, atype[0])
+        dipole = np.reshape(dipole, [nframes, nsel * 3])
+        
+        wfcc_coord = ref_coord + dipole
+        # wfcc_coord = dipole
+        wfcc_charge = np.zeros([nsel])
+        for ii in range(nsel):
+            orig_idx = self.sel_type.index(atype[0][sel_idx_map[ii]])
+            wfcc_charge[ii] = self.model_charge_map[orig_idx]
+        wfcc_charge = np.tile(wfcc_charge, [nframes, 1])
+
+        wfcc_coord = np.reshape(wfcc_coord, [nframes, nsel * 3])
+        wfcc_charge = np.reshape(wfcc_charge, [nframes, nsel])
+
+        all_coord = np.concatenate((coord, wfcc_coord), axis = 1)
+        all_charge = np.concatenate((charge, wfcc_charge), axis = 1)
+
+        return all_coord, all_charge, dipole
+
+
+    def modify(self, 
+               data):
+        if 'find_energy' not in data and 'find_force' not in data and 'find_virial' not in data:
+            return
+
+        get_nframes=None
+        coord = data['coord'][:get_nframes,:]
+        box = data['box'][:get_nframes,:]
+        atype = data['type'][:get_nframes,:]
+        nframes = coord.shape[0]
+
+        tot_e, tot_f, tot_v = self.eval_modify(coord, box, atype)
+
+        # print(tot_f[:,0])
+        
+        if 'find_energy' in data and data['find_energy'] == 1.0 :
+            data['energy'] -= tot_e.reshape([nframes, 1])
+        if 'find_force' in data and data['find_force'] == 1.0 :
+            data['force'] -= tot_f
+        if 'find_virial' in data and data['find_virial'] == 1.0 :
+            data['virial'] -= tot_v.reshape([nframes, 9])
+
+
+                           
diff --git a/source/train/DataSystem.py b/source/train/DataSystem.py
index 7bb61b477c..b967074a64 100644
--- a/source/train/DataSystem.py
+++ b/source/train/DataSystem.py
@@ -19,7 +19,8 @@ def __init__ (self,
                   set_prefix = 'set',
                   shuffle_test = True,
                   run_opt = None, 
-                  type_map = None) :
+                  type_map = None, 
+                  modifier = None) :
         # init data
         self.rcut = rcut
         self.system_dirs = systems
@@ -29,8 +30,8 @@ def __init__ (self,
             self.data_systems.append(DeepmdData(ii, 
                                                 set_prefix=set_prefix, 
                                                 shuffle_test=shuffle_test, 
-                                                type_map = type_map))
-
+                                                type_map = type_map, 
+                                                modifier = modifier))
         # batch size
         self.batch_size = batch_size
         if isinstance(self.batch_size, int) :
diff --git a/source/train/DeepEval.py b/source/train/DeepEval.py
index f78699b6f8..8e9e1d7113 100644
--- a/source/train/DeepEval.py
+++ b/source/train/DeepEval.py
@@ -52,17 +52,17 @@ def load_graph(self,
         return graph
 
     def make_default_mesh(self, test_box) :
-        ncell = np.ones (3, dtype=np.int32)
-        avg_box = np.average (test_box, axis = 0)
         cell_size = 3
-        avg_box = np.reshape (avg_box, [3,3])
-        for ii in range (3) :
-            ncell[ii] = int ( np.linalg.norm(avg_box[ii]) / cell_size )
-            if (ncell[ii] < 2) : ncell[ii] = 2
-        default_mesh = np.zeros (6, dtype = np.int32)
-        default_mesh[3] = ncell[0]
-        default_mesh[4] = ncell[1]
-        default_mesh[5] = ncell[2]
+        nframes = test_box.shape[0]
+        default_mesh = np.zeros([nframes, 6], dtype = np.int32)
+        for ff in range(nframes):
+            # a frame may be given flattened (9 numbers); view it as 3 x 3 so that
+            # box3[ii] is a whole cell vector and not a single matrix element
+            box3 = np.reshape(test_box[ff], [3,3])
+            for ii in range(3) :
+                ncell = int ( np.linalg.norm(box3[ii]) / cell_size )
+                # slots 3..5 store the cell counts, never fewer than 2 per direction
+                default_mesh[ff][3+ii] = max(ncell, 2)
         return default_mesh
 
     def sort_input(self, coord, atom_type, sel_atoms = None) :
@@ -173,12 +173,12 @@ def eval(self,
         tensor = []
         feed_dict_test = {}
         feed_dict_test[self.t_natoms] = natoms_vec
-        feed_dict_test[self.t_mesh  ] = default_mesh
         feed_dict_test[self.t_type  ] = atom_types
         t_out = [self.t_tensor]
         for ii in range(nframes) :
             feed_dict_test[self.t_coord] = np.reshape(coords[ii:ii+1, :], [-1])
             feed_dict_test[self.t_box  ] = np.reshape(cells [ii:ii+1, :], [-1])
+            feed_dict_test[self.t_mesh ] = default_mesh[ii]
             v_out = self.sess.run (t_out, feed_dict = feed_dict_test)
             tensor.append(v_out[0])
 
diff --git a/source/train/DescrptSeA.py b/source/train/DescrptSeA.py
index aa5221d7c7..5df382f683 100644
--- a/source/train/DescrptSeA.py
+++ b/source/train/DescrptSeA.py
@@ -136,6 +136,12 @@ def build (self,
             t_ntypes = tf.constant(self.ntypes, 
                                    name = 'ntypes', 
                                    dtype = tf.int32)
+            t_ndescrpt = tf.constant(self.ndescrpt, 
+                                     name = 'ndescrpt', 
+                                     dtype = tf.int32)            
+            t_sel = tf.constant(self.sel_a, 
+                                name = 'sel', 
+                                dtype = tf.int32)            
             self.t_avg = tf.get_variable('t_avg', 
                                          davg.shape, 
                                          dtype = global_tf_float_precision,
@@ -166,6 +172,10 @@ def build (self,
                                        sel_r = self.sel_r)
 
         self.descrpt_reshape = tf.reshape(self.descrpt, [-1, self.ndescrpt])
+        self.descrpt_reshape = tf.identity(self.descrpt_reshape, name = 'o_rmat')
+        self.descrpt_deriv = tf.identity(self.descrpt_deriv, name = 'o_rmat_deriv')
+        self.rij = tf.identity(self.rij, name = 'o_rij')
+        self.nlist = tf.identity(self.nlist, name = 'o_nlist')
 
         self.dout, self.qmat = self._pass_filter(self.descrpt_reshape, natoms, suffix = suffix, reuse = reuse)
 
diff --git a/source/train/common.py b/source/train/common.py
index f725ae4ec6..7d8e530460 100644
--- a/source/train/common.py
+++ b/source/train/common.py
@@ -1,3 +1,5 @@
+import numpy as np
+
 data_requirement = {}
 
 def add_data_requirement(key, 
@@ -16,6 +18,15 @@ def add_data_requirement(key,
     }
     
 
+def select_idx_map(atom_type, 
+                   type_sel):
+    """Indices of the atoms whose type is in `type_sel`, grouped by ascending type."""
+    atom_type = np.asarray(atom_type)
+    picked = [np.array([], dtype = int)]
+    picked += [np.where(atom_type == tt)[0] for tt in np.sort(type_sel)]
+    return np.concatenate(picked)
+
+
 class ClassArg () : 
     def __init__ (self) :
         self.arg_dict = {}
diff --git a/source/train/train.py b/source/train/train.py
index a57cb735f1..b41c471a3f 100755
--- a/source/train/train.py
+++ b/source/train/train.py
@@ -16,6 +16,7 @@
 from deepmd.DataSystem import DataSystem, DeepmdDataSystem
 from deepmd.Trainer import NNPTrainer
 from deepmd.common import data_requirement
+from deepmd.DataModifier import DipoleChargeModifier
 
 def create_done_queue(cluster_spec, task_index):
    with tf.device("/job:ps/task:%d" % (task_index)):
@@ -53,8 +54,8 @@ def j_must_have (jdata, key) :
 
 def train (args) :
     # load json database
-    fp = open (args.INPUT, 'r')
-    jdata = json.load (fp)
+    with open (args.INPUT, 'r') as fp:
+       jdata = json.load (fp)
     if not 'model' in jdata.keys():
        jdata = convert_input_v0_v1(jdata, 
                                    warning = True, 
@@ -107,7 +108,27 @@ def _do_work(jdata, run_opt):
        ipt_type_map = None
     else:
        ipt_type_map = type_map
-    data = DeepmdDataSystem(systems, batch_size, test_size, rcut, set_prefix=set_pfx, run_opt=run_opt, type_map = ipt_type_map)
+    # data modifier
+    modifier = None
+    modi_data = jdata['training'].get("data_modifier", None)
+    if modi_data is not None:
+       if modi_data['type'] == 'dipole_charge':
+          modifier = DipoleChargeModifier(modi_data['model_name'],
+                                          modi_data['model_charge_map'],
+                                          modi_data['sys_charge_map'],
+                                          modi_data['ewald_h'],
+                                          modi_data['ewald_beta'])
+       else:
+          raise RuntimeError('unknown modifier type ' + str(modi_data['type']))
+    # init data
+    data = DeepmdDataSystem(systems, 
+                            batch_size, 
+                            test_size, 
+                            rcut, 
+                            set_prefix=set_pfx, 
+                            run_opt=run_opt, 
+                            type_map = ipt_type_map, 
+                            modifier = modifier)
     data.add_dict(data_requirement)
     # build the model with stats from the first system
     model.build (data)

From 8d5d3e8ce503f0a232f3c9369edb2fcd64b76663 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Tue, 5 Nov 2019 18:34:45 +0800
Subject: [PATCH 08/38] batch evaluate ewald and correction

---
 source/train/DataModifier.py | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/source/train/DataModifier.py b/source/train/DataModifier.py
index 165a1abc71..044b1d8182 100644
--- a/source/train/DataModifier.py
+++ b/source/train/DataModifier.py
@@ -176,7 +176,18 @@ def eval_modify(self, coord, box, atype, eval_fv = True):
         all_coord, all_charge, dipole = self._extend_system(coord, box, atype, charge)
         
         # print('compute er')
-        tot_e, all_f, all_v = self.er.eval(all_coord, all_charge, box)
+        batch_size = 5
+        tot_e = []
+        all_f = []
+        all_v = []
+        for ii in range(0,nframes,batch_size):
+            e,f,v = self.er.eval(all_coord[ii:ii+batch_size], all_charge[ii:ii+batch_size], box[ii:ii+batch_size])
+            tot_e.append(e)
+            all_f.append(f)
+            all_v.append(v)
+        tot_e = np.concatenate(tot_e, axis = 0)
+        all_f = np.concatenate(all_f, axis = 0)
+        all_v = np.concatenate(all_v, axis = 0)
         # print('finish  er')
 
         tot_f = None
@@ -186,7 +197,18 @@ def eval_modify(self, coord, box, atype, eval_fv = True):
         if eval_fv:
             # compute f
             ext_f = all_f[:,natoms*3:]
-            corr_f, corr_v, corr_av = self.eval_fv(coord, box, atype[0], ext_f)
+            corr_f = []
+            corr_v = []
+            corr_av = []
+            for ii in range(0,nframes,batch_size):
+                print(ii, nframes)
+                f, v, av = self.eval_fv(coord[ii:ii+batch_size], box[ii:ii+batch_size], atype[0], ext_f[ii:ii+batch_size])
+                corr_f.append(f)
+                corr_v.append(v)
+                corr_av.append(av)
+            corr_f = np.concatenate(corr_f, axis = 0)
+            corr_v = np.concatenate(corr_v, axis = 0)
+            corr_av = np.concatenate(corr_av, axis = 0)
             tot_f = all_f[:,:natoms*3] + corr_f
             for ii in range(nsel):            
                 orig_idx = sel_idx_map[ii]            

From 06394584c8cf611876ab8ef2f8c46e36b6e433be Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Wed, 6 Nov 2019 11:08:49 +0800
Subject: [PATCH 09/38] results of data statistics (davg, dstd) are private
 members of descriptor classes

---
 source/tests/test_descrpt_se_ar.py |  2 +-
 source/train/DescrptLocFrame.py    | 14 +++++++-------
 source/train/DescrptSeA.py         | 14 ++++++--------
 source/train/DescrptSeAR.py        | 19 ++++++++++++++-----
 source/train/DescrptSeR.py         | 12 ++++++------
 source/train/Loss.py               |  2 +-
 source/train/Model.py              | 27 +++++++++++----------------
 7 files changed, 46 insertions(+), 44 deletions(-)

diff --git a/source/tests/test_descrpt_se_ar.py b/source/tests/test_descrpt_se_ar.py
index 651724ae7a..01c9e99496 100644
--- a/source/tests/test_descrpt_se_ar.py
+++ b/source/tests/test_descrpt_se_ar.py
@@ -89,7 +89,7 @@ def comp_ef (self,
                  tnatoms,
                  name,
                  reuse = None) :
-        dout = self.descrpt.build(dcoord, dtype, tnatoms, dbox, self.default_mesh, self.avg, self.std, suffix=name, reuse=reuse)
+        dout = self.descrpt.build(dcoord, dtype, tnatoms, dbox, self.default_mesh, suffix=name, reuse=reuse)
         inputs_reshape = tf.reshape (dout, [-1, self.descrpt.get_dim_out()])
         atom_ener = self._net (inputs_reshape, name, reuse = reuse)
         atom_ener_reshape = tf.reshape(atom_ener, [-1, self.natoms[0]])       
diff --git a/source/train/DescrptLocFrame.py b/source/train/DescrptLocFrame.py
index ab00ac31e2..302bdf7496 100644
--- a/source/train/DescrptLocFrame.py
+++ b/source/train/DescrptLocFrame.py
@@ -42,7 +42,8 @@ def __init__(self, jdata):
         self.ndescrpt_a = self.nnei_a * 4
         self.ndescrpt_r = self.nnei_r * 1
         self.ndescrpt = self.ndescrpt_a + self.ndescrpt_r
-
+        self.davg = None
+        self.dstd = None
 
     def get_rcut (self) :
         return self.rcut_r
@@ -85,10 +86,9 @@ def compute_dstats (self,
                         dstd[ii] = 1e-2            
                 all_davg.append(davg)
                 all_dstd.append(dstd)
-        davg = np.array(all_davg)
-        dstd = np.array(all_dstd)
-        return davg, dstd
-        
+        self.davg = np.array(all_davg)
+        self.dstd = np.array(all_dstd)        
+
         
     def build (self, 
                coord_, 
@@ -96,10 +96,10 @@ def build (self,
                natoms,
                box_, 
                mesh,
-               davg = None, 
-               dstd = None,
                suffix = '', 
                reuse = None):
+        davg = self.davg
+        dstd = self.dstd
         with tf.variable_scope('descrpt_attr' + suffix, reuse = reuse) :
             if davg is None:
                 davg = np.zeros([self.ntypes, self.ndescrpt]) 
diff --git a/source/train/DescrptSeA.py b/source/train/DescrptSeA.py
index 5df382f683..c13e553aa1 100644
--- a/source/train/DescrptSeA.py
+++ b/source/train/DescrptSeA.py
@@ -50,8 +50,9 @@ def __init__ (self, jdata):
         self.ndescrpt_a = self.nnei_a * 4
         self.ndescrpt_r = self.nnei_r * 1
         self.ndescrpt = self.ndescrpt_a + self.ndescrpt_r
-
         self.useBN = False
+        self.dstd = None
+        self.davg = None
 
 
     def get_rcut (self) :
@@ -108,10 +109,8 @@ def compute_dstats (self,
                 all_davg.append(davg)
                 all_dstd.append(dstd)
 
-        davg = np.array(all_davg)
-        dstd = np.array(all_dstd)
-
-        return davg, dstd
+        self.davg = np.array(all_davg)
+        self.dstd = np.array(all_dstd)
 
 
     def build (self, 
@@ -120,11 +119,10 @@ def build (self,
                natoms,
                box_, 
                mesh,
-               davg = None, 
-               dstd = None,
                suffix = '', 
                reuse = None):
-
+        davg = self.davg
+        dstd = self.dstd
         with tf.variable_scope('descrpt_attr' + suffix, reuse = reuse) :
             if davg is None:
                 davg = np.zeros([self.ntypes, self.ndescrpt]) 
diff --git a/source/train/DescrptSeAR.py b/source/train/DescrptSeAR.py
index 929b660248..97a254df9a 100644
--- a/source/train/DescrptSeAR.py
+++ b/source/train/DescrptSeAR.py
@@ -33,6 +33,8 @@ def __init__ (self, jdata):
         self.descrpt_a = DescrptSeA(self.param_a)
         self.descrpt_r = DescrptSeR(self.param_r)        
         assert(self.descrpt_a.get_ntypes() == self.descrpt_r.get_ntypes())
+        self.davg = None
+        self.dstd = None
 
     def get_rcut (self) :
         return np.max([self.descrpt_a.get_rcut(), self.descrpt_r.get_rcut()])
@@ -57,7 +59,8 @@ def compute_dstats (self,
                         mesh) :    
         davg_a, dstd_a = self.descrpt_a.compute_dstats(data_coord, data_box, data_atype, natoms_vec, mesh)
         davg_r, dstd_r = self.descrpt_r.compute_dstats(data_coord, data_box, data_atype, natoms_vec, mesh)
-        return [davg_a, davg_r], [dstd_a, dstd_r]
+        self.davg = [davg_a, davg_r]
+        self.dstd = [dstd_a, dstd_r]
 
 
     def build (self, 
@@ -66,13 +69,19 @@ def build (self,
                natoms,
                box, 
                mesh,
-               davg,
-               dstd,
                suffix = '', 
                reuse = None):
+        davg = self.davg
+        dstd = self.dstd
+        if davg is None:
+            davg = [np.zeros([self.descrpt_a.ntypes, self.descrpt_a.ndescrpt]), 
+                    np.zeros([self.descrpt_r.ntypes, self.descrpt_r.ndescrpt])]
+        if dstd is None:
+            dstd = [np.ones ([self.descrpt_a.ntypes, self.descrpt_a.ndescrpt]), 
+                    np.ones ([self.descrpt_r.ntypes, self.descrpt_r.ndescrpt])]
         # dout
-        self.dout_a = self.descrpt_a.build(coord_, atype_, natoms, box, mesh, davg[0], dstd[0], suffix=suffix+'_a', reuse=reuse)
-        self.dout_r = self.descrpt_r.build(coord_, atype_, natoms, box, mesh, davg[1], dstd[1], suffix=suffix+'_r', reuse=reuse)
+        self.dout_a = self.descrpt_a.build(coord_, atype_, natoms, box, mesh, suffix=suffix+'_a', reuse=reuse)
+        self.dout_r = self.descrpt_r.build(coord_, atype_, natoms, box, mesh, suffix=suffix+'_r', reuse=reuse)
         self.dout_a = tf.reshape(self.dout_a, [-1, self.descrpt_a.get_dim_out()])
         self.dout_r = tf.reshape(self.dout_r, [-1, self.descrpt_r.get_dim_out()])
         self.dout = tf.concat([self.dout_a, self.dout_r], axis = 1)
diff --git a/source/train/DescrptSeR.py b/source/train/DescrptSeR.py
index b057a0afd3..1f0bd614f6 100644
--- a/source/train/DescrptSeR.py
+++ b/source/train/DescrptSeR.py
@@ -46,8 +46,9 @@ def __init__ (self, jdata):
         self.ndescrpt_a = self.nnei_a * 4
         self.ndescrpt_r = self.nnei_r * 1
         self.ndescrpt = self.nnei_r
-
         self.useBN = False
+        self.davg = None
+        self.dstd = None
 
 
     def get_rcut (self) :
@@ -90,10 +91,9 @@ def compute_dstats (self,
             all_davg.append(davg)
             all_dstd.append(dstd)
 
-        davg = np.array(all_davg)
-        dstd = np.array(all_dstd)
+        self.davg = np.array(all_davg)
+        self.dstd = np.array(all_dstd)
 
-        return davg, dstd
 
     def build (self, 
                coord_, 
@@ -101,10 +101,10 @@ def build (self,
                natoms,
                box_, 
                mesh,
-               davg = None, 
-               dstd = None,
                suffix = '', 
                reuse = None):
+        davg = self.davg
+        dstd = self.dstd
         with tf.variable_scope('descrpt_attr' + suffix, reuse = reuse) :
             if davg is None:
                 davg = np.zeros([self.ntypes, self.ndescrpt]) 
diff --git a/source/train/Loss.py b/source/train/Loss.py
index 6d9abf7026..ad1d2211c7 100644
--- a/source/train/Loss.py
+++ b/source/train/Loss.py
@@ -75,7 +75,7 @@ def build (self,
         force_hat_reshape = tf.reshape (force_hat, [-1])
         atom_pref_reshape = tf.reshape (atom_pref, [-1])
         diff_f = force_hat_reshape - force_reshape
-        if self.relative_f is not None:
+        if self.relative_f is not None:            
             force_hat_3 = tf.reshape(force_hat, [-1, 3])
             norm_f = tf.reshape(tf.norm(force_hat_3, axis = 1), [-1, 1]) + self.relative_f
             diff_f_3 = tf.reshape(diff_f, [-1, 3])
diff --git a/source/train/Model.py b/source/train/Model.py
index 9cbb8e1123..8f54d80e59 100644
--- a/source/train/Model.py
+++ b/source/train/Model.py
@@ -81,13 +81,13 @@ def data_stat(self, data):
 
 
     def _compute_dstats (self, all_stat, protection = 1e-2) :
-        self.davg, self.dstd \
-            = self.descrpt.compute_dstats(all_stat['coord'],
-                                          all_stat['box'],
-                                          all_stat['type'],
-                                          all_stat['natoms_vec'],
-                                          all_stat['default_mesh'])        
+        self.descrpt.compute_dstats(all_stat['coord'],
+                                    all_stat['box'],
+                                    all_stat['type'],
+                                    all_stat['natoms_vec'],
+                                    all_stat['default_mesh'])
         self.fitting.compute_dstats(all_stat, protection = protection)
+
     
     def build (self, 
                coord_, 
@@ -129,8 +129,6 @@ def build (self,
                                  natoms,
                                  box,
                                  mesh,
-                                 davg = self.davg,
-                                 dstd = self.dstd,
                                  suffix = suffix,
                                  reuse = reuse)
         dout = tf.identity(dout, name='o_descriptor')
@@ -266,12 +264,11 @@ def data_stat(self, data):
         self._compute_dstats (all_stat)
 
     def _compute_dstats (self, all_stat) :        
-        self.davg, self.dstd \
-            = self.descrpt.compute_dstats(all_stat['coord'],
-                                          all_stat['box'],
-                                          all_stat['type'],
-                                          all_stat['natoms_vec'],
-                                          all_stat['default_mesh'])
+        self.descrpt.compute_dstats(all_stat['coord'],
+                                    all_stat['box'],
+                                    all_stat['type'],
+                                    all_stat['natoms_vec'],
+                                    all_stat['default_mesh'])
 
     def build (self, 
                coord_, 
@@ -302,8 +299,6 @@ def build (self,
                                  natoms,
                                  box,
                                  mesh,
-                                 davg = self.davg,
-                                 dstd = self.dstd,
                                  suffix = suffix,
                                  reuse = reuse)
         dout = tf.identity(dout, name='o_descriptor')

From a23ec38cb363c3c0ce1a4374670307e9f8de548b Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Wed, 6 Nov 2019 15:30:58 +0800
Subject: [PATCH 10/38] rename dstat to input_stat. Refactor the stat data
 generation

---
 source/tests/test_fitting_stat.py  |  2 +-
 source/tests/test_gen_stat_data.py | 92 +++++++++++++++++++++++++++++
 source/train/DescrptLocFrame.py    |  2 +-
 source/train/DescrptSeA.py         |  2 +-
 source/train/DescrptSeAR.py        |  6 +-
 source/train/DescrptSeR.py         |  2 +-
 source/train/Fitting.py            |  2 +-
 source/train/Loss.py               | 10 ++--
 source/train/Model.py              | 95 +++++++++++++++++++++++-------
 9 files changed, 179 insertions(+), 34 deletions(-)
 create mode 100644 source/tests/test_gen_stat_data.py

diff --git a/source/tests/test_fitting_stat.py b/source/tests/test_fitting_stat.py
index aed3a6992a..0cbd693ae1 100644
--- a/source/tests/test_fitting_stat.py
+++ b/source/tests/test_fitting_stat.py
@@ -69,7 +69,7 @@ def test (self) :
         all_data = _make_fake_data(sys_natoms, sys_nframes, avgs, stds)
         frefa, frefs = _brute_fparam(all_data, len(avgs))
         arefa, arefs = _brute_aparam(all_data, len(avgs))
-        fitting.compute_dstats(all_data, protection = 1e-2)
+        fitting.compute_input_stats(all_data, protection = 1e-2)
         # print(frefa, frefs)
         for ii in range(len(avgs)):
             self.assertAlmostEqual(frefa[ii], fitting.fparam_avg[ii])
diff --git a/source/tests/test_gen_stat_data.py b/source/tests/test_gen_stat_data.py
new file mode 100644
index 0000000000..d9f99b5818
--- /dev/null
+++ b/source/tests/test_gen_stat_data.py
@@ -0,0 +1,92 @@
+import os,sys,platform,json,shutil
+import numpy as np
+import unittest
+import dpdata
+
+from deepmd.DataSystem import DeepmdDataSystem
+from deepmd.Model import make_all_stat, merge_sys_stat, _make_all_stat_ref
+
+def gen_sys(nframes, atom_types):
+    natoms = len(atom_types)
+    data = {}
+    data['coords'] = np.random.random([nframes, natoms, 3])
+    data['forces'] = np.random.random([nframes, natoms, 3])
+    data['cells'] = np.random.random([nframes, 9])
+    data['energies'] = np.random.random([nframes, 1])
+    types = list(set(list(atom_types)))
+    types.sort()
+    data['atom_names'] = []
+    data['atom_numbs'] = []
+    for ii in range(len(types)):
+        data['atom_names'] .append( 'TYPE_%d' % ii )
+        data['atom_numbs'] .append(np.sum(atom_types == ii))
+    data['atom_types'] = np.array(atom_types, dtype = int)
+    return data
+
+class TestGenStatData(unittest.TestCase) :
+    def setUp(self):
+        data0 = gen_sys(20, [0, 1, 0, 2, 1])
+        data1 = gen_sys(30, [0, 1, 0, 0])
+        sys0 = dpdata.LabeledSystem()
+        sys1 = dpdata.LabeledSystem()
+        sys0.data = data0
+        sys1.data = data1
+        sys0.to_deepmd_npy('system_0', set_size = 10)
+        sys0.to_deepmd_npy('system_1', set_size = 10)
+        
+    def tearDown(self):
+        shutil.rmtree('system_0')
+        shutil.rmtree('system_1')
+
+    def _comp_data(self, d0, d1) :
+        for ii in range(d0.shape[0]):
+            for jj in range(d0.shape[1]):
+                for kk in range(d0.shape[2]):
+                    self.assertAlmostEqual(d0[ii][jj][kk], d1[ii][jj][kk])
+
+    def test_merge_all_stat(self):
+        np.random.seed(0)
+        data0 = DeepmdDataSystem(['system_0', 'system_1'], 
+                                5, 
+                                10, 
+                                1.0)
+        data0.add('energy', 1, must = True)
+        np.random.seed(0)
+        data1 = DeepmdDataSystem(['system_0', 'system_1'], 
+                                5, 
+                                10, 
+                                1.0)
+        data1.add('force', 3, atomic = True, must = True)
+        np.random.seed(0)
+        data2 = DeepmdDataSystem(['system_0', 'system_1'], 
+                                5, 
+                                10, 
+                                1.0)
+        data2.add('force', 3, atomic = True, must = True)
+        
+        np.random.seed(0)
+        all_stat_0 = make_all_stat(data0, 10, merge_sys = False)
+        np.random.seed(0)
+        all_stat_1 = make_all_stat(data1, 10, merge_sys = True)
+        all_stat_2 = merge_sys_stat(all_stat_0)
+        np.random.seed(0)
+        all_stat_3 = _make_all_stat_ref(data2, 10)
+        
+        # print(all_stat_1)
+        # print(all_stat_2)
+        for dd in all_stat_0 :
+            if 'find_' in dd: continue
+            if 'natoms_vec' in dd: continue
+            if 'default_mesh' in dd: continue
+            # print(all_stat_2[dd])
+            # print(all_stat_1[dd])
+            d1 = np.array(all_stat_1[dd])
+            d2 = np.array(all_stat_2[dd])
+            d3 = np.array(all_stat_3[dd])
+            # print(dd)
+            # print(d1.shape)
+            # print(d2.shape)            
+            # self.assertEqual(all_stat_2[dd], all_stat_1[dd])
+            self._comp_data(d1, d2)
+            self._comp_data(d1, d3)
+
diff --git a/source/train/DescrptLocFrame.py b/source/train/DescrptLocFrame.py
index 302bdf7496..25748e2770 100644
--- a/source/train/DescrptLocFrame.py
+++ b/source/train/DescrptLocFrame.py
@@ -57,7 +57,7 @@ def get_dim_out (self) :
     def get_nlist (self) :
         return self.nlist, self.rij, self.sel_a, self.sel_r
 
-    def compute_dstats (self,
+    def compute_input_stats (self,
                         data_coord, 
                         data_box, 
                         data_atype, 
diff --git a/source/train/DescrptSeA.py b/source/train/DescrptSeA.py
index c13e553aa1..9b599a0eff 100644
--- a/source/train/DescrptSeA.py
+++ b/source/train/DescrptSeA.py
@@ -70,7 +70,7 @@ def get_dim_rot_mat_1 (self) :
     def get_nlist (self) :
         return self.nlist, self.rij, self.sel_a, self.sel_r
 
-    def compute_dstats (self,
+    def compute_input_stats (self,
                         data_coord, 
                         data_box, 
                         data_atype, 
diff --git a/source/train/DescrptSeAR.py b/source/train/DescrptSeAR.py
index 97a254df9a..699f34ecbe 100644
--- a/source/train/DescrptSeAR.py
+++ b/source/train/DescrptSeAR.py
@@ -51,14 +51,14 @@ def get_nlist_a (self) :
     def get_nlist_r (self) :
         return self.descrpt_r.nlist, self.descrpt_r.rij, self.descrpt_r.sel_a, self.descrpt_r.sel_r
 
-    def compute_dstats (self,
+    def compute_input_stats (self,
                         data_coord, 
                         data_box, 
                         data_atype, 
                         natoms_vec,
                         mesh) :    
-        davg_a, dstd_a = self.descrpt_a.compute_dstats(data_coord, data_box, data_atype, natoms_vec, mesh)
-        davg_r, dstd_r = self.descrpt_r.compute_dstats(data_coord, data_box, data_atype, natoms_vec, mesh)
+        davg_a, dstd_a = self.descrpt_a.compute_input_stats(data_coord, data_box, data_atype, natoms_vec, mesh)
+        davg_r, dstd_r = self.descrpt_r.compute_input_stats(data_coord, data_box, data_atype, natoms_vec, mesh)
         self.davg = [davg_a, davg_r]
         self.dstd = [dstd_a, dstd_r]
 
diff --git a/source/train/DescrptSeR.py b/source/train/DescrptSeR.py
index 1f0bd614f6..2399eff54f 100644
--- a/source/train/DescrptSeR.py
+++ b/source/train/DescrptSeR.py
@@ -63,7 +63,7 @@ def get_dim_out (self) :
     def get_nlist (self) :
         return self.nlist, self.rij, self.sel_a, self.sel_r
 
-    def compute_dstats (self,
+    def compute_input_stats (self,
                         data_coord, 
                         data_box, 
                         data_atype, 
diff --git a/source/train/Fitting.py b/source/train/Fitting.py
index 59ea7a5ed8..7039afb5a4 100644
--- a/source/train/Fitting.py
+++ b/source/train/Fitting.py
@@ -49,7 +49,7 @@ def get_numb_fparam(self) :
     def get_numb_aparam(self) :
         return self.numb_fparam
 
-    def compute_dstats(self, all_stat, protection):
+    def compute_input_stats(self, all_stat, protection):
         # stat fparam
         if self.numb_fparam > 0:
             cat_data = np.concatenate(all_stat['fparam'], axis = 0)
diff --git a/source/train/Loss.py b/source/train/Loss.py
index ad1d2211c7..264e8b7b9c 100644
--- a/source/train/Loss.py
+++ b/source/train/Loss.py
@@ -42,11 +42,11 @@ def __init__ (self, jdata, **kwarg) :
         self.has_ae = (self.start_pref_ae != 0 or self.limit_pref_ae != 0)
         self.has_pf = (self.start_pref_pf != 0 or self.limit_pref_pf != 0)
         # data required
-        add_data_requirement('energy', 1, atomic=False, must=False, high_prec=True)
-        add_data_requirement('force',  3, atomic=True,  must=False, high_prec=False)
-        add_data_requirement('virial', 9, atomic=False, must=False, high_prec=False)
-        add_data_requirement('atom_ener', 1, atomic=True, must=False, high_prec=False)
-        add_data_requirement('atom_pref', 1, atomic=True, must=False, high_prec=False, repeat=3)
+        add_data_requirement('energy', 1, atomic=False, must=self.has_e, high_prec=True)
+        add_data_requirement('force',  3, atomic=True,  must=self.has_f, high_prec=False)
+        add_data_requirement('virial', 9, atomic=False, must=self.has_v, high_prec=False)
+        add_data_requirement('atom_ener', 1, atomic=True, must=self.has_ae, high_prec=False)
+        add_data_requirement('atom_pref', 1, atomic=True, must=self.has_pf, high_prec=False, repeat=3)
 
     def build (self, 
                learning_rate,
diff --git a/source/train/Model.py b/source/train/Model.py
index 8f54d80e59..3b69bf590d 100644
--- a/source/train/Model.py
+++ b/source/train/Model.py
@@ -22,6 +22,66 @@
 assert (os.path.isfile (module_path  + "libop_abi.{}".format(ext) )), "op module does not exist"
 op_module = tf.load_op_library(module_path + "libop_abi.{}".format(ext))
 
+
+def _make_all_stat_ref(data, nbatches):
+    all_stat = defaultdict(list)
+    for ii in range(data.get_nsystems()) :
+        for jj in range(nbatches) :
+            stat_data = data.get_batch (sys_idx = ii)
+            for dd in stat_data:
+                if dd == "natoms_vec":
+                    stat_data[dd] = stat_data[dd].astype(np.int32) 
+                all_stat[dd].append(stat_data[dd])        
+    return all_stat
+
+
+def make_all_stat(data, nbatches, merge_sys = True):
+    """
+    pack data for statistics
+    Parameters
+    ----------
+    data:
+        The data
+    merge_sys: bool (True)
+        Merge system data
+    Returns
+    -------
+    all_stat:
+        A dictionary of list of list storing data for stat. 
+        if merge_sys == False data can be accessed by 
+            all_stat[key][sys_idx][batch_idx][frame_idx]
+        else merge_sys == True can be accessed by 
+            all_stat[key][batch_idx][frame_idx]
+    """
+    all_stat = defaultdict(list)
+    for ii in range(data.get_nsystems()) :
+        sys_stat =  defaultdict(list)
+        for jj in range(nbatches) :
+            stat_data = data.get_batch (sys_idx = ii)
+            for dd in stat_data:
+                if dd == "natoms_vec":
+                    stat_data[dd] = stat_data[dd].astype(np.int32) 
+                sys_stat[dd].append(stat_data[dd])
+        for dd in sys_stat:
+            if merge_sys:
+                for bb in sys_stat[dd]:
+                    all_stat[dd].append(bb)
+            else:                    
+                all_stat[dd].append(sys_stat[dd])
+    return all_stat
+
+def merge_sys_stat(all_stat):
+    first_key = list(all_stat.keys())[0]
+    nsys = len(all_stat[first_key])
+    # print('nsys is ---------------', nsys)
+    ret = defaultdict(list)
+    for ii in range(nsys):
+        for dd in all_stat:
+            for bb in all_stat[dd][ii]:
+                ret[dd].append(bb)
+    return ret
+
+
 class Model() :
     model_type = 'ener'
 
@@ -68,25 +128,18 @@ def get_type_map (self) :
         return self.type_map
 
     def data_stat(self, data):
-        all_stat = defaultdict(list)
-        for ii in range(data.get_nsystems()) :
-            for jj in range(self.data_stat_nbatch) :
-                stat_data = data.get_batch (sys_idx = ii)
-                for dd in stat_data:
-                    if dd == "natoms_vec":
-                        stat_data[dd] = stat_data[dd].astype(np.int32) 
-                    all_stat[dd].append(stat_data[dd])        
-        self._compute_dstats (all_stat, protection = self.data_stat_protect)
+        all_stat = make_all_stat(data, self.data_stat_nbatch, merge_sys = False)
+        m_all_stat = merge_sys_stat(all_stat)
+        self._compute_dstats (m_all_stat, protection = self.data_stat_protect)
         self.bias_atom_e = data.compute_energy_shift(self.rcond)
 
-
     def _compute_dstats (self, all_stat, protection = 1e-2) :
-        self.descrpt.compute_dstats(all_stat['coord'],
-                                    all_stat['box'],
-                                    all_stat['type'],
-                                    all_stat['natoms_vec'],
-                                    all_stat['default_mesh'])
-        self.fitting.compute_dstats(all_stat, protection = protection)
+        self.descrpt.compute_input_stats(all_stat['coord'],
+                                         all_stat['box'],
+                                         all_stat['type'],
+                                         all_stat['natoms_vec'],
+                                         all_stat['default_mesh'])
+        self.fitting.compute_input_stats(all_stat, protection = protection)
 
     
     def build (self, 
@@ -264,11 +317,11 @@ def data_stat(self, data):
         self._compute_dstats (all_stat)
 
     def _compute_dstats (self, all_stat) :        
-        self.descrpt.compute_dstats(all_stat['coord'],
-                                    all_stat['box'],
-                                    all_stat['type'],
-                                    all_stat['natoms_vec'],
-                                    all_stat['default_mesh'])
+        self.descrpt.compute_input_stats(all_stat['coord'],
+                                         all_stat['box'],
+                                         all_stat['type'],
+                                         all_stat['natoms_vec'],
+                                         all_stat['default_mesh'])
 
     def build (self, 
                coord_, 

From afd400f526cc44f57f48479029038f56e179179c Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Wed, 6 Nov 2019 17:08:31 +0800
Subject: [PATCH 11/38] mv energy shift computation to output data stat of
 fitting classes

---
 source/tests/test_gen_stat_data.py     | 64 +++++++++++++++++++-------
 source/tests/test_model_loc_frame.py   |  4 +-
 source/tests/test_model_se_a.py        |  4 +-
 source/tests/test_model_se_a_aparam.py |  4 +-
 source/tests/test_model_se_a_fparam.py |  4 +-
 source/tests/test_model_se_a_srtab.py  |  4 +-
 source/tests/test_model_se_r.py        |  4 +-
 source/train/Fitting.py                | 34 +++++++++++++-
 source/train/Model.py                  | 14 +++---
 9 files changed, 98 insertions(+), 38 deletions(-)

diff --git a/source/tests/test_gen_stat_data.py b/source/tests/test_gen_stat_data.py
index d9f99b5818..852915bd12 100644
--- a/source/tests/test_gen_stat_data.py
+++ b/source/tests/test_gen_stat_data.py
@@ -4,6 +4,7 @@
 import dpdata
 
 from deepmd.DataSystem import DeepmdDataSystem
+from deepmd.Fitting import EnerFitting
 from deepmd.Model import make_all_stat, merge_sys_stat, _make_all_stat_ref
 
 def gen_sys(nframes, atom_types):
@@ -32,7 +33,7 @@ def setUp(self):
         sys0.data = data0
         sys1.data = data1
         sys0.to_deepmd_npy('system_0', set_size = 10)
-        sys0.to_deepmd_npy('system_1', set_size = 10)
+        sys1.to_deepmd_npy('system_1', set_size = 10)
         
     def tearDown(self):
         shutil.rmtree('system_0')
@@ -72,21 +73,50 @@ def test_merge_all_stat(self):
         np.random.seed(0)
         all_stat_3 = _make_all_stat_ref(data2, 10)
         
-        # print(all_stat_1)
-        # print(all_stat_2)
-        for dd in all_stat_0 :
-            if 'find_' in dd: continue
-            if 'natoms_vec' in dd: continue
-            if 'default_mesh' in dd: continue
+        ####################################
+        # only check if the energy is concatenated correctly
+        ####################################
+        dd = 'energy'
+            # if 'find_' in dd: continue
+            # if 'natoms_vec' in dd: continue
+            # if 'default_mesh' in dd: continue
             # print(all_stat_2[dd])
-            # print(all_stat_1[dd])
-            d1 = np.array(all_stat_1[dd])
-            d2 = np.array(all_stat_2[dd])
-            d3 = np.array(all_stat_3[dd])
-            # print(dd)
-            # print(d1.shape)
-            # print(d2.shape)            
-            # self.assertEqual(all_stat_2[dd], all_stat_1[dd])
-            self._comp_data(d1, d2)
-            self._comp_data(d1, d3)
+            # print(dd, all_stat_1[dd])
+        d1 = np.array(all_stat_1[dd])
+        d2 = np.array(all_stat_2[dd])
+        d3 = np.array(all_stat_3[dd])
+        # print(dd)
+        # print(d1.shape)
+        # print(d2.shape)            
+        # self.assertEqual(all_stat_2[dd], all_stat_1[dd])
+        self._comp_data(d1, d2)
+        self._comp_data(d1, d3)
 
+
+class TestEnerShift(unittest.TestCase):
+    def setUp(self):
+        data0 = gen_sys(30, [0, 1, 0, 2, 1])
+        data1 = gen_sys(30, [0, 1, 0, 0])    
+        sys0 = dpdata.LabeledSystem()
+        sys1 = dpdata.LabeledSystem()
+        sys0.data = data0
+        sys1.data = data1
+        sys0.to_deepmd_npy('system_0', set_size = 10)
+        sys1.to_deepmd_npy('system_1', set_size = 10)
+        
+    def tearDown(self):
+        shutil.rmtree('system_0')
+        shutil.rmtree('system_1')
+
+    def test_ener_shift(self):
+        np.random.seed(0)
+        data = DeepmdDataSystem(['system_0', 'system_1'], 
+                                5, 
+                                10, 
+                                1.0)
+        data.add('energy', 1, must = True)
+        ener_shift0 = data.compute_energy_shift(rcond = 1)
+        all_stat = make_all_stat(data, 4, merge_sys = False)
+        ener_shift1 = EnerFitting._compute_output_stats(all_stat, rcond = 1)        
+        for ii in range(len(ener_shift0)):
+            self.assertAlmostEqual(ener_shift0[ii], ener_shift1[ii])
diff --git a/source/tests/test_model_loc_frame.py b/source/tests/test_model_loc_frame.py
index 5981ee6497..b651862885 100644
--- a/source/tests/test_model_loc_frame.py
+++ b/source/tests/test_model_loc_frame.py
@@ -49,8 +49,8 @@ def test_model(self):
                       'natoms_vec' : [test_data['natoms_vec']],
                       'default_mesh' : [test_data['default_mesh']]
         }
-        model._compute_dstats(input_data)
-        model.bias_atom_e = data.compute_energy_shift()
+        model._compute_input_stat(input_data)
+        model.fitting.bias_atom_e = data.compute_energy_shift()
 
         t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
         t_energy           = tf.placeholder(global_ener_float_precision, [None], name='t_energy')
diff --git a/source/tests/test_model_se_a.py b/source/tests/test_model_se_a.py
index d5be148358..0d54f14c5f 100644
--- a/source/tests/test_model_se_a.py
+++ b/source/tests/test_model_se_a.py
@@ -49,8 +49,8 @@ def test_model(self):
                       'natoms_vec' : [test_data['natoms_vec']],
                       'default_mesh' : [test_data['default_mesh']]
         }
-        model._compute_dstats(input_data)
-        model.bias_atom_e = data.compute_energy_shift()
+        model._compute_input_stat(input_data)
+        model.descrpt.bias_atom_e = data.compute_energy_shift()
 
         t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
         t_energy           = tf.placeholder(global_ener_float_precision, [None], name='t_energy')
diff --git a/source/tests/test_model_se_a_aparam.py b/source/tests/test_model_se_a_aparam.py
index 293145e377..58b060225c 100644
--- a/source/tests/test_model_se_a_aparam.py
+++ b/source/tests/test_model_se_a_aparam.py
@@ -51,8 +51,8 @@ def test_model(self):
                       'default_mesh' : [test_data['default_mesh']],
                       'aparam': [test_data['aparam']],
         }
-        model._compute_dstats(input_data)
-        model.bias_atom_e = data.compute_energy_shift()
+        model._compute_input_stat(input_data)
+        model.fitting.bias_atom_e = data.compute_energy_shift()  # the fitting object owns bias_atom_e and reads it in build
 
         t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
         t_energy           = tf.placeholder(global_ener_float_precision, [None], name='t_energy')
diff --git a/source/tests/test_model_se_a_fparam.py b/source/tests/test_model_se_a_fparam.py
index 4f85af0579..ec4a46c7d4 100644
--- a/source/tests/test_model_se_a_fparam.py
+++ b/source/tests/test_model_se_a_fparam.py
@@ -49,8 +49,8 @@ def test_model(self):
                       'default_mesh' : [test_data['default_mesh']],
                       'fparam': [test_data['fparam']],
         }
-        model._compute_dstats(input_data)
-        model.bias_atom_e = data.compute_energy_shift()
+        model._compute_input_stat(input_data)
+        model.fitting.bias_atom_e = data.compute_energy_shift()  # the fitting object owns bias_atom_e and reads it in build
 
         t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
         t_energy           = tf.placeholder(global_ener_float_precision, [None], name='t_energy')
diff --git a/source/tests/test_model_se_a_srtab.py b/source/tests/test_model_se_a_srtab.py
index 82d1492067..c2950fe788 100644
--- a/source/tests/test_model_se_a_srtab.py
+++ b/source/tests/test_model_se_a_srtab.py
@@ -58,8 +58,8 @@ def test_model(self):
                       'natoms_vec' : [test_data['natoms_vec']],
                       'default_mesh' : [test_data['default_mesh']]
         }
-        model._compute_dstats(input_data)
-        model.bias_atom_e = data.compute_energy_shift()
+        model._compute_input_stat(input_data)
+        model.fitting.bias_atom_e = data.compute_energy_shift()  # the fitting object owns bias_atom_e and reads it in build
 
         t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
         t_energy           = tf.placeholder(global_ener_float_precision, [None], name='t_energy')
diff --git a/source/tests/test_model_se_r.py b/source/tests/test_model_se_r.py
index 965e89a1bd..d3607a9164 100644
--- a/source/tests/test_model_se_r.py
+++ b/source/tests/test_model_se_r.py
@@ -48,8 +48,8 @@ def test_model(self):
                       'natoms_vec' : [test_data['natoms_vec']],
                       'default_mesh' : [test_data['default_mesh']]
         }
-        model._compute_dstats(input_data)
-        model.bias_atom_e = data.compute_energy_shift()
+        model._compute_input_stat(input_data)
+        model.fitting.bias_atom_e = data.compute_energy_shift()  # the fitting object owns bias_atom_e and reads it in build
 
         t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
         t_energy           = tf.placeholder(global_ener_float_precision, [None], name='t_energy')
diff --git a/source/train/Fitting.py b/source/train/Fitting.py
index 7039afb5a4..62fcf2a7dc 100644
--- a/source/train/Fitting.py
+++ b/source/train/Fitting.py
@@ -23,14 +23,17 @@ def __init__ (self, jdata, descrpt):
                .add('numb_aparam',      int,    default = 0)\
                .add('neuron',           list,   default = [120,120,120], alias = 'n_neuron')\
                .add('resnet_dt',        bool,   default = True)\
+               .add('rcond',            float,  default = 1e-3) \
                .add('seed',             int)               
         class_data = args.parse(jdata)
         self.numb_fparam = class_data['numb_fparam']
         self.numb_aparam = class_data['numb_aparam']
         self.n_neuron = class_data['neuron']
         self.resnet_dt = class_data['resnet_dt']
+        self.rcond = class_data['rcond']
         self.seed = class_data['seed']
         self.useBN = False
+        self.bias_atom_e = None
         # data requirement
         if self.numb_fparam > 0 :
             add_data_requirement('fparam', self.numb_fparam, atomic=False, must=True, high_prec=False)
@@ -49,6 +52,32 @@ def get_numb_fparam(self) :
     def get_numb_aparam(self) :
         return self.numb_fparam
 
+    def compute_output_stats(self, all_stat):  # fit the energy bias with self.rcond and cache it on self.bias_atom_e
+        self.bias_atom_e = self._compute_output_stats(all_stat, rcond = self.rcond)
+
+    @classmethod
+    def _compute_output_stats(self, all_stat, rcond = 1e-3):  # NOTE: classmethod, so `self` is bound to the class here
+        data = all_stat['energy']
+        # indexed as data[sys_idx][batch_idx][frame_idx]; all entries of one system are pooled below
+        sys_ener = np.array([])  # collects one mean energy per system
+        for ss in range(len(data)):
+            sys_data = []
+            for ii in range(len(data[ss])):
+                for jj in range(len(data[ss][ii])):
+                    sys_data.append(data[ss][ii][jj])
+            sys_data = np.concatenate(sys_data)  # assumes every entry is at least 1-d -- TODO confirm
+            sys_ener = np.append(sys_ener, np.average(sys_data))
+        data = all_stat['natoms_vec']
+        sys_tynatom = np.array([])
+        nsys = len(data)
+        for ss in range(len(data)):
+            sys_tynatom = np.append(sys_tynatom, data[ss][0].astype(np.float64))  # only the first entry of each system is used
+        sys_tynatom = np.reshape(sys_tynatom, [nsys,-1])  # one row per system
+        sys_tynatom = sys_tynatom[:,2:]  # drop the first two columns; presumably the rest are per-type atom counts -- verify
+        energy_shift,resd,rank,s_value \
+            = np.linalg.lstsq(sys_tynatom, sys_ener, rcond = rcond)  # least-squares solve of sys_tynatom @ shift ~= sys_ener
+        return energy_shift    # only the solution is returned; residuals, rank and singular values are discarded
+
     def compute_input_stats(self, all_stat, protection):
         # stat fparam
         if self.numb_fparam > 0:
@@ -78,7 +107,8 @@ def compute_input_stats(self, all_stat, protection):
             for ii in range(self.aparam_std.size):
                 if self.aparam_std[ii] < protection:
                     self.aparam_std[ii] = protection
-            self.aparam_inv_std = 1./self.aparam_std                
+            self.aparam_inv_std = 1./self.aparam_std
+
 
     def _compute_std (self, sumv2, sumv, sumn) :
         return np.sqrt(sumv2/sumn - np.multiply(sumv/sumn, sumv/sumn))
@@ -88,9 +118,9 @@ def build (self,
                inputs,
                input_dict,
                natoms,
-               bias_atom_e = None,
                reuse = None,
                suffix = '') :
+        bias_atom_e = self.bias_atom_e
         if self.numb_fparam > 0 and ( self.fparam_avg is None or self.fparam_inv_std is None ):
             raise RuntimeError('No data stat result. one should do data statisitic, before build')
         if self.numb_aparam > 0 and ( self.aparam_avg is None or self.aparam_inv_std is None ):
diff --git a/source/train/Model.py b/source/train/Model.py
index 3b69bf590d..d0ef0bf2fc 100644
--- a/source/train/Model.py
+++ b/source/train/Model.py
@@ -73,7 +73,6 @@ def make_all_stat(data, nbatches, merge_sys = True):
 def merge_sys_stat(all_stat):
     first_key = list(all_stat.keys())[0]
     nsys = len(all_stat[first_key])
-    # print('nsys is ---------------', nsys)
     ret = defaultdict(list)
     for ii in range(nsys):
         for dd in all_stat:
@@ -95,14 +94,12 @@ def __init__ (self, jdata, descrpt, fitting):
 
         args = ClassArg()\
                .add('type_map',         list,   default = []) \
-               .add('rcond',            float,  default = 1e-3) \
                .add('data_stat_nbatch', int,    default = 10) \
                .add('data_stat_protect',float,  default = 1e-2) \
                .add('use_srtab',        str)
         class_data = args.parse(jdata)
         self.type_map = class_data['type_map']
         self.srtab_name = class_data['use_srtab']
-        self.rcond = class_data['rcond']
         self.data_stat_nbatch = class_data['data_stat_nbatch']
         self.data_stat_protect = class_data['data_stat_protect']
         if self.srtab_name is not None :
@@ -130,10 +127,11 @@ def get_type_map (self) :
     def data_stat(self, data):
         all_stat = make_all_stat(data, self.data_stat_nbatch, merge_sys = False)
         m_all_stat = merge_sys_stat(all_stat)
-        self._compute_dstats (m_all_stat, protection = self.data_stat_protect)
-        self.bias_atom_e = data.compute_energy_shift(self.rcond)
+        self._compute_input_stat(m_all_stat, protection = self.data_stat_protect)
+        self._compute_output_stat(all_stat)
+        # self.bias_atom_e = data.compute_energy_shift(self.rcond)
 
-    def _compute_dstats (self, all_stat, protection = 1e-2) :
+    def _compute_input_stat (self, all_stat, protection = 1e-2) :
         self.descrpt.compute_input_stats(all_stat['coord'],
                                          all_stat['box'],
                                          all_stat['type'],
@@ -141,6 +139,9 @@ def _compute_dstats (self, all_stat, protection = 1e-2) :
                                          all_stat['default_mesh'])
         self.fitting.compute_input_stats(all_stat, protection = protection)
 
+    def _compute_output_stat (self, all_stat) :
+        self.fitting.compute_output_stats(all_stat)
+
     
     def build (self, 
                coord_, 
@@ -194,7 +195,6 @@ def build (self,
         atom_ener = self.fitting.build (dout, 
                                         input_dict, 
                                         natoms, 
-                                        bias_atom_e = self.bias_atom_e, 
                                         reuse = reuse, 
                                         suffix = suffix)
 

From fd9cff20ec5228a03476ce272571b52eff764f35 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Thu, 7 Nov 2019 17:04:35 +0800
Subject: [PATCH 12/38] modify data only when it is requested, to save
 computational cost

---
 source/train/Data.py         | 28 ++++++++++++++++++++--------
 source/train/DataModifier.py |  5 ++---
 source/train/DataSystem.py   | 23 ++++++++++++++++-------
 source/train/DeepEval.py     | 16 ++--------------
 source/train/DescrptSeAR.py  |  8 ++++----
 source/train/Trainer.py      |  2 +-
 source/train/common.py       | 14 ++++++++++++++
 7 files changed, 59 insertions(+), 37 deletions(-)

diff --git a/source/train/Data.py b/source/train/Data.py
index 7445f5ec1c..07057d9fec 100644
--- a/source/train/Data.py
+++ b/source/train/Data.py
@@ -125,12 +125,24 @@ def get_batch(self, batch_size) :
             iterator_1 = set_size
         idx = np.arange (self.iterator, iterator_1)
         self.iterator += batch_size
-        return self._get_subdata(self.batch_set, idx)
+        ret = self._get_subdata(self.batch_set, idx)
+        if self.modifier is not None:
+            self.modifier.modify(ret)
+        return ret
 
-    def get_test (self) :
+    def get_test (self, ntests = -1) :
         if not hasattr(self, 'test_set') :            
             self._load_test_set(self.test_dir, self.shuffle_test)
-        return self._get_subdata(self.test_set)        
+        if ntests == -1:
+            idx = None
+        else :
+            ntests_ = ntests if ntests < self.test_set['type'].shape[0] else self.test_set['type'].shape[0]
+            # print('ntest', self.test_set['type'].shape[0], ntests, ntests_)
+            idx = np.arange(ntests_)
+        ret = self._get_subdata(self.test_set, idx = idx)
+        if self.modifier is not None:
+            self.modifier.modify(ret)
+        return ret
 
     def get_type_map(self) :
         return self.type_map
@@ -142,7 +154,7 @@ def get_numb_set (self) :
         return len (self.train_dirs)
 
     def get_numb_batch (self, batch_size, set_idx) :
-        data = self._load_set(self.train_dirs[set_idx], modify = False)
+        data = self._load_set(self.train_dirs[set_idx])
         return data["coord"].shape[0] // batch_size
 
     def get_sys_numb_batch (self, batch_size) :
@@ -214,6 +226,9 @@ def _load_batch_set (self,
                          set_name) :
         self.batch_set = self._load_set(set_name)
         self.batch_set, sf_idx = self._shuffle_data(self.batch_set)
+        self.reset_get_batch()
+
+    def reset_get_batch(self):
         self.iterator = 0
 
     def _load_test_set (self,
@@ -240,7 +255,7 @@ def _shuffle_data (self,
                 ret[kk] = data[kk]
         return ret, idx
 
-    def _load_set(self, set_name, modify = True) :
+    def _load_set(self, set_name) :
         ret = {}
         # get nframes
         path = os.path.join(set_name, "coord.npy")
@@ -273,9 +288,6 @@ def _load_set(self, set_name, modify = True) :
                 tmp_in = data[k_in].astype(global_ener_float_precision)
                 data[kk] = np.sum(np.reshape(tmp_in, [nframes, self.natoms, ndof]), axis = 1)
 
-        if modify and self.modifier is not None:
-            self.modifier.modify(data)
-
         return data
 
 
diff --git a/source/train/DataModifier.py b/source/train/DataModifier.py
index 044b1d8182..029e64219f 100644
--- a/source/train/DataModifier.py
+++ b/source/train/DataModifier.py
@@ -2,7 +2,7 @@
 import numpy as np
 from deepmd import DeepDipole
 from deepmd.env import tf
-from deepmd.common import select_idx_map
+from deepmd.common import select_idx_map, make_default_mesh
 from deepmd.EwaldRecp import EwaldRecp
 from deepmd.RunOptions import global_tf_float_precision
 from deepmd.RunOptions import global_np_float_precision
@@ -201,7 +201,6 @@ def eval_modify(self, coord, box, atype, eval_fv = True):
             corr_v = []
             corr_av = []
             for ii in range(0,nframes,batch_size):
-                print(ii, nframes)
                 f, v, av = self.eval_fv(coord[ii:ii+batch_size], box[ii:ii+batch_size], atype[0], ext_f[ii:ii+batch_size])
                 corr_f.append(f)
                 corr_v.append(v)
@@ -240,7 +239,7 @@ def eval_fv(self, coords, cells, atom_types, ext_f) :
         # make natoms_vec and default_mesh
         natoms_vec = self.make_natoms_vec(atom_types)
         assert(natoms_vec[0] == natoms)
-        default_mesh = self.make_default_mesh(cells)
+        default_mesh = make_default_mesh(cells)
 
         # evaluate
         tensor = []
diff --git a/source/train/DataSystem.py b/source/train/DataSystem.py
index b967074a64..997baf878c 100644
--- a/source/train/DataSystem.py
+++ b/source/train/DataSystem.py
@@ -88,14 +88,20 @@ def __init__ (self,
             self.print_summary(run_opt)
 
 
-    def _load_test(self):
+    def _load_test(self, ntests = -1):
         self.test_data = collections.defaultdict(list)
-        self.default_mesh = []
         for ii in range(self.nsystems) :
-            test_system_data = self.data_systems[ii].get_test ()
+            test_system_data = self.data_systems[ii].get_test(ntests = ntests)
             for nn in test_system_data:
                 self.test_data[nn].append(test_system_data[nn])
-            cell_size = np.max (self.rcut)
+
+    def _make_default_mesh(self):
+        self.default_mesh = []
+        cell_size = np.max (self.rcut)
+        for ii in range(self.nsystems) :
+            test_system_data = self.data_systems[ii].get_batch(self.batch_size[ii])
+            self.data_systems[ii].reset_get_batch()
+            # test_system_data = self.data_systems[ii].get_test()
             avg_box = np.average (test_system_data["box"], axis = 0)
             avg_box = np.reshape (avg_box, [3,3])
             ncell = (np.linalg.norm(avg_box, axis=1)/ cell_size).astype(np.int32)
@@ -152,7 +158,7 @@ def get_batch (self,
                    sys_weights = None,
                    style = "prob_sys_size") :
         if not hasattr(self, 'default_mesh') :
-            self._load_test()
+            self._make_default_mesh()
         if sys_idx is not None :
             self.pick_idx = sys_idx
         else :
@@ -172,9 +178,12 @@ def get_batch (self,
         return b_data
 
     def get_test (self, 
-                  sys_idx = None) :
+                  sys_idx = None, 
+                  ntests = -1) :
         if not hasattr(self, 'default_mesh') :
-            self._load_test()
+            self._make_default_mesh()
+        if not hasattr(self, 'test_data') :
+            self._load_test(ntests = ntests)
         if sys_idx is not None :
             idx = sys_idx
         else :
diff --git a/source/train/DeepEval.py b/source/train/DeepEval.py
index 8e9e1d7113..e452dc860d 100644
--- a/source/train/DeepEval.py
+++ b/source/train/DeepEval.py
@@ -4,6 +4,7 @@
 import numpy as np
 
 from deepmd.env import tf
+from deepmd.common import make_default_mesh
 
 from tensorflow.python.framework import ops
 
@@ -51,19 +52,6 @@ def load_graph(self,
             )
         return graph
 
-    def make_default_mesh(self, test_box) :
-        cell_size = 3
-        nframes = test_box.shape[0]
-        default_mesh = np.zeros([nframes, 6], dtype = np.int32)
-        for ff in range(nframes):
-            ncell = np.ones (3, dtype=np.int32)
-            for ii in range(3) :
-                ncell[ii] = int ( np.linalg.norm(test_box[ff][ii]) / cell_size )
-                if (ncell[ii] < 2) : ncell[ii] = 2
-            default_mesh[ff][3] = ncell[0]
-            default_mesh[ff][4] = ncell[1]
-            default_mesh[ff][5] = ncell[2]
-        return default_mesh
 
     def sort_input(self, coord, atom_type, sel_atoms = None) :
         if sel_atoms is not None:
@@ -167,7 +155,7 @@ def eval(self,
         # make natoms_vec and default_mesh
         natoms_vec = self.make_natoms_vec(atom_types)
         assert(natoms_vec[0] == natoms)
-        default_mesh = self.make_default_mesh(cells)
+        default_mesh = make_default_mesh(cells)
 
         # evaluate
         tensor = []
diff --git a/source/train/DescrptSeAR.py b/source/train/DescrptSeAR.py
index 699f34ecbe..b7005ff22c 100644
--- a/source/train/DescrptSeAR.py
+++ b/source/train/DescrptSeAR.py
@@ -57,10 +57,10 @@ def compute_input_stats (self,
                         data_atype, 
                         natoms_vec,
                         mesh) :    
-        davg_a, dstd_a = self.descrpt_a.compute_input_stats(data_coord, data_box, data_atype, natoms_vec, mesh)
-        davg_r, dstd_r = self.descrpt_r.compute_input_stats(data_coord, data_box, data_atype, natoms_vec, mesh)
-        self.davg = [davg_a, davg_r]
-        self.dstd = [dstd_a, dstd_r]
+        self.descrpt_a.compute_input_stats(data_coord, data_box, data_atype, natoms_vec, mesh)
+        self.descrpt_r.compute_input_stats(data_coord, data_box, data_atype, natoms_vec, mesh)
+        self.davg = [self.descrpt_a.davg, self.descrpt_r.davg]
+        self.dstd = [self.descrpt_a.dstd, self.descrpt_r.dstd]
 
 
     def build (self, 
diff --git a/source/train/Trainer.py b/source/train/Trainer.py
index 10472d828b..2cc73e673e 100644
--- a/source/train/Trainer.py
+++ b/source/train/Trainer.py
@@ -463,7 +463,7 @@ def test_on_the_fly (self,
                          fp,
                          data,
                          feed_dict_batch) :
-        test_data = data.get_test ()
+        test_data = data.get_test(ntests = self.numb_test)
         feed_dict_test = {}
         for kk in test_data.keys():
             if kk == 'find_type' or kk == 'type' :
diff --git a/source/train/common.py b/source/train/common.py
index 7d8e530460..c322f0e9c4 100644
--- a/source/train/common.py
+++ b/source/train/common.py
@@ -27,6 +27,20 @@ def select_idx_map(atom_type,
     return idx_map
 
 
+def make_default_mesh(test_box, cell_size = 3) :  # returns an int32 array [nframes, 6]; only columns 3-5 are filled
+    nframes = test_box.shape[0]
+    default_mesh = np.zeros([nframes, 6], dtype = np.int32)  # columns 0-2 are left at zero
+    for ff in range(nframes):
+        ncell = np.ones (3, dtype=np.int32)
+        for ii in range(3) :
+            ncell[ii] = int ( np.linalg.norm(test_box[ff][ii]) / cell_size )  # NOTE(review): assumes test_box[ff][ii] is the ii-th box vector -- confirm the shape callers pass
+            if (ncell[ii] < 2) : ncell[ii] = 2  # never fewer than 2 cells per direction
+        default_mesh[ff][3] = ncell[0]
+        default_mesh[ff][4] = ncell[1]
+        default_mesh[ff][5] = ncell[2]
+    return default_mesh
+
+
 class ClassArg () : 
     def __init__ (self) :
         self.arg_dict = {}

From 4da6faee1ff14f9d905a07da657bf81e7624dd15 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Thu, 7 Nov 2019 20:34:49 +0800
Subject: [PATCH 13/38] modify batch data on load

---
 source/train/Data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/train/Data.py b/source/train/Data.py
index 07057d9fec..6c0672afc2 100644
--- a/source/train/Data.py
+++ b/source/train/Data.py
@@ -120,14 +120,14 @@ def get_batch(self, batch_size) :
             self._load_batch_set (self.train_dirs[self.set_count % self.get_numb_set()])
             self.set_count += 1
             set_size = self.batch_set["coord"].shape[0]
+            if self.modifier is not None:
+                self.modifier.modify(self.batch_set)
         iterator_1 = self.iterator + batch_size
         if iterator_1 >= set_size :
             iterator_1 = set_size
         idx = np.arange (self.iterator, iterator_1)
         self.iterator += batch_size
         ret = self._get_subdata(self.batch_set, idx)
-        if self.modifier is not None:
-            self.modifier.modify(ret)
         return ret
 
     def get_test (self, ntests = -1) :

From 835140f866df1696e94247d8e05ba979f17a03f3 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Fri, 8 Nov 2019 13:50:08 +0800
Subject: [PATCH 14/38] infer global polar

---
 deepmd/__init__.py        |  1 +
 source/scripts/freeze.py  |  2 ++
 source/train/DeepEval.py  | 14 +++++++++-----
 source/train/DeepPolar.py | 11 +++++++++++
 4 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/deepmd/__init__.py b/deepmd/__init__.py
index b2711088e8..231145b989 100644
--- a/deepmd/__init__.py
+++ b/deepmd/__init__.py
@@ -3,6 +3,7 @@
 from .DeepPot    import DeepPot
 from .DeepDipole import DeepDipole
 from .DeepPolar  import DeepPolar
+from .DeepPolar  import DeepGlobalPolar
 from .DeepWFC    import DeepWFC
 
 set_mkl()
diff --git a/source/scripts/freeze.py b/source/scripts/freeze.py
index 06f1df86c1..4dd51492a4 100755
--- a/source/scripts/freeze.py
+++ b/source/scripts/freeze.py
@@ -44,6 +44,8 @@ def _make_node_names(model_type = None) :
         nodes = "o_dipole,o_rmat,o_rmat_deriv,o_nlist,o_rij,descrpt_attr/rcut,descrpt_attr/ntypes,descrpt_attr/sel,descrpt_attr/ndescrpt,model_attr/tmap,model_attr/sel_type,model_attr/model_type"
     elif model_type == 'polar':
         nodes = "o_polar,descrpt_attr/rcut,descrpt_attr/ntypes,model_attr/tmap,model_attr/sel_type,model_attr/model_type"
+    elif model_type == 'global_polar':
+        nodes = "o_global_polar,descrpt_attr/rcut,descrpt_attr/ntypes,model_attr/tmap,model_attr/sel_type,model_attr/model_type"
     else:
         raise RuntimeError('unknow model type ' + model_type)
     return nodes
diff --git a/source/train/DeepEval.py b/source/train/DeepEval.py
index e452dc860d..2c0f613c27 100644
--- a/source/train/DeepEval.py
+++ b/source/train/DeepEval.py
@@ -137,7 +137,8 @@ def get_sel_type(self):
     def eval(self,
              coords, 
              cells, 
-             atom_types) :
+             atom_types, 
+             atomic = True) :
         # standarize the shape of inputs
         coords = np.array(coords)
         cells = np.array(cells)
@@ -171,10 +172,13 @@ def eval(self,
             tensor.append(v_out[0])
 
         # reverse map of the outputs
-        tensor = np.array(tensor)
-        tensor = self.reverse_map(np.reshape(tensor, [nframes,-1,self.variable_dof]), sel_imap)
-
-        tensor = np.reshape(tensor, [nframes, len(sel_at), self.variable_dof])
+        if atomic:
+            tensor = np.array(tensor)
+            tensor = self.reverse_map(np.reshape(tensor, [nframes,-1,self.variable_dof]), sel_imap)
+            tensor = np.reshape(tensor, [nframes, len(sel_at), self.variable_dof])
+        else:
+            tensor = np.reshape(tensor, [nframes, self.variable_dof])
+        
         return tensor
 
     
diff --git a/source/train/DeepPolar.py b/source/train/DeepPolar.py
index 0ec37876ca..3af499dd07 100644
--- a/source/train/DeepPolar.py
+++ b/source/train/DeepPolar.py
@@ -9,3 +9,14 @@ def __init__(self,
                  model_file) :
         DeepTensor.__init__(self, model_file, 'polar', 9)
 
+    
+class DeepGlobalPolar (DeepTensor) :  # DeepTensor bound to the 'global_polar' model type
+    def __init__(self, 
+                 model_file) :
+        DeepTensor.__init__(self, model_file, 'global_polar', 9)  # 9 is presumably the number of output components -- verify against DeepTensor
+
+    def eval(self,
+             coords, 
+             cells, 
+             atom_types) :
+        return DeepTensor.eval(self, coords, cells, atom_types, atomic = False)  # always requests the non-atomic output from the base eval

From 1c9c74d3818fad0a61ac25d702796fcb0baf5fe7 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Fri, 8 Nov 2019 16:05:08 +0800
Subject: [PATCH 15/38] use average eig value as bias of polar fitting

---
 source/train/Fitting.py | 29 +++++++++++++++++++---
 source/train/Model.py   | 53 ++++++++++++++++++++++-------------------
 2 files changed, 54 insertions(+), 28 deletions(-)

diff --git a/source/train/Fitting.py b/source/train/Fitting.py
index 62fcf2a7dc..650aac8115 100644
--- a/source/train/Fitting.py
+++ b/source/train/Fitting.py
@@ -401,6 +401,21 @@ def get_sel_type(self):
     def get_out_size(self):
         return 9
 
+    def compute_input_stats(self, all_stat, protection = 1e-2):
+        assert('polarizability' in all_stat.keys())
+        data = all_stat['polarizability']
+        all_tmp = []
+        for ss in range(len(data)):
+            tmp = np.concatenate(data[ss], axis = 0)
+            print(tmp.shape)
+            tmp = np.reshape(tmp, [-1, 3, 3])
+            tmp,_ = np.linalg.eig(tmp)
+            tmp = np.absolute(tmp)
+            tmp = np.sort(tmp, axis = 1)  # ascending |eigenvalue| per 3x3 tensor -> shape [ntensor, 3]
+            all_tmp.append(tmp)
+        all_tmp = np.concatenate(all_tmp, axis = 0)  # stack the tensors of every entry as rows; entries may differ in length
+        self.avgeig = np.average(all_tmp, axis = 0)  # mean of each sorted eigenvalue over all tensors -> 3 values
+
     def build (self, 
                input_d,
                rot_mat,
@@ -433,15 +448,23 @@ def build (self,
                 else :
                     layer = one_layer(layer, self.n_neuron[ii], name='layer_'+str(ii)+'_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed)
             if self.fit_diag :
+                bavg = np.zeros(self.dim_rot_mat_1)
+                bavg[0] = self.avgeig[0]
+                bavg[1] = self.avgeig[1]
+                bavg[2] = self.avgeig[2]
                 # (nframes x natoms) x naxis
-                final_layer = one_layer(layer, self.dim_rot_mat_1, activation_fn = None, name='final_layer_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed)
+                final_layer = one_layer(layer, self.dim_rot_mat_1, activation_fn = None, name='final_layer_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, bavg = bavg)
                 # (nframes x natoms) x naxis
                 final_layer = tf.reshape(final_layer, [tf.shape(inputs)[0] * natoms[2+type_i], self.dim_rot_mat_1])
                 # (nframes x natoms) x naxis x naxis
                 final_layer = tf.matrix_diag(final_layer)                
-            else :                
+            else :
+                bavg = np.zeros(self.dim_rot_mat_1*self.dim_rot_mat_1)
+                bavg[0*self.dim_rot_mat_1+0] = self.avgeig[0]
+                bavg[1*self.dim_rot_mat_1+1] = self.avgeig[1]
+                bavg[2*self.dim_rot_mat_1+2] = self.avgeig[2]
                 # (nframes x natoms) x (naxis x naxis)
-                final_layer = one_layer(layer, self.dim_rot_mat_1*self.dim_rot_mat_1, activation_fn = None, name='final_layer_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed)
+                final_layer = one_layer(layer, self.dim_rot_mat_1*self.dim_rot_mat_1, activation_fn = None, name='final_layer_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, bavg = bavg)
                 # (nframes x natoms) x naxis x naxis
                 final_layer = tf.reshape(final_layer, [tf.shape(inputs)[0] * natoms[2+type_i], self.dim_rot_mat_1, self.dim_rot_mat_1])
                 # (nframes x natoms) x naxis x naxis
diff --git a/source/train/Model.py b/source/train/Model.py
index d0ef0bf2fc..d00d1bfaa4 100644
--- a/source/train/Model.py
+++ b/source/train/Model.py
@@ -126,18 +126,18 @@ def get_type_map (self) :
 
     def data_stat(self, data):
         all_stat = make_all_stat(data, self.data_stat_nbatch, merge_sys = False)
-        m_all_stat = merge_sys_stat(all_stat)
-        self._compute_input_stat(m_all_stat, protection = self.data_stat_protect)
+        self._compute_input_stat(all_stat, protection = self.data_stat_protect)
         self._compute_output_stat(all_stat)
         # self.bias_atom_e = data.compute_energy_shift(self.rcond)
 
     def _compute_input_stat (self, all_stat, protection = 1e-2) :
-        self.descrpt.compute_input_stats(all_stat['coord'],
-                                         all_stat['box'],
-                                         all_stat['type'],
-                                         all_stat['natoms_vec'],
-                                         all_stat['default_mesh'])
-        self.fitting.compute_input_stats(all_stat, protection = protection)
+        m_all_stat = merge_sys_stat(all_stat)
+        self.descrpt.compute_input_stats(m_all_stat['coord'],
+                                         m_all_stat['box'],
+                                         m_all_stat['type'],
+                                         m_all_stat['natoms_vec'],
+                                         m_all_stat['default_mesh'])
+        self.fitting.compute_input_stats(m_all_stat, protection = protection)
 
     def _compute_output_stat (self, all_stat) :
         self.fitting.compute_output_stats(all_stat)
@@ -285,10 +285,12 @@ def __init__ (self, jdata, descrpt, fitting, var_name):
 
         args = ClassArg()\
                .add('type_map',         list,   default = []) \
-               .add('data_stat_nbatch', int,    default = 10)
+               .add('data_stat_nbatch', int,    default = 10) \
+               .add('data_stat_protect',float,  default = 1e-2)
         class_data = args.parse(jdata)
         self.type_map = class_data['type_map']
         self.data_stat_nbatch = class_data['data_stat_nbatch']
+        self.data_stat_protect = class_data['data_stat_protect']
     
     def get_rcut (self) :
         return self.rcut
@@ -306,22 +308,23 @@ def get_out_size (self) :
         return self.fitting.get_out_size()
 
     def data_stat(self, data):
-        all_stat = defaultdict(list)
-        for ii in range(data.get_nsystems()) :
-            for jj in range(self.data_stat_nbatch) :
-                stat_data = data.get_batch (sys_idx = ii)
-                for dd in stat_data:
-                    if dd == "natoms_vec":
-                        stat_data[dd] = stat_data[dd].astype(np.int32) 
-                    all_stat[dd].append(stat_data[dd])        
-        self._compute_dstats (all_stat)
-
-    def _compute_dstats (self, all_stat) :        
-        self.descrpt.compute_input_stats(all_stat['coord'],
-                                         all_stat['box'],
-                                         all_stat['type'],
-                                         all_stat['natoms_vec'],
-                                         all_stat['default_mesh'])
+        all_stat = make_all_stat(data, self.data_stat_nbatch, merge_sys = False)
+        self._compute_input_stat (all_stat, protection = self.data_stat_protect)
+        self._compute_output_stat(all_stat)
+
+    def _compute_input_stat(self, all_stat, protection = 1e-2) :
+        m_all_stat = merge_sys_stat(all_stat)        
+        self.descrpt.compute_input_stats(m_all_stat['coord'],
+                                         m_all_stat['box'],
+                                         m_all_stat['type'],
+                                         m_all_stat['natoms_vec'],
+                                         m_all_stat['default_mesh'])
+        if hasattr(self.fitting, 'compute_input_stats'):
+            self.fitting.compute_input_stats(all_stat, protection = protection)
+
+    def _compute_output_stat (self, all_stat) :
+        if hasattr(self.fitting, 'compute_output_stats'):
+            self.fitting.compute_output_stats(all_stat)
 
     def build (self, 
                coord_, 

From 3668f1913c1ab6803b30e899ac8348983df97ad9 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Fri, 8 Nov 2019 18:08:06 +0800
Subject: [PATCH 16/38] fix bug: return to the old convention of passing merged
 data to input stat and unmerged data to output stat

---
 source/tests/test_polar_se_a.py |  2 +-
 source/tests/test_wfc.py        |  2 +-
 source/train/Fitting.py         |  5 ++++-
 source/train/Model.py           | 30 +++++++++++++++---------------
 4 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/source/tests/test_polar_se_a.py b/source/tests/test_polar_se_a.py
index 0506c84ff2..275b4fa707 100644
--- a/source/tests/test_polar_se_a.py
+++ b/source/tests/test_polar_se_a.py
@@ -49,7 +49,7 @@ def test_model(self):
                       'default_mesh' : [test_data['default_mesh']],
                       'fparam': [test_data['fparam']],
         }
-        model._compute_dstats(input_data)
+        model._compute_input_stat(input_data)
 
         t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
         t_energy           = tf.placeholder(global_ener_float_precision, [None], name='t_energy')
diff --git a/source/tests/test_wfc.py b/source/tests/test_wfc.py
index fc0d850b01..d4b408cd60 100644
--- a/source/tests/test_wfc.py
+++ b/source/tests/test_wfc.py
@@ -48,7 +48,7 @@ def test_model(self):
                       'default_mesh' : [test_data['default_mesh']],
                       'fparam': [test_data['fparam']],
         }
-        model._compute_dstats(input_data)
+        model._compute_input_stat(input_data)
 
         t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
         t_energy           = tf.placeholder(global_ener_float_precision, [None], name='t_energy')
diff --git a/source/train/Fitting.py b/source/train/Fitting.py
index 650aac8115..506f8188b9 100644
--- a/source/train/Fitting.py
+++ b/source/train/Fitting.py
@@ -402,7 +402,10 @@ def get_out_size(self):
         return 9
 
     def compute_input_stats(self, all_stat, protection = 1e-2):
-        assert('polarizability' in all_stat.keys())
+        if not ('polarizability' in all_stat.keys()):
+            self.avgeig = np.zeros([9])
+            warnings.warn('no polarizability data, cannot do data stat. use zeros as guess')
+            return
         data = all_stat['polarizability']
         all_tmp = []
         for ss in range(len(data)):
diff --git a/source/train/Model.py b/source/train/Model.py
index d00d1bfaa4..c568dbe9ec 100644
--- a/source/train/Model.py
+++ b/source/train/Model.py
@@ -126,18 +126,18 @@ def get_type_map (self) :
 
     def data_stat(self, data):
         all_stat = make_all_stat(data, self.data_stat_nbatch, merge_sys = False)
-        self._compute_input_stat(all_stat, protection = self.data_stat_protect)
+        m_all_stat = merge_sys_stat(all_stat)
+        self._compute_input_stat(m_all_stat, protection = self.data_stat_protect)
         self._compute_output_stat(all_stat)
         # self.bias_atom_e = data.compute_energy_shift(self.rcond)
 
     def _compute_input_stat (self, all_stat, protection = 1e-2) :
-        m_all_stat = merge_sys_stat(all_stat)
-        self.descrpt.compute_input_stats(m_all_stat['coord'],
-                                         m_all_stat['box'],
-                                         m_all_stat['type'],
-                                         m_all_stat['natoms_vec'],
-                                         m_all_stat['default_mesh'])
-        self.fitting.compute_input_stats(m_all_stat, protection = protection)
+        self.descrpt.compute_input_stats(all_stat['coord'],
+                                         all_stat['box'],
+                                         all_stat['type'],
+                                         all_stat['natoms_vec'],
+                                         all_stat['default_mesh'])
+        self.fitting.compute_input_stats(all_stat, protection = protection)
 
     def _compute_output_stat (self, all_stat) :
         self.fitting.compute_output_stats(all_stat)
@@ -309,16 +309,16 @@ def get_out_size (self) :
 
     def data_stat(self, data):
         all_stat = make_all_stat(data, self.data_stat_nbatch, merge_sys = False)
-        self._compute_input_stat (all_stat, protection = self.data_stat_protect)
+        m_all_stat = merge_sys_stat(all_stat)        
+        self._compute_input_stat (m_all_stat, protection = self.data_stat_protect)
         self._compute_output_stat(all_stat)
 
     def _compute_input_stat(self, all_stat, protection = 1e-2) :
-        m_all_stat = merge_sys_stat(all_stat)        
-        self.descrpt.compute_input_stats(m_all_stat['coord'],
-                                         m_all_stat['box'],
-                                         m_all_stat['type'],
-                                         m_all_stat['natoms_vec'],
-                                         m_all_stat['default_mesh'])
+        self.descrpt.compute_input_stats(all_stat['coord'],
+                                         all_stat['box'],
+                                         all_stat['type'],
+                                         all_stat['natoms_vec'],
+                                         all_stat['default_mesh'])
         if hasattr(self.fitting, 'compute_input_stats'):
             self.fitting.compute_input_stats(all_stat, protection = protection)
 

From 122007be2c2fe2e0d6446441d912e6acf09e7796 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Fri, 8 Nov 2019 19:01:49 +0800
Subject: [PATCH 17/38] add missing test file

---
 source/tests/data_modifier/dipole.json | 60 ++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 source/tests/data_modifier/dipole.json

diff --git a/source/tests/data_modifier/dipole.json b/source/tests/data_modifier/dipole.json
new file mode 100644
index 0000000000..283ebe161b
--- /dev/null
+++ b/source/tests/data_modifier/dipole.json
@@ -0,0 +1,60 @@
+{
+    "with_distrib":	false,
+    "_comment": " model parameters",
+    "model":{
+	"type_map":		["O", "H"],
+	"descriptor" :{
+	    "type":		"se_a",
+	    "sel":		[46, 92],
+	    "rcut_smth":	3.80,
+	    "rcut":		4.00,
+	    "neuron":		[25, 50, 100],
+	    "resnet_dt":	false,
+	    "axis_neuron":	6,
+	    "seed":		1,
+	    "_comment":		" that's all"
+	},
+	"fitting_net": {
+	    "type":		"dipole",
+	    "dipole_type":		[0],
+	    "neuron":		[100, 100, 100],
+	    "resnet_dt":	true,
+	    "seed":		1,
+	    "_comment":		" that's all"
+	},
+	"_comment":	" that's all"
+    },
+    
+    "learning_rate" :{
+	"type":		"exp",
+	"start_lr":	0.01,
+	"decay_steps":	5000,
+	"decay_rate":	0.95,
+	"_comment":	"that's all"
+    },
+
+    "_comment": " training controls",
+    "training": {
+	"systems":	["data_modifier/sys_10"], 
+	"set_prefix":	"set",    
+	"stop_batch":	1000000,
+	"batch_size":	4,
+
+	"seed":		1,
+
+	"_comment": " display and restart",
+	"_comment": " frequencies counted in batch",
+	"disp_file":	"lcurve.out",
+	"disp_freq":	100,
+	"numb_test":	5,
+	"save_freq":	500,
+	"save_ckpt":	"model.ckpt",
+	"load_ckpt":	"model.ckpt",
+	"disp_training":true,
+	"time_training":true,
+	"_comment":	"that's all"
+    },
+
+    "_comment":		"that's all"
+}
+

From 83509c4813034cc121dfdd7fa61b19c2446c395b Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Fri, 8 Nov 2019 19:26:49 +0800
Subject: [PATCH 18/38] add missing test data

---
 .../data_modifier/sys_10/set.000/box.npy      | Bin 0 -> 488 bytes
 .../data_modifier/sys_10/set.000/coord.npy    | Bin 0 -> 5888 bytes
 .../data_modifier/sys_10/set.000/dipole.npy   | Bin 0 -> 2048 bytes
 .../data_modifier/sys_10/set.000/energy.npy   | Bin 0 -> 168 bytes
 .../data_modifier/sys_10/set.000/force.npy    | Bin 0 -> 5888 bytes
 source/tests/data_modifier/sys_10/type.raw    |  48 ++++++++++++++++++
 .../tests/data_modifier/sys_10/type_map.raw   |   2 +
 7 files changed, 50 insertions(+)
 create mode 100644 source/tests/data_modifier/sys_10/set.000/box.npy
 create mode 100644 source/tests/data_modifier/sys_10/set.000/coord.npy
 create mode 100644 source/tests/data_modifier/sys_10/set.000/dipole.npy
 create mode 100644 source/tests/data_modifier/sys_10/set.000/energy.npy
 create mode 100644 source/tests/data_modifier/sys_10/set.000/force.npy
 create mode 100644 source/tests/data_modifier/sys_10/type.raw
 create mode 100644 source/tests/data_modifier/sys_10/type_map.raw

diff --git a/source/tests/data_modifier/sys_10/set.000/box.npy b/source/tests/data_modifier/sys_10/set.000/box.npy
new file mode 100644
index 0000000000000000000000000000000000000000..4935ebe88cf4f03b30b6310fad4ca9388d1615d1
GIT binary patch
literal 488
zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+l>qoAIaUsO_*m=~X4l#&V(cT3DEP6dh=
zXCxM+0{I$-2099snmP)#3giMVM%C>O3`pRw3M&6SkRPaxD&~vEw*&I44N=vUqwzC=
z{MqKHYNn&{JAwRjwy0`Oq49SE`35eiYIM-}vOxYVFH|+x(fB8T{KtW)YVM=)F97*r
Xk*I2d(D=?kzC|Lc8WS|WDv%EV4H!>N

literal 0
HcmV?d00001

diff --git a/source/tests/data_modifier/sys_10/set.000/coord.npy b/source/tests/data_modifier/sys_10/set.000/coord.npy
new file mode 100644
index 0000000000000000000000000000000000000000..7bd26fdc0523cd22861e958f190818359f63515d
GIT binary patch
literal 5888
zcmbW5_g53?`-Tw|q=R&@&=e6wl+Y8N+b$>;RD{qK5ESW%9m|S|K|}?m1OiEbwSj^G
zg#>jK*Hy%hZFd#>x@$!gL-9MGf8jg7%$fJhoH=LC_1^dO%%O$L0~d!5Q(3RF*&#MA
zV^z9?kF^6kp5@?d?GT@mo|zt#9F>wD8~4BUIWb8Yam;!~LQHBLb3JjYtF!e~7R%Au
zdaL#S@6cL$y^F1Q+w1#mMg=>o%Nw)9_bXg+TM_n{B_H;;CpN{Va3%$Kz<(wiqonGb
z@9+PWmpMM;EIa6qs#-7n8)eXD@x}+Dbmo3<A8fqE!U0z=$h6#H`hbP|vFGW@tvX7Y
z+e#@Pd5~K+&?=Hr_W4sZ>h)gaOl~DJOFp<#0sOYgs5h*cPAE^1jU699jB}B*p`BWH
z@Udr6Bi(w*gJ6aL6UOHuK7kLD#{vivm(aSS@pR20g8EuA!ESb>EU63W)frIis7UOr
zv8S!k5n$h6fg=I#)azkEoBp0mFIB=Y?9FP}eOy7@z$83Wn?n22gD^|ZMZwr~oVt;Q
zw#zBF&@+`%4jW@2RtG|d7|8xHz+OE=I8A3$ldJ;A%s&yYB@s8uD-h<RB;lE0`f1)v
zN1My=b8aw>xLDvwW)%ioJW2GR7|G2-%nOUa{C#4Sa;lK%B!utUI4Ya{gN~`0z@$%;
z>S8T%&9)Tw6VoufZ#d=^m_v191g`8e#Fw?zm{y@rj(<z=s>}p6iB_~RM}jM3L|C;f
z7O~GnDCMakJ0uCq_f(+LqmOou9inGDf@oBi6zM}+FiN$jrv1GXJ5h)av;67K@fy_Y
zOTgVZlPqScB3#~2DNUKku_?idDO%91U4w@oG;!pD2>I(nVHR4A{@@y1njea18$YlU
zuRZcjwUe=TU-ZW7(|pB9`|FC=2YXsoWS&@wjT|GP2d2fd(Y8pH6YBRsF1Y%db1Kvw
zQ!>0@-eb^KXXK06&XMxF={_jF!NPbiFZeJVDF;}XqjjGCc~nQo54KYD01uxxG|<2|
z31Rgqs#X;sYk4c_sqwM1M1U)&WHesAnM%}8QpYqtCXdR+?A&(h*~CY8eIw0e_;GX-
zplVbeL@9izz7b&Dfkjlti6u?1NV;8>i8^g(s*ZOdt=IZ=TrCp2e%euTPz1g&4nae~
zG}<3*POFbiB3?fii+U6AMn9Acqmu9<Y6?A{5QL64E|zRc$JX#PM88i#k>?b$t};RC
zVI3Hy#h_@qA-Xph;IGvj%1NkzN9jizR3u{Ng$jiBC}~eCmsa%jknnUFKFfn~#@zyu
z@+uhAyVKE2#n`k(h%<X4&{iad{f9~fOc7%DidfpW{|9kC86*3<I(_W408J{zhrv|L
z{$)6}A2!GEml0UB-VhZ^DMnQ5Q|6Eaztou^c=>1=b5(*B(IP10V$o9}!uc{aC?6zY
z+>8oj`1a95t09uVSxS{1QtWrp#_ZenbU^zXbzd&S(gc5M{Hq2wonq*21<hKgiex(_
zWjAD^#jFIUwrOId$r|_%*TPK4N@$%9#o%WtwAE|C=Z50#O;t`rzub3@K?nQAQE#lx
z+NHSZeOUqBO|4OTJaK2)4o>o057_6j@vN+$?T~p%e%tRo=gByCOyqcB>r}%w8Tn%8
zzm2W^QXe#5W?|9{FXR=m@WS35+mE)<?{Dfz^=T`~HTgLEpn*!yN@$enDOxy2fW7Ih
zRQH|-+9$yJS2EgB-$Y)cPSUQKe0Uh-;@tjr+L6Y`xKU?Fqnn3OHUf00<l$8&AKyO<
z@ZXI<TEUAUr&&?7Cp8mAH7;ap>q4$x`lS0k0xOg4C}3d(qMn9e?EWbfyv&U90v%}N
zH!cRptU>x#E^+oG;n=Y$v`!-k$$xTD@iZO(y-UST>r{+=?nHGbO!44}4qjKqV5r;>
zZ=dMn_f$WcH=zQ9@gFJwVj_f3E3mespQay-q)EyzRMb$0!nrF@<Ys~8I${Jqccb8@
zVwlbnBBVV6E=R=pQCtb*sX|P5TSeKAe^61kF+O*yk&BZB45yVs`&lZEXb#7QeddT1
zM`FoRLnQH}Xid?lyN*&+UNJ%Dt<m(Gp%k;rM35B3!ZuWdY>^szlh$G6P&pj4`Y6(I
zhyoWbr@s|aY+a{~c#ZM2ukag*R+Yi4+@A(U)FP}zjLix*)l{pZ>RcaP%g@AyktI;C
zABp8fiP&VTg~o%Gc&f_9Z+TJ(o2y}v6^he;>vGP268TCBp0n>2cq6H7qoTV)u24LS
zK6hxjCu~}FbMCr$!brk~h58rv;(e#(7Xo`YvR5qZQul(+Xv4NgYkaYIs$J{*n?8ta
zVu7{L3!lSS_%+ZS7iY9l`Hwo<<|L=nW_*0}XryJkC3LZ~p4{C9c)Y8Xl#h6Lo+Chy
zWfL9aHj&iwB;8%c$1nQ1=qPNb`Zzu~nP=#iCp<*!2+;pgfQ$`%3@8P#c3(iNMUm9G
zG@71F$i&+%Qz`!bWV(1vkG$VU;LHzOvI=21+8GMJC}$GRFeTA5J97QXMJRVIqMgDi
z=TZ_B@28O1I0$D=!*Dww1NXC2agUpdSLzc<MP>%!R9&c_je*8bLnQj><5v%&im&B(
z&ihEeUP(lrl40q$0SYx*N$cqg^|h6u;nfN}ooIoQRbm*fo<_ZO#bD_PaZ)!DBbfYY
zno$Y4s}Qd0D=FSaNq6oWL!(fIj5nI&wRb6Ab*ADHUk&p&n<F$d3R@-{qTgJK?Y(+*
zJw}RK-6k-2F`D*;Nzv6Q!j(I*_+TJHLy;P88YQDdUXJF!`{?M5Au2l?OskGb;oYJQ
zOXKlm?ev{k8fDn@r$3#yuZ2;%7#2&tDCa*_H17OPCi63K?rSmrxTJwl$3&c)qXoIL
z675M`+?*^$erh#F7KY-(WOI(%qn*B!#`dxAXM1D(y|s#`>1PzZLtNirYfm)F`JC^`
zp12`r!|mJ~HfN7mE<4-D={nAI(=AW*oi=EDQ0R-4!&W{~Uwx1(VWBI`3y<wt@LKN<
zpDqOz=+%==ww&T7@KJKOkw!;K==7m_s`eM4o7YM&f9K&wngF&-nrMSo6a6vfBss?O
zQNiTT=c0Cczk-kcyJyJx9uHG{_u{}a#(S7fGf~aMqbC96UK&Q%l4Gdjat88tvM7DW
zWTwaU=#SSCI8tj%b2B0!^5vqz(vg;XnvipjHPtA&uofrb-H1qft-lUwPOc;i41(jj
zFkETQz|CQ)s4GrI^KW(}KVyzHhjbCt5QB)=QRqFahYv!c2M@}jrtyj1btXclFT(D-
zgLHdz9EINdO#eJCLqcH);!G{zd{K<)i>6ZNiDKMl*7tBDQF2g>-j5>KxCkM65KUL2
zlw@_?7(vZSzm~IRI5oW#F4t0VSzQgY;>=;D5{(x+hPYKP!Q-jS-l&zLW55KhrK5=>
zm10G|2zis@5PG!&!8_D&=J#Zj=al0>+7A+}93qcNA=GtHip54cnAvYn7B|1s@P<+h
zjSC=KpIRIb5#vOV8_j?kO1FKZlW#Nd{jXv~TvW&Jw-XSUtc4{}RT%b>i+>(6zQwA>
zW~R&Zb8R@;BNq5h8Kcgb7wwJmj}eOaJI56z(x<+`OmAJCvyW5#t0(5(Wn)^wpX`tB
zN96Yd20540Sx7JQ#KT_wwiOCr*lMQxZe%>DW*-YT)_5W3vm46G8OM%QP|Nsw>boGP
z9(O)szco_VObJEC*OMJrfFnCvDXN)=HLC>3+|xwDi!xfn<d0@H9~z85D-N~Oqj`M1
za*>hXIuB(}_TtqY0k&=CBT6F=)a6ev4}?%@*-GkWawGq^C!KPbOlInOWW+ebnXR_u
zC62%ZoiIFjIG$nyjLB%u7-H*%q5R)<DEb^lP4Vk6Y^5tLJ{5$lzrv7Yn~5cRQlQ<%
z^oQ>_np16oOr<Wk1u+<XV-y0v>Y}=ksOC;NuD$p~$xjl|!}#lM<q)YKUQI9BKU0aO
z5aEkMp*3g@9eW8nj=54`eKC|r%dns(66Y9xhNL36ISP?{DvBBll*B)8jMR*;ekb3V
zLg-(LH@8wTVV5eh7MbIDel!NY8(`963EtT0QXb<$B}S$YXpSbYcT!BXsl@T3I7IKL
zKy<tsUhhnSkAFF~PEgXQT|*?Q4W)mxrLfG_f%1Yq4dnFE?D$d?t_q;_b87Knff&_H
zZci;&!;)jYl(Z%TgYCtLJvjm?`3X3BSPPQ2D!B5)pnE`qg&NhkbTyRmK1a@YS0mrm
z9{QXk^Su!}Z?R&*J+b22Y6o_3od<O06>vU1@kH)xHk@*AvAY)($nULH@w@87!lwjJ
zTzae5X7th*%_si!mAUw$c0CKto4lZQ$qnw8+#&xUr{#Y2q&HGQQFHj15`2c4fh5#7
zr=D`-1t?E$rJ29-;Iv$TY(*0}@05|9!ATmvhYzyMg)8GfEN?yzFzdFz@lbz$FZ_QO
zpoHP)rB)tRx%<=hisdxo=_)d9Nr(A+AG&(onOeT-QoySST;Fa>L(F;d*D&~=u%-P6
zji_?5C5ha^;NqMD(H}8%^uju5?{%fWyp|$yRXBdn&BR~oDVV08hN)sps@i4=vq(K$
z3W|ZLml19()5UMrMC)Ib!)f#va(<r(OM4L_ELG5+kx2I~zfjZyA-YVs%pNv}lR$#i
zxvun4R*YMl%dq;tNOUs(QywS6QyU@17Dv+PQ%b5kZH$C%AN<A`n8GEf6wA9)adwz0
zF0;*X@n<wVz8au!Kn!80Hf;>6Mx_nI*=#GSa;ionDp741j}A@+PWr3ip<gQguqsDD
zuaXKXhA6>3j6%3lG{4Y6#~OReX8)i&<4e&Y4IsmSTIeqnqw|LYZ5&d==bv9G<X$=k
zn7)n9ACCVb640d3!a_4KqJ6@#HbjEy46~M9q3}89%6aS1>|-*{jPudQ8|S9XR7mqn
z6!U{4*p3+<keoWmG1v6MiEnH;NG`Fr_~*;rM8o`M4rd|T+Y^WKblVgo*+{xDhTT2a
z7hTaTa0OnNQR;@Y*X}6Fl2gu#dJ@f7&`G9;8f0haj;Dm~vFhnfvH-42T4`M=4_R{s
zIQ*rF76!_w`{xPzP{g>lT`soq+v!h7KB_uolzo<m^;LV(s}Nu!!;f4i59u2I)cPQZ
z{$|I~NaJ+)aQtX~x-;E3)g$+J5s)|9(qm2}I=+O%Qap|x+Zs`JlNn_thaoK_6<by@
z+?gc9vecChK3|H|AK|F$&cv3SWDFq<gD$3&=4pkvMm@Y$M&q)?2=W#kh9N)Np;v*H
znjUH#z6J?4BAA_5!KIcpbmsOK5{QLR?dHO##T+@k5`@2F^5;S^wl6M&zGD;)NW^%^
z5@E<ph!ck+=;}EoZLKuM*w1hMeoiuhML6TnKT=V=dl=rhn4@4J!|n(}6rL3$$V-cs
z{ZWlYwx&24Z%Lcus}aPlL{C{fCj2ai*i{W3UsF->LkOL&eiGIX(Y3B{YWI;sV}&mE
z57|+Rs**Inl)(FC0Qt<W#R0Au$DY}cQ{-?g-SCx?oYJwOtQfo2s^KLk0sY-t;3kOC
ztP+8(t`ZD?Dus;U$1To-0~<4+m(nqupef$yeCMIKtzV!xbcM&>=IMc^^@W^#=6}wx
z;T(h*G_faS9gqv$RQ>X9x?#VrC$7HGZqs&Rqcp^y-4pGL6AM|`DD*;iwi`4wn7Los
zO8Kev<Rnl~TOz}Ov5Z3INXXEop2QgfXgIZ!Xde#^J_1-xXr@>@8Lj;H1a(OGxa*n=
zn_t>VZpF-E+D$Z2$3xHdy)bANAdc~$4-7vWmiv?IlErjQ7EiaiX>cf<PEj__%s17e
zq%Vv&o7<7FHWJ@QL}J^;(Ztg*qG5a!l3fTxL|hsU7sS#4HyH+dTxqa&8QvC0pwu-B
z`#!D1kAyTtjU7do+pX}+2R+ydq7kWQj9U&m@LR%Ry08MxS-mu1vj)y4A{<H_hSOWt
z(yHzrnxt8dv2|h4JY<f~jO%JVnM%_-i!pm*8R{4pp4lvh9`j6!wh&hv!fE?4C8Y?A
zk@M#ZKMy}+L?x6$$0QAJ4ToXR1arLD5Q8pfLtObqj5ljWQpb!Mj5RccY?lSyEw09l
z<Vu)NTaEH7<tVgP!}N+Y{P$7_t&0P6@$?YQ&yS=o3n>OWbrHJLj%;r!>Gt&!sMyaZ
z1Fu@lNoCkLX+`Nx!*NpkE4|80gJ*s*@)xLLU{V5FRJ9R#T8zd=5wNmk^5?Y_eoTK=
zUiRU{TX4@=RoHTJ9KCV+xRYXBY_8&ziI{CZ;10_}MVy`>FFe!ZV2rYsozz((FR@qk
zGcI>y=4}sb`lQ)*E`W{OwJGfG&Aup{!NP@;UI+?z!+m=X$hNl9J6=6SUr|uf7CtVf
z%ZRg5LOj=cVr4P&_h1VNcktjmMS!#w&D5<Xqc`_X(EU?<{5Cxo{W<Maqt8bZyNNVw
zc$gHq7nKc+|7_%At9Bkd>;veq)k5;~PavHGsWAU{7I{8%B8`_!{tQN-qRx(Z<D$Tu
z6a}`9B`uh3M4ESvC~i_XM*FA3VnQ6L>`lht99Od3wH#M{BH^2!g+T5)JldUx#-n-^
zVm%szYWkR^9*uvqjIp&`8_m9K5*Jp$bHjJ~Z`K;<{;WW%xhl@zTT8$C^wRy!<v6=B
z9Fd9U*l<&d2f=Q%<7qJ>)yj~#B?^Ui#dxM6!eD<HwtNYrz#JtVPB+H-xljG(1sXwj
zLn$n${qOxM*kfmoA6sLPm}H3jm16vwp-$FUYOv~^2^O9<r_UFvaeqrC_TOF&)uM6)
zja9=O_jE)*6@ptoNc*aW=yg~$jp~yi#$OL}25jj-RX_0>OE4pGKJB)xMPwes<!%!a
u4I6>*h93GZONGncV$Ad&hUeA^aJSY*@E0-OU5o_JL;@?7YK%S_ivI(zZ=R?C

literal 0
HcmV?d00001

diff --git a/source/tests/data_modifier/sys_10/set.000/dipole.npy b/source/tests/data_modifier/sys_10/set.000/dipole.npy
new file mode 100644
index 0000000000000000000000000000000000000000..960a39f74a2b64e5887d18a5b4541d41115e98fd
GIT binary patch
literal 2048
zcmbV}`#%+U8h{TnljM@xE)zORm`TcIcTG9(d%ApR6d$Ix>rA1@s3@5k6Um@Lg_RJZ
zi;OL|m>TJP-}exri%X$wY)HB|uBlbYZu>9n^V{?M`kZwAn~U2fHOWts5c3`0`?m+0
zFOix%_}H0SO3i%&0)qlQ{kH`K?(qI!cl6x7-}_VF@9VkO`;%K(FSeB0*?(y%Jt+M@
znKb!H36suNQ0|IUJX)ZV6-n!1w|^qb$F?XMbcC#{E*UqQMB|I<qheK;5lOaWi-cZR
zGQO9O&kfo+<GeW@oSj6r5A@_+hZk_eB6adtg@IdIA~-I7%)VX@Z0F&GdwRN9BVsAK
z@f9|-ErKr^A5y%@W1MFt<EFTsQ0nrlOyygRsp;XeoyG5P{O(kcB!4Xu%8O;=z8xIi
zU?psVmQ(1WIP|!##o+vm3+?r}>ezMm2M3UEl!yu~EijPQK(BTZ>(ARF)_gRBbeGfE
z<9GstgA&BL4ReSZ7trp9Ka>B8d_3>?Pi{^z#DU2LbhuDQUKeh`d)p__ce_r2kM}ib
z>1pS<B0C-^Sc(PJJ$%UX&*(@6Tw(GDl%5pieWeMn@lqbE+6P5OKC&UNn>aRA8$`u0
zcI6ttS@l>4`QQhK(1%^zq4k?6@~|NYK7r0={baMRlk2{o$$GPuOj!mnxE)2u`^yg6
z+djk3TvPakq+IAf`T^dXr{VP*C(&-%VIetXLTxX#C@<|x(mj0*{ZtRR`K=BX<<`>o
zt0&2KXU^e{)kESd)k*MMdLLFeJmD9AHRHOE3vh34FCUq1g}JSlIZAOIvU4xc-Qs(A
zV4XdW4ep2Vl4Y`))r=c8PRR@cUZH(j1Ppr45^KISBHe+fJfTdTR+Y@5HQSD3Rr5VM
zFsY4;+l_gq!!0&lRWI|%h!@gMf0?XK6PK8u<&lLN^wXPEXq2VnSadpCnjRH~pBYl>
zCO=XS^dnd0P4q8oVhv{rUa#q-(}SAwk(UO%*p9>m+hiD@H3X|ZeRmm~!38_Mz|#Cd
zUOLSH`z>;Kac2x%S{p-#Q>yV)hz;-DR{}Lz=Y-#2CAOLCfQLkdc1aOXQ)eohP`HxZ
z-@Ia}WFd`wGl_JZk72RXWjdAG!h5Ucv5Q|l>rV-k)tpg?hLLjFZwECo-}xV`x?o27
zcooJUr=ib+3mB~)B~+?el(r&^65Z0s=UgM|YTRc<Q9E>6ejv*NiTtpygx%D<MUGbr
z_(o`8U|1)cU((=~5KSC)8{wYhNqFy%9FFjcfi)&x6mV2vQLF`L248`MoyFo^LltJP
zaDsxleHh+&7<QCBlZ6KTNUigSct^?>3U_T0PRX&jMR9`e$29Qg9i|*V<spa1q&v86
z9TFBr+OY1X22S5z%n}b7sh}QCO-aL-O_y<W-X4+k+<+QNt7&Uo1?~P%8`{^@v72B}
zluu;;*f$t)>=DWH0u)9~3E*X{i+S;#?Ctb~^yhZM`B|g9(Mb*Ccja){Xe3y!bfD7y
ztLRy4#7S@o!u_knn`_mmxP1Ut1oWZACIZ51<dAVFj=IeM&7+c55vJur)~@jwUFu0a
z+wO3yx*6-Hcd$ofxx(YYG?H$&g_uw^)SIZ_uo4%l)O!Y{9;s;9R)RIM^&)Cqn-Z6I
z)9v;?ifDO`#k=nC#;?vo*f$3JqtO7CC0CGSQd?P>^fo*(9D!wjdCo(33#r-uDr8!}
zXV)YBa9*0jLERy+uIf)zx-B0o^Yr=i`g7oB*(J(HZ()+`1ibR-!_DK7U}7H$$==y?
z{M84RXUwKmR@UU6aS|tfw}=`;s@d$!Tz)ms%cW24sY9_}gsW|Y4x=%MwXWpBTWe`x
z#%s_}B;#?98z`A<EZnj_h_<Y^q`a@gcZd4XU~4t^=&pq9?ONRZ`!Lp7<&so)j_mH8
za_Byxi8l4U{KM)9TApwm)|!4`<Ax@<zxE=J?b->C_KypLJvkWqM1x~<5+Swvy?C~v
z1|1{QVDt4}ba)pDVmKRoDoRLcCQ+vTBBvwqdgOj61(P%<P~M9Qwlg;7KKn6_(%wYD
z?k%EnVGs;Qy@j%McX<6lcj~kF2w4Y`P^eX){9V1GG+<CDFOAa5*CuRh`WoA3RPwi#
zx{!NIkL6QF@X53|lE$RS^sR~^Bv}{Rf(F=XzBwr}-C<+GdyZ}{gT!YSctXD$Gz{Mt
zxqr_>E!7~^M@NB2tO2c<T#H4I)1h$WIl9G0!C7|!%ICM~=XN#a<b6Igv*(fM_D)Ab
z6CtABF-PZ3XPt$b%Eb<W^t-Mh+3${oxX=MGZf)d$89S0%v;^<?C1Q)_EexM;cO!Uf
zt>_6FAfdA3R}WsJQK6DYmetDYY^QN>#RxvS7(gAF`Lb@WY`D}t6|FjkSxy<kbYwCt
zaUSK<J!e2=eV%Kc+Q4XGo|vhZhF2n6$uQF!a=$kv-|4m3vLOxLUVMs8yCOkpTMsiE
gTWNW=y7KJXUue`YOvsxvabjGA$o`dhy^Sva7u@^xqW}N^

literal 0
HcmV?d00001

diff --git a/source/tests/data_modifier/sys_10/set.000/energy.npy b/source/tests/data_modifier/sys_10/set.000/energy.npy
new file mode 100644
index 0000000000000000000000000000000000000000..0aa3b12a38dabcb9f233b94fdc4b66592716dd74
GIT binary patch
literal 168
zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+l>qoAIaUsO_*m=~X4l#&V(cT3DEP6dh=
zXCxM+0{I$-20EHL3bhL41Fn{dFOOzUdU;f4^2?($f%q*Dt4w)$)EkJ?fVdHeHv%yL
Dz!Wx$

literal 0
HcmV?d00001

diff --git a/source/tests/data_modifier/sys_10/set.000/force.npy b/source/tests/data_modifier/sys_10/set.000/force.npy
new file mode 100644
index 0000000000000000000000000000000000000000..9fe56bda580de79e873d7539a8c7690595a8edd9
GIT binary patch
literal 5888
zcmbVQ`8${U^G14?k|?Aidx%0f$orY0>{}|LjU;JTaZ2Q5$y!RGEM-YaM4^;;UZW^k
z(xD<rbx^c#TBPOE_h0zVb<GcR|1{S%Gxy9rDXtzaZp%bPc8Y{)t@RJ`+o3g2MN7EO
zSW8buYhB=u;2pkOeFAr^_5VNI)^~G|{~tVPz3(>vKe4u<fu4$?v9XSxO1R4Z&&KWD
zX2_Q}>oB+Sc+gw*4~^N{fI=A&%LqSh$hetLC3ZL1t1lO5hOQ|+l1$;p%+KXkotXpG
zlfJV=*@+~7;tFo>*w346kHY~8iCVt3jXVsO;vu<g_APQ7F6aOf`>_!3<vD}AU<3F@
z7jV1t&*GIGY8d13j{Wz(oW9k?;Z9i_s@7cu-v>(A;lr1}u5Sf4nq-1ThXkc1U8Msn
zzrY2H2C}xjLRD$mkgzy`GK}|<O`RCm>26DD7Jd99byxbBV}zdN93Qi7F34^^$m#n?
zLtl*#BuGxdpJx_R{D(H?Ge?yCb#Cy9vkv09<w7)zb;p3m(M-IaFt%+a3I_XOC`FCK
zr1#+Ce1$4Eje|`Ai0%Wn<oa;}bd1ht1}`!=+w>-gD>{r$Kg{_nW54pNf)r5JyNADO
zB!Z`MeBj>;t~e#?3^P(t!^q%P&^j-HZq7?VXt)Shmzt18o;JC9^zeIg7qC3-I!?2G
z6STbe!H)VpV@4t2TzY9cyibc|8KFp9GVj2tin-Ld?jU>JdXN3uu8A*Bgu`y9NLqaS
z9vAskg34_-qw30L_Q^<w5{8yxYwUaBvv+&&_U}@V6n|QMe*blLB2Wq|KR2-8bv|(G
zJQHqkS_e(W+BB%-g=sq;QrX=eSnFVlD%JZ)^=%j1jE8A~o;=DdX@>lGXQ<>I@uHJC
z8&Nc2QTApy<6bd)15tcH%ITU@F1qA*D+`Odo}zKt$tnlSV^q-hleLL8&{cmI$_+AL
zCM{Cb`{FixSigzI`BktP8yL!86lJwXmSI=$nA$@MZFH>P9Q|vQIi;u4*f?VmDNZxN
z+l3rxw(p0~*=bzIm8+<KF9yxuMbqsHo{mn4#0h(Ks4vJ7ASRaY72c}KyQqQ<GsfW(
zFB$q!cah%9>;%E0a+)H4foismz=@bxO0$ZgjpwW(Oih)JE|KORs4XSmGI>n&n#YSj
z+z!UGD>=)lj*#wJ0lnumF@5TET5Xrc^a@K^pYPJESM&XmTdRu|ryS99z5*NWX~2|>
zAbd2c7^F)Kgf<4X@Xz%^`Z?>I#qTl$+!(o%Vh+xLJ<tATZOZX%iIXF)RXU3s@9YxF
z-dfJ^mn80L>18KwJEGAh5tO}Xi7Mu~Z1G(g3@MPo>bwu2Bkc?W3#Z|Y;ZH1oJF%1y
z$v-}hsqDQXq<zLPwYz}X+<L=Qc243z^gn3q-NG(C7Le-ebZ$I0GjFLEH9G4Ablk%p
zL1h6fxwD6sWgp_syNsiMFZ*JQND4dreH@Mb1vt~Qog1{=j4F>)!OZVo^;-2xc44bH
zW_dB@KW-gZg`0wy-V7+Zt4oP;o6!1w3+>kY1<BUSvDh<-X6DqeX?byETcd;N$=NV&
zrxDk=d?D87>Viw(QrPpO9cG6sQ<ME?3!Mp3?6Z6gGaA1U{SRNm8(zQpcize5A81Ny
zjrS?3#Fgszrn88$INDvifpVq|RoiDh;m@~K<6uoRyX>fhUK2%XqcX42<2llJTI~XO
z&klhxWtOCHR0d1?CApb*@?rRc1gBAT3$0VjQF-AFTIKpLwVVvaIg_L)w=suH`Vt3^
zeEL|!<nyp(#0ayWDpO4MNy^ZEThqEEi`I=hPixjp$2IzaWKa}G(|sA7AL?e_E6xb>
zo0pQJt0?wgOXn+xkHEtvE!^qJk>Kd@15TbZM(O4TCc8zznnu3yar=L8W9QC7^)x9=
zQZ~kvuwZ_Sdjk&dj==N0GbDx?@O3lyz=jDaBsV0=suW~#QQcPZZdZejU@7(hr_!#3
zqZl)e@YGo^Zq>=-Y#}d(y)UgP_xu6W$#=%OK3()2lg9QP6~Q4<bBvIG02{_zgK1Y4
zoIbUOO$c4Yo9>ij(RT5?k3|~qqw$bisym()IDcoYA!{u*_6>r4j~X+R$zv4~`&m#*
zH7~BY9QN2wClMWAJhf$j3k}&wGY3;G+=j(zB+d=XET%I_mk+GXRta^kJAzcL2X1-p
z1(UzO5XM@aW^Kts@M(WFi;@V0h~_JxBvQfEEYYFL$RM04T0@euBB*+K18RIaNO$6M
z*|Dl^)bV&Ojx1ja0cy|qFaArh@54(-uO5d*e)51XR?xCz=PYEVjiod2ML4GQC%lfz
zM%h7E7UU94@4{EoqkBD6vV1eSan;Oj<!y4EQ%T|KXShCV39uC>d>d23#>UmcyxXtn
zWoI5$$h3pVCm|}8AL0-Ha-rdi18~!K0&^SF2p+9&oMiGn+)><(m+Xe=%FP}!-m(VO
zU$nDro3C+AirRSp4bU37OEr^}!%#6>@P}g#QdHz6RysF^&KQ&vH96q^>kH}Oq$Dcd
zCxy9_ga2^hb<VTWi&`JI!CHk#zVKZkXsdtVDyC(EMekI6?P86oJqAqU!5Z$c;Y;qs
z$YFRH*a&s@Hz7ny8bxl+<2$YEutX~ixqcVUExDQ39r((<pSPYIJWnvqJrCiyY6zXW
zIUUpu_VM#NJ*oJ|RaB_Ejq8VBa#kn)W&8U2p#OXvJ-Awq_O*%VkRXHHj(9eDWj72T
z^}%ghF2ippBQQCy2;zc6H3R$pfvi_L?5;4aW}DI!hU>IBTkBDrxx#lAILE7Iy+#k*
zIN8nH+K93Z8z<2o30p{s?1MWiHR!!y3l3`aR=@b-NHe9&_^7A9*|9h)T&!AI<DSHr
z0XzfOEm`ob+7#2xzi|p1L%C3u!%QjlAxybb$g*awgW(u4Y)KswimlP1%*@@G0tHky
z|2343_+yq!0{Jy~vorf#$d@-kd)cSlR-;IEe~K4oely0;hK_i$GJ;<{KamtOrgPQ{
zZRnX&87N9i!|Bdw)J&>k%XTlO8)DIPY~mYQedBMs9r}T}=#Hw*-us^FD-kSA3n1vi
zCG0-(i0L0F<(k^A(YY93GIuS8hucDN{VT>U76Gk3kOS87v22UnUC=fDOE~1wkD2>F
zV0*fB?Q`~+)Sfut4EZqT+<Q|ftq_jC^|w-IWhztNc^utUhMCU=H>wXZrT2agG)B6T
z4wS6IE{{1BlNd)K{qxY>Xf*%wm^uV?dy?J!Y_Q6A<TJ$yl-B;_-q%!sbBjKz?sUV1
ztX`qOBNnb*FNY<G&%nz$n(IHU0w;ni;j_asUSe+*dKCm<W6nkXN4pkV;{C4X;JK-!
zWG|p*)4yOyJc7obc*hO0HsR5$VdTHzE{^WKi;sLJz_x?m*@moEm{xz1614B2cxE9^
zH+TasVUbL*p9c$t2+S|Zg{Sd4@X8tZ+e6y$?ZOxoQ8ZvrPsDS}%y%-FqRwy3TL22H
zzOczD^;f_0x4<b^l>e!xDNMWXMcx4|kk_M$Ig@6R^~Df0_%xMy^r%zBa|f1F^q!@K
z%))t(+PMXm!R*$If1n`YIZXdG1>>*JvABM*gDVaWXW#DgVDl=KP2A%PkAgL^vt)U7
zTbl-b3fPW@RR_pCFCSQz2ePGMv~yJhAGc@*`Bnevmyb7v-**y|Hg-o3v32Nb5sn)b
zR<qT!kI@F@ecWB0eRMY10;f-PU_&=o;17j=nTzRc(o0Vx#hI_EZ~EVK-S9WF%bHf3
zxlXEfUa=_z=?idX+a)x}kfg6#&MbTLA(9X7XI*I~5FD9<InL4~GS-l)=2*fSrDkUM
zgh6LQyKrB}APSY<p~xip+DBXOlf1JsRwm8khcoo}%UmvM{i)Lr6-Dx!P=z|?*O~Lk
zD^`>qO($+lrv5`!G}sb>11n_c6FWe^ihR)KjtYcKae^8dcRDN+0AChgu1Tgw@QxFO
zsQw1fuQSK#UGDhZ?ax$Bc?Cg5ld$8s5~fwBGAk(!Ha6owuGUGwK0iH+muoHYy!!^$
zpSGQSu25sYE?O};=0G}IAMwZi!bqz$pIiR3lnZnZCO*Cc?M7O0an)=P36!Trm8U@g
zODVSJF^XR($Mv!2ApWT*Q!7dXd6R=^GU@<~Kcfj7b6nY~t`ZpfYL9iJg>3PPpWHti
zvspsR6<%jjF&N6eWf8weapzKs;HH5;SAD6O3%MRkH?~bho#>_L+N(;Mi?`#Jv|JV)
z@Qi8SFJLClgG_&kEc#WuLF)D+e2Lf-;eF9*m{!;a(Rw@|JEQ=%xgN|eC<mTg3u6lF
z-M}r>8U<rnjkVDXa@^~SAiIL3EHxlS!xSg4SV{9l>^O&U9ZW7+9c4dT@@kvjFb%;%
zd={_|w`Uwe?T?q)&WGnI(t9^oKevDk5<*cXQHn0w=wpt21ADIdi;YYwq%XVvqn-tU
zbSPAmlJsWR_L!;EZrBn9byIXu*R&k9pKDOu8+Ed7ok@L7wqWJ^lk?WRjB#D&6zn;f
zUX)95b2GlP`R>;tPpP3syX6BKc@3aL@RZu9<3u~3N#HT78czSjZ{DM?4ELN}MUq+b
z$Zh>S6cmK9dUvy$RI^grF;c+t{;sBTjtRK=;VUNk=N21W6oJLDJk((r)DPH^Nc9vD
zTdpF!FL@s(?ER0+61Ib;g8_aqU5K?u4uY7M3ObwGVon#L<dZoBUq{ogNE;S&U@5b8
zPs2|)Wib9!F)JQSVw3&i*;?@<u=379Y7+a&cGd47Wlb^e@eeIHaN3^cw$)?s+^g7Z
zGaqJ;sMD^=N8xAC8JafSjYV3e=u++eNB5X9f$T>3TbPdTDmH_BrWR;hpJ%V-o<Y-`
zFqC_;kgaX9f`*VAta0fm*0$^c1P0t=BW6k5nSoI7-=PA-f*G)&FP+*9P4PuqIF^~p
zlUl%f40rm#oS%lVc{Z=uEYS`YoBJD56Eb1N;4^MW+ku@fM@)~+gGa5_Ed7c(e6BKN
zBfECMz+q=5@%t|@Pw_(Y4!xQ!a})`rZSjk(Joz43!GFt_Lsnu)zrt36{~>EO$7TYy
zG<5P4K8+!_6No$ZC*z>eaWoEi!PeAOQ1#_EoSH9DyGRcD4SP}NLuGug-^_ZQbJ;YL
zQpzZ5qm!Rk(Dol~tS#TX)-GAU7JCYy>xUgKj5&{w_RpnD#{W>E<x4)xU=@l6BysDN
zZ=;~rg`!<wvI5Hqe4N{4dTp8vY7-Cg`WA1oIE1jrQ?fQW@DxqIdKR8*<UrW!Aa+0W
zGRhrLCU5o^DYkyVVtHAi&4=e;@7PS=h8p?tL3MP}I0Y5H@J!<RemYJG=y^eaH=c$-
z;(jw)me|T|EsNnUtKWmO-*0j%?vKH}TLlLfSR(cpK%J@uY87tA&@XP-tPxMz&VkgO
zIEoyPWwS}A_aLuW2A2I@>{f9eb6;4=h8MZux2x6kdewURxXGJROYYTJk6#Kt1;3fz
zltNr0b^<R)3E^j`5p`T#0e3H@P}QVH+*O~7Z$4h+jCuv^-mp51EzidG0Dm~9VF*%H
z4_U=s3A`blf>wFUnU=?1c=NTF9gkFH@-07L&x1VnEp8&L-aHNj1N-2aC%~?A`Ls^X
z31ixl@a78<T4c5a7d=s@YyZ0N4!Tom_O=IX&5mx^DRmt_3}(QAzdKk;VgSy1Xbr~?
z?`NO8+@WdNSQfUz1ftK#vRRH}A>-LPEY~|wjiYDKm(R*L+diA!6pv!R8=ga|oPccq
zxdXwwvN^B1r%?8G40B#4MT@iaurn$h)BnlC&g0|h*1-xg`ZxvNT69qHUks0ri>LJ;
zq_9k^oPC)n$%6YX)A(Q2RIKVmg79*d)@WM0C)v2xd0z)8o%Y1LV@lBAr9EBwUO@vE
zoe<emj}<YAe6iFcoON_1&6O-+w%5k83iCfXKJ)_F@_YQbke8UCl#W)$-zj3jev-0X
z1i>m@(E3NijBP2wmwr`DqjM_NB>%#b%2{0TJwMz$^k;`kW^xfHZ;|emBlv!2DhoMv
zfV`F@V^Og#*0x8$w*4BkOI3?|{ap;^Ot}rIF{#{S<zCnpFM>@yGttbd2*z~T;hDRU
zm^8Q;|5m$5`??FsCSnoI#wPZ?)DFwV>$#5~-mun&Y&Nd(HY@2mgT7y%Q9{{SIxONq
zIR!WQWqbC4)!o0??RkH=j}OFHS8EV^JCD@c=fSyCn+UF)!KWkMC>wvh#>?OZ|Jk;Q
zGx0c%W3#;>g+s8J{F{YNkVU7CljtqGon=hSguc7KS@$PZ7Jf~D8Ml@)UpX)E)Jqa3
z{>?+%Qxovx3u&&B2e$Sf#T6MX%&gWFf0x@*qw08=nP*OhQBBOmtQg$=pFr2KPmp;-
zjS_Z8qrmMLw@2$NtJClT)9H74x8xQsnbq*(U8lLz54NChf+<(fq)cN59>5vB3l?SJ
ze>i1!9?V-)$xLo5;>^HvAXt77k|quFFa4xx&TeVE8k&XWiurh^U>Y^a6;UggfR)@U
zQg&&^4-rS{)1(d<C7;gno2LoyXVp;X*Ib&JtwrH%2FuDeuC4C;tJYLs62<iVaP{g^
z{8+Y{<e-Bdt?@yDftaO!!X19G%zt?9qz`Q=vtXq$tC&DZkz(GQ;}p7U`Nf_+D2xig
zj2Ru|(r-@%cVf9|m!{&-^iHO<APZ+Yt>jLRe#}O0idi=Q=MKq9rMSNPD_zQ51`o1s
zlAtmf?|gP-hp(lP{q)1QRn!)b-C7NHYa~cz?k5Y;z0P2>=?1tPXmW=Zc7gBO68Qd0
z41c7b0@{jrAtDwfyRES0P6q}2`!AVxMUs>HR~8{EkD8S;*k{$B%qRCC+s?X}amzz=
zJ2tA8q}yn-mo~k*s>^Q09)@f4wINwU59><RQFmAoF1;3#*{9bWZ!(L_^!K1!_a}%Q
zE6u`B7xKbo<wE_Wbllsq8j@zOgty@mG;y;co}E~X9}V|1)1X3V4IE1f_r#cLya1O-
zM)4C4WP|taHeS5<H59!UK+K{7x(sgUv_1=SL?T)ACK;5_T}k$q(XcztjgDH^vBKrC
za8TkM*yYOMw72%uu`&sT26Frc)2nQ2o)^?CKg!FG`60Al>%?yiuMmpoY{i$y+qns2
zRq52G#V~rH5RSMi(~sviz%^R1%0IszLAM-Ulp76ESMKm_PsUJL<zq;EehkgLb5XfM
GoBjtFh65M?

literal 0
HcmV?d00001

diff --git a/source/tests/data_modifier/sys_10/type.raw b/source/tests/data_modifier/sys_10/type.raw
new file mode 100644
index 0000000000..59f789c42f
--- /dev/null
+++ b/source/tests/data_modifier/sys_10/type.raw
@@ -0,0 +1,48 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
diff --git a/source/tests/data_modifier/sys_10/type_map.raw b/source/tests/data_modifier/sys_10/type_map.raw
new file mode 100644
index 0000000000..2583239060
--- /dev/null
+++ b/source/tests/data_modifier/sys_10/type_map.raw
@@ -0,0 +1,2 @@
+Type_0
+Type_1

From acb29fabafd87b919f558feb26591905ec72b5a6 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Fri, 8 Nov 2019 23:08:01 +0800
Subject: [PATCH 19/38] multi-threading for ewald

---
 source/lib/include/Ewald.h | 143 +++++++++++++++++++++++++------------
 1 file changed, 98 insertions(+), 45 deletions(-)

diff --git a/source/lib/include/Ewald.h b/source/lib/include/Ewald.h
index e6fc837386..9b96e9dcde 100644
--- a/source/lib/include/Ewald.h
+++ b/source/lib/include/Ewald.h
@@ -2,6 +2,7 @@
 
 #include<algorithm>
 #include<cassert>
+#include<omp.h>
 
 #include "SimulationRegion.h"
 
@@ -122,77 +123,115 @@ EwaldReciprocal(VALUETYPE &			ener,
   ener = 0;
   fill(force.begin(), force.end(), static_cast<VALUETYPE>(0));
   fill(virial.begin(), virial.end(), static_cast<VALUETYPE>(0));
-  
+
+  // number of threads
+  int nthreads = 1;
+#pragma omp parallel 
+  {
+    if (0 == omp_get_thread_num()) {
+      nthreads = omp_get_num_threads();
+    }
+  }
+
+  // K grid
   vector<int> KK(3);
   int totK = 1;
   cmpt_k<VALUETYPE>(KK, region, param);
   for (int dd = 0; dd < 3; ++dd){
     totK *= (KK[dd]+1);
   }  
+  int stride[3];
+  for (int dd = 0; dd < 3; ++dd) stride[dd] = KK[dd]+1;
   
   // compute the sq
-  VALUETYPE * sqr = new VALUETYPE[totK];
-  VALUETYPE * sqi = new VALUETYPE[totK];
-  for (int ii = 0; ii < totK; ++ii){
-    sqr[ii] = static_cast<VALUETYPE>(0);
-    sqi[ii] = static_cast<VALUETYPE>(0);
-  }
+  vector<vector<VALUETYPE> > thread_sqr(nthreads), thread_sqi(nthreads);
+  for (int ii = 0; ii < nthreads; ++ii){
+    thread_sqr[ii].resize(totK, static_cast<VALUETYPE>(0));
+    thread_sqi[ii].resize(totK, static_cast<VALUETYPE>(0));
+  }  
   // firstly loop over particles then loop over m
-  int mm[3];
+#pragma omp parallel for num_threads(nthreads)
   for (int ii = 0; ii < natoms; ++ii){
+    int thread_id = omp_get_thread_num();
     double ir[3];
     region.phys2Inter(ir, &coord[ii*3]);
-    double mr[3];
-    int mc = 0;
-    for (mm[0] = -KK[0]/2; mm[0] <= KK[0]/2; ++mm[0]){
-      mr[0] = ir[0] * mm[0];
-      for (mm[1] = -KK[1]/2; mm[1] <= KK[1]/2; ++mm[1]){
-	mr[1] = ir[1] * mm[1];
-	for (mm[2] = -KK[2]/2; mm[2] <= KK[2]/2; ++mm[2]){
-	  if (mm[0] == 0 && mm[1] == 0 && mm[2] == 0) continue;
-	  mr[2] = ir[2] * mm[2];
+    for (int mm0 = -KK[0]/2; mm0 <= KK[0]/2; ++mm0){
+      double mr[3];
+      mr[0] = ir[0] * mm0;      
+      int shift0 = (mm0 + KK[0]/2) * stride[1] * stride[2];
+      for (int mm1 = -KK[1]/2; mm1 <= KK[1]/2; ++mm1){
+	mr[1] = ir[1] * mm1;
+	int shift1 = (mm1 + KK[1]/2) * stride[2];
+	for (int mm2 = -KK[2]/2; mm2 <= KK[2]/2; ++mm2){
+	  if (mm0 == 0 && mm1 == 0 && mm2 == 0) continue;
+	  int mc = shift0 + shift1 + mm2 + KK[2]/2;
+	  mr[2] = ir[2] * mm2;
 	  double mdotr = 2. * M_PI * (mr[0]+mr[1]+mr[2]);
-	  sqr[mc] += charge[ii] * cos(mdotr);
-	  sqi[mc] += charge[ii] * sin(mdotr);
-	  ++mc;
+	  thread_sqr[thread_id][mc] += charge[ii] * cos(mdotr);
+	  thread_sqi[thread_id][mc] += charge[ii] * sin(mdotr);
 	}
       }
     }
   }
+  VALUETYPE * sqr = new VALUETYPE[totK];
+  VALUETYPE * sqi = new VALUETYPE[totK];
+  for (int ii = 0; ii < totK; ++ii){
+    sqr[ii] = static_cast<VALUETYPE>(0);
+    sqi[ii] = static_cast<VALUETYPE>(0);
+    for (int jj = 0; jj < nthreads; ++jj){
+      sqr[ii] += thread_sqr[jj][ii];
+      sqi[ii] += thread_sqi[jj][ii];
+    }
+  }  
+
+  // get rbox
   VALUETYPE rec_box[9];
   const double * rec_box_ = region.getRecBoxTensor();
   for (int ii = 0; ii < 9; ++ii){
     rec_box[ii] = static_cast<VALUETYPE>(rec_box_[ii]);
   }
+  
+  vector<VALUETYPE> thread_ener(nthreads, 0.);
+  vector<vector<VALUETYPE> > thread_force(nthreads);
+  vector<vector<VALUETYPE> > thread_virial(nthreads);
+  for (int ii = 0; ii < nthreads; ++ii){
+    thread_force[ii].resize(natoms * 3, 0.);
+    thread_virial[ii].resize(9, 0.);
+  }
   // calculate ener, force and virial
-  // firstly loop over particles then loop over m
-  int mc = 0;
-  for (mm[0] = -KK[0]/2; mm[0] <= KK[0]/2; ++mm[0]){
-    for (mm[1] = -KK[1]/2; mm[1] <= KK[1]/2; ++mm[1]){
-      for (mm[2] = -KK[2]/2; mm[2] <= KK[2]/2; ++mm[2]){
-	if (mm[0] == 0 && mm[1] == 0 && mm[2] == 0) continue;
+  // first loop over m (parallelized over mm0), then loop over particles
+#pragma omp parallel for num_threads(nthreads)
+  for (int mm0 = -KK[0]/2; mm0 <= KK[0]/2; ++mm0){
+    int thread_id = omp_get_thread_num();
+    int shift0 = (mm0 + KK[0]/2) * stride[1] * stride[2];
+    for (int mm1 = -KK[1]/2; mm1 <= KK[1]/2; ++mm1){
+      int shift1 = (mm1 + KK[1]/2) * stride[2];
+      for (int mm2 = -KK[2]/2; mm2 <= KK[2]/2; ++mm2){
+	if (mm0 == 0 && mm1 == 0 && mm2 == 0) continue;
+	int mc = shift0 + shift1 + mm2 + KK[2]/2;
 	// \bm m and \vert m \vert^2
 	VALUETYPE rm[3] = {0,0,0};	  
-	for (int dd = 0; dd < 3; ++dd){
-	  rm[0] += mm[dd] * rec_box[0*3+dd];
-	  rm[1] += mm[dd] * rec_box[1*3+dd];
-	  rm[2] += mm[dd] * rec_box[2*3+dd];
-	  // rm[0] += mm[dd] * rec_box[dd*3+0];
-	  // rm[1] += mm[dd] * rec_box[dd*3+1];
-	  // rm[2] += mm[dd] * rec_box[dd*3+2];
-	}
-	VALUETYPE mm2 = rm[0] * rm[0] + rm[1] * rm[1] + rm[2] * rm[2];
+	rm[0] += mm0 * rec_box[0*3+0];
+	rm[1] += mm0 * rec_box[1*3+0];
+	rm[2] += mm0 * rec_box[2*3+0];
+	rm[0] += mm1 * rec_box[0*3+1];
+	rm[1] += mm1 * rec_box[1*3+1];
+	rm[2] += mm1 * rec_box[2*3+1];
+	rm[0] += mm2 * rec_box[0*3+2];
+	rm[1] += mm2 * rec_box[1*3+2];
+	rm[2] += mm2 * rec_box[2*3+2];
+	VALUETYPE nmm2 = rm[0] * rm[0] + rm[1] * rm[1] + rm[2] * rm[2];
 	// energy
-	VALUETYPE expmm2 = exp(- M_PI * M_PI * mm2 / (param.beta * param.beta)) / mm2;
-	VALUETYPE eincr = expmm2 * (sqr[mc] * sqr[mc] + sqi[mc] * sqi[mc]);
-	ener += eincr;
+	VALUETYPE expnmm2 = exp(- M_PI * M_PI * nmm2 / (param.beta * param.beta)) / nmm2;
+	VALUETYPE eincr = expnmm2 * (sqr[mc] * sqr[mc] + sqi[mc] * sqi[mc]);
+	thread_ener[thread_id] += eincr;
 	// virial
-	VALUETYPE vpref = -2. * (1. + M_PI * M_PI * mm2 / (param.beta * param.beta)) / mm2;
+	VALUETYPE vpref = -2. * (1. + M_PI * M_PI * nmm2 / (param.beta * param.beta)) / nmm2;
 	for (int dd0 = 0; dd0 < 3; ++dd0){
 	  for (int dd1 = 0; dd1 < 3; ++dd1){	    
 	    VALUETYPE tmp = vpref * rm[dd0] * rm[dd1];
 	    if (dd0 == dd1) tmp += 1;
-	    virial[dd0*3+dd1] += eincr * tmp;
+	    thread_virial[thread_id][dd0*3+dd1] += eincr * tmp;
 	  }
 	}
 	// force
@@ -200,15 +239,29 @@ EwaldReciprocal(VALUETYPE &			ener,
 	  VALUETYPE mdotr = - 2. * M_PI * (coord[ii*3+0]*rm[0] + coord[ii*3+1]*rm[1] + coord[ii*3+2]*rm[2]);
 	  VALUETYPE tmpr = charge[ii] * cos(mdotr);
 	  VALUETYPE tmpi = charge[ii] * sin(mdotr);
-	  VALUETYPE cc = 4. * M_PI * (tmpr * sqi[mc] + tmpi * sqr[mc]) * expmm2;
-	  force[ii*3+0] -= rm[0] * cc;
-	  force[ii*3+1] -= rm[1] * cc;
-	  force[ii*3+2] -= rm[2] * cc;
+	  VALUETYPE cc = 4. * M_PI * (tmpr * sqi[mc] + tmpi * sqr[mc]) * expnmm2;
+	  thread_force[thread_id][ii*3+0] -= rm[0] * cc;
+	  thread_force[thread_id][ii*3+1] -= rm[1] * cc;
+	  thread_force[thread_id][ii*3+2] -= rm[2] * cc;
 	}	  
-	++mc;
       }
     }
   }
+  // reduce thread results
+  for (int ii = 0; ii < nthreads; ++ii){
+    ener += thread_ener[ii];
+  }
+  for (int jj = 0; jj < 9; ++jj){
+    for (int ii = 0; ii < nthreads; ++ii){
+      virial[jj] += thread_virial[ii][jj];
+    }
+  }
+  for (int jj = 0; jj < natoms * 3; ++jj){
+    for (int ii = 0; ii < nthreads; ++ii){
+      force[jj] += thread_force[ii][jj];
+    }
+  }
+
   VALUETYPE vol = static_cast<VALUETYPE>(region.getVolume());
   ener /= 2 * M_PI * vol;
   ener *= ElectrostaticConvertion;

From d7134fdf9b5b3b569ad64fcd9fe141acab767a09 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Sun, 10 Nov 2019 19:37:23 +0800
Subject: [PATCH 20/38] do not apply polar fitting bias, which does not have
 any effect

---
 source/train/Fitting.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/source/train/Fitting.py b/source/train/Fitting.py
index 506f8188b9..cd230d5012 100644
--- a/source/train/Fitting.py
+++ b/source/train/Fitting.py
@@ -410,7 +410,6 @@ def compute_input_stats(self, all_stat, protection = 1e-2):
         all_tmp = []
         for ss in range(len(data)):
             tmp = np.concatenate(data[ss], axis = 0)
-            print(tmp.shape)
             tmp = np.reshape(tmp, [-1, 3, 3])
             tmp,_ = np.linalg.eig(tmp)
             tmp = np.absolute(tmp)
@@ -452,9 +451,9 @@ def build (self,
                     layer = one_layer(layer, self.n_neuron[ii], name='layer_'+str(ii)+'_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed)
             if self.fit_diag :
                 bavg = np.zeros(self.dim_rot_mat_1)
-                bavg[0] = self.avgeig[0]
-                bavg[1] = self.avgeig[1]
-                bavg[2] = self.avgeig[2]
+                # bavg[0] = self.avgeig[0]
+                # bavg[1] = self.avgeig[1]
+                # bavg[2] = self.avgeig[2]
                 # (nframes x natoms) x naxis
                 final_layer = one_layer(layer, self.dim_rot_mat_1, activation_fn = None, name='final_layer_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, bavg = bavg)
                 # (nframes x natoms) x naxis
@@ -463,9 +462,9 @@ def build (self,
                 final_layer = tf.matrix_diag(final_layer)                
             else :
                 bavg = np.zeros(self.dim_rot_mat_1*self.dim_rot_mat_1)
-                bavg[0*self.dim_rot_mat_1+0] = self.avgeig[0]
-                bavg[1*self.dim_rot_mat_1+1] = self.avgeig[1]
-                bavg[2*self.dim_rot_mat_1+2] = self.avgeig[2]
+                # bavg[0*self.dim_rot_mat_1+0] = self.avgeig[0]
+                # bavg[1*self.dim_rot_mat_1+1] = self.avgeig[1]
+                # bavg[2*self.dim_rot_mat_1+2] = self.avgeig[2]
                 # (nframes x natoms) x (naxis x naxis)
                 final_layer = one_layer(layer, self.dim_rot_mat_1*self.dim_rot_mat_1, activation_fn = None, name='final_layer_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, bavg = bavg)
                 # (nframes x natoms) x naxis x naxis

From 11e0fd2224df10faf3df398b347f8a2e5fef3ec8 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Sun, 10 Nov 2019 19:56:04 +0800
Subject: [PATCH 21/38] provide stop learning rate and set the decay rate
 automatically

---
 examples/water/train/polar.json               |  4 +--
 examples/water/train/polar_se_a.json          |  8 +++---
 examples/water/train/wannier.json             |  4 +--
 examples/water/train/water.json               |  5 ++--
 examples/water/train/water_se_a.json          |  4 +--
 examples/water/train/water_se_ar.json         |  4 +--
 examples/water/train/water_se_r.json          |  5 ++--
 examples/water/train/water_srtab_example.json |  4 +--
 source/train/LearningRate.py                  | 26 +++++++++++++------
 source/train/Trainer.py                       | 14 ++++++----
 source/train/train.py                         |  4 +--
 11 files changed, 49 insertions(+), 33 deletions(-)

diff --git a/examples/water/train/polar.json b/examples/water/train/polar.json
index 6a1558b124..60e3fa3494 100644
--- a/examples/water/train/polar.json
+++ b/examples/water/train/polar.json
@@ -31,9 +31,9 @@
     
     "learning_rate" :{
 	"type":		"exp",
-	"start_lr":	0.001,
 	"decay_steps":	5000,
-	"decay_rate":	0.95,
+	"start_lr":	0.001,	
+	"stop_lr":	3.51e-8,
 	"_comment":	"that's all"
     },
 
diff --git a/examples/water/train/polar_se_a.json b/examples/water/train/polar_se_a.json
index 55899e564d..dc90e481ce 100644
--- a/examples/water/train/polar_se_a.json
+++ b/examples/water/train/polar_se_a.json
@@ -3,7 +3,7 @@
     "_comment": " model parameters",
     "model":{
 	"type_map":		["O", "H"],
-	"data_stat_nbatch":	1,
+	"data_stat_nbatch":	10,
 	"descriptor" :{
 	    "type":		"se_a",
 	    "sel":		[46, 92],
@@ -18,7 +18,7 @@
 	"fitting_net": {
 	    "type":		"polar",
 	    "sel_type":		[0],
-	    "fit_diag":		true,
+	    "fit_diag":		false,
 	    "neuron":		[100, 100, 100],
 	    "resnet_dt":	true,
 	    "seed":		1,
@@ -29,9 +29,9 @@
     
     "learning_rate" :{
 	"type":		"exp",
-	"start_lr":	0.01,
 	"decay_steps":	5000,
-	"decay_rate":	0.95,
+	"start_lr":	0.01,
+	"stop_lr":	3.51e-7,
 	"_comment":	"that's all"
     },
 
diff --git a/examples/water/train/wannier.json b/examples/water/train/wannier.json
index e969675989..f23f5e0d62 100644
--- a/examples/water/train/wannier.json
+++ b/examples/water/train/wannier.json
@@ -32,9 +32,9 @@
     
     "learning_rate" :{
 	"type":		"exp",
-	"start_lr":	0.001,
 	"decay_steps":	5000,
-	"decay_rate":	0.95,
+	"start_lr":	0.001,	
+	"stop_lr":	3.51e-8,
 	"_comment":	"that's all"
     },
 
diff --git a/examples/water/train/water.json b/examples/water/train/water.json
index 8b1c6619f7..23ba559aed 100644
--- a/examples/water/train/water.json
+++ b/examples/water/train/water.json
@@ -3,6 +3,7 @@
     "_comment": " model parameters",
     "model":{
 	"type_map":		["O", "H"],
+	"data_stat_nbatch":	10,
 	"descriptor": {
 	    "type":		"loc_frame",
 	    "sel_a":		[16, 32],
@@ -28,9 +29,9 @@
     
     "learning_rate" :{
 	"type":		"exp",
-	"start_lr":	0.001,
 	"decay_steps":	5000,
-	"decay_rate":	0.95,
+	"start_lr":	0.001,
+	"stop_lr":	3.51e-8,
 	"_comment":	"that's all"
     },
 
diff --git a/examples/water/train/water_se_a.json b/examples/water/train/water_se_a.json
index 4557e64fa5..cb005530c1 100644
--- a/examples/water/train/water_se_a.json
+++ b/examples/water/train/water_se_a.json
@@ -24,9 +24,9 @@
 
     "learning_rate" :{
 	"type":		"exp",
-	"start_lr":	0.001,
 	"decay_steps":	5000,
-	"decay_rate":	0.95,
+	"start_lr":	0.001,	
+	"stop_lr":	3.51e-8,
 	"_comment":	"that's all"
     },
 
diff --git a/examples/water/train/water_se_ar.json b/examples/water/train/water_se_ar.json
index e3677f6205..2173f2e1d9 100644
--- a/examples/water/train/water_se_ar.json
+++ b/examples/water/train/water_se_ar.json
@@ -35,9 +35,9 @@
 
     "learning_rate" :{
 	"type":		"exp",
-	"start_lr":	0.005,
 	"decay_steps":	5000,
-	"decay_rate":	0.95,
+	"start_lr":	0.005,
+	"stop_lr":	1.76e-7,
 	"_comment":	"that's all"
     },
 
diff --git a/examples/water/train/water_se_r.json b/examples/water/train/water_se_r.json
index c577047189..7faf55a3c3 100644
--- a/examples/water/train/water_se_r.json
+++ b/examples/water/train/water_se_r.json
@@ -23,9 +23,10 @@
     },
 
     "learning_rate" : {
-	"start_lr":	0.005,
+	"type":		"exp",
 	"decay_steps":	5000,
-	"decay_rate":	0.95,
+	"start_lr":	0.005,	
+	"stop_lr":	1.76e-7,
 	"_comment":	" that's all"
     },
 
diff --git a/examples/water/train/water_srtab_example.json b/examples/water/train/water_srtab_example.json
index 846017a24c..f2a0a4a39c 100644
--- a/examples/water/train/water_srtab_example.json
+++ b/examples/water/train/water_srtab_example.json
@@ -32,9 +32,9 @@
 
     "learning_rate" :{
 	"type":		"exp",
-	"start_lr":	0.001,
 	"decay_steps":	5000,
-	"decay_rate":	0.95,
+	"start_lr":	0.001,	
+	"stop_lr":	3.51e-8,
 	"_comment":	"that's all"
     },
 
diff --git a/source/train/LearningRate.py b/source/train/LearningRate.py
index a26882ef2b..3df4178f38 100644
--- a/source/train/LearningRate.py
+++ b/source/train/LearningRate.py
@@ -7,15 +7,25 @@ class LearningRateExp (object) :
     def __init__ (self, 
                   jdata) :
         args = ClassArg()\
-               .add('decay_steps',      int,    must = True)\
-               .add('decay_rate',       float,  must = True)\
-               .add('start_lr',         float,  must = True)
-        class_data = args.parse(jdata)
-        self.decay_steps_ = class_data['decay_steps']
-        self.decay_rate_ = class_data['decay_rate']
-        self.start_lr_ = class_data['start_lr']
+               .add('decay_steps',      int,    must = False)\
+               .add('decay_rate',       float,  must = False)\
+               .add('start_lr',         float,  must = True)\
+               .add('stop_lr',          float,  must = False)
+        self.cd = args.parse(jdata)
+        self.start_lr_ = self.cd['start_lr']
 
-    def build(self, global_step) :
+    def build(self, global_step, stop_batch = None) :
+        if stop_batch is None:            
+            self.decay_steps_ = self.cd['decay_steps'] if self.cd['decay_steps'] is not None else 5000
+            self.decay_rate_  = self.cd['decay_rate']  if self.cd['decay_rate']  is not None else 0.95
+        else:
+            self.stop_lr_  = self.cd['stop_lr'] if self.cd['stop_lr'] is not None else 5e-8
+            default_ds = 100 if stop_batch // 10 > 100 else stop_batch // 100 + 1
+            self.decay_steps_ = self.cd['decay_steps'] if self.cd['decay_steps'] is not None else default_ds
+            if self.decay_steps_ >= stop_batch:
+                self.decay_steps_ = default_ds
+            self.decay_rate_ = np.exp(np.log(self.stop_lr_ / self.start_lr_) / (stop_batch / self.decay_steps_))
+            
         return tf.train.exponential_decay(self.start_lr_, 
                                           global_step,
                                           self.decay_steps_,
diff --git a/source/train/Trainer.py b/source/train/Trainer.py
index 2cc73e673e..a0aa95f108 100644
--- a/source/train/Trainer.py
+++ b/source/train/Trainer.py
@@ -212,9 +212,11 @@ def _message (self, msg) :
         self.run_opt.message(msg)
 
     def build (self, 
-               data) :
+               data, 
+               stop_batch = 0) :
         self.ntypes = self.model.get_ntypes()
         assert (self.ntypes == data.get_ntypes()), "ntypes should match that found in data"
+        self.stop_batch = stop_batch
 
         self.batch_size = data.get_batch_size()
 
@@ -241,7 +243,7 @@ def build (self,
     def _build_lr(self):
         self._extra_train_ops   = []
         self.global_step = tf.train.get_or_create_global_step()
-        self.learning_rate = self.lr.build(self.global_step)
+        self.learning_rate = self.lr.build(self.global_step, self.stop_batch)
         self._message("built lr")
 
     def _build_network(self, data):        
@@ -373,8 +375,8 @@ def _init_sess_distrib(self):
         # save_checkpoint_steps = self.save_freq)
 
     def train (self, 
-               data, 
-               stop_batch) :
+               data) :
+        stop_batch = self.stop_batch
         if self.run_opt.is_distrib :
             self._init_sess_distrib()
         else :
@@ -387,9 +389,11 @@ def train (self,
 
         cur_batch = self.sess.run(self.global_step)
         self.cur_batch = cur_batch
-        self.run_opt.message("start training at lr %.2e (== %.2e), final lr will be %.2e" % 
+        self.run_opt.message("start training at lr %.2e (== %.2e), decay_step %d, decay_rate %f, final lr will be %.2e" % 
                              (self.sess.run(self.learning_rate),
                               self.lr.value(cur_batch), 
+                              self.lr.decay_steps_,
+                              self.lr.decay_rate_,
                               self.lr.value(stop_batch)) 
         )
 
diff --git a/source/train/train.py b/source/train/train.py
index b41c471a3f..9fd4663057 100755
--- a/source/train/train.py
+++ b/source/train/train.py
@@ -131,11 +131,11 @@ def _do_work(jdata, run_opt):
                             modifier = modifier)
     data.add_dict(data_requirement)
     # build the model with stats from the first system
-    model.build (data)
+    model.build (data, stop_batch)
     # train the model with the provided systems in a cyclic way
     start_time = time.time()
     cur_batch = 0
-    model.train (data, stop_batch)
+    model.train (data)
     end_time = time.time()
     run_opt.message("finished training\nwall time: %.3f s" % (end_time-start_time))
 

From 02577971bad474a6082ec0e68d0bc5a3e974871a Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Mon, 11 Nov 2019 18:08:24 +0800
Subject: [PATCH 22/38] ewald reciprocal: build computational graph on
 initialization

---
 source/op/ewald_recp.cc   | 33 ++++++++++++++++-----------------
 source/train/EwaldRecp.py | 35 ++++++++++++++++++-----------------
 2 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/source/op/ewald_recp.cc b/source/op/ewald_recp.cc
index d2d04601a0..29daaa53c8 100644
--- a/source/op/ewald_recp.cc
+++ b/source/op/ewald_recp.cc
@@ -59,22 +59,18 @@ class EwaldRecpOp : public OpKernel {
     const Tensor& box_tensor	= context->input(cc++);
 
     // set size of the sample
-    OP_REQUIRES (context, (coord_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of coord should be 2"));
-    OP_REQUIRES (context, (charge_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of type should be 2"));
+    OP_REQUIRES (context, (coord_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of coord should be 1"));
+    OP_REQUIRES (context, (charge_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of charge should be 1"));
     OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) == 1),	errors::InvalidArgument ("size of natoms should be 1"));
-    OP_REQUIRES (context, (box_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of box should be 2"));
+    OP_REQUIRES (context, (box_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of box should be 1"));
     auto natoms	= natoms_tensor.flat<int>();
     int nloc = natoms(0);
-    int nsamples = coord_tensor.shape().dim_size(0);
+    int nsamples = coord_tensor.shape().dim_size(0) / (nloc * 3);
 
     // check the sizes
-    OP_REQUIRES (context, (nsamples == coord_tensor.shape().dim_size(0)),	errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nsamples == charge_tensor.shape().dim_size(0)),	errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nsamples == box_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of samples should match"));
-
-    OP_REQUIRES (context, (nloc * 3 == coord_tensor.shape().dim_size(1)),	errors::InvalidArgument ("number of atoms should match"));
-    OP_REQUIRES (context, (nloc == charge_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of atoms should match"));
-    OP_REQUIRES (context, (9 == box_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of box should be 9"));
+    OP_REQUIRES (context, (nsamples * nloc * 3 == coord_tensor.shape().dim_size(0)),	errors::InvalidArgument ("coord  number of samples should match"));
+    OP_REQUIRES (context, (nsamples * nloc * 1 == charge_tensor.shape().dim_size(0)),	errors::InvalidArgument ("charge number of samples should match"));
+    OP_REQUIRES (context, (nsamples * 9 == box_tensor.shape().dim_size(0)),		errors::InvalidArgument ("box    number of samples should match"));
 
     // Create an output tensor
     TensorShape energy_shape ;
@@ -94,18 +90,21 @@ class EwaldRecpOp : public OpKernel {
     Tensor* virial_tensor = NULL;
     OP_REQUIRES_OK(context, context->allocate_output(cc++, virial_shape, &virial_tensor));
     
-    auto coord	= coord_tensor	.matrix<VALUETYPE>();
-    auto charge	= charge_tensor	.matrix<VALUETYPE>();
-    auto box	= box_tensor	.matrix<VALUETYPE>();
+    auto coord	= coord_tensor	.flat<VALUETYPE>();
+    auto charge	= charge_tensor	.flat<VALUETYPE>();
+    auto box	= box_tensor	.flat<VALUETYPE>();
     auto energy	= energy_tensor	->flat<VALUETYPE>();
     auto force	= force_tensor	->matrix<VALUETYPE>();
     auto virial	= virial_tensor	->matrix<VALUETYPE>();
 
     for (int kk = 0; kk < nsamples; ++kk){
+      int box_iter = kk * 9;
+      int coord_iter = kk * nloc * 3;
+      int charge_iter = kk * nloc;
       // set region
       boxtensor_t boxt [9] = {0};
       for (int dd = 0; dd < 9; ++dd) {
-	boxt[dd] = box(kk, dd);
+	boxt[dd] = box(box_iter + dd);
       }
       SimulationRegion<boxtensor_t > region;
       region.reinitBox (boxt);
@@ -114,7 +113,7 @@ class EwaldRecpOp : public OpKernel {
       vector<boxtensor_t > d_coord3_ (nloc*3);
       for (int ii = 0; ii < nloc; ++ii){
 	for (int dd = 0; dd < 3; ++dd){
-	  d_coord3_[ii*3+dd] = coord(kk, ii*3+dd);
+	  d_coord3_[ii*3+dd] = coord(coord_iter + ii*3+dd);
 	}
 	double inter[3];
 	region.phys2Inter (inter, &d_coord3_[3*ii]);
@@ -130,7 +129,7 @@ class EwaldRecpOp : public OpKernel {
 
       // set charge
       vector<VALUETYPE > d_charge (nloc);
-      for (int ii = 0; ii < nloc; ++ii) d_charge[ii] = charge(kk, ii);
+      for (int ii = 0; ii < nloc; ++ii) d_charge[ii] = charge(charge_iter + ii);
 
       // prepare outputs vectors
       VALUETYPE d_ener;
diff --git a/source/train/EwaldRecp.py b/source/train/EwaldRecp.py
index 20217d0428..619fd0d514 100644
--- a/source/train/EwaldRecp.py
+++ b/source/train/EwaldRecp.py
@@ -27,6 +27,16 @@ def __init__(self,
         self.hh = hh
         self.beta = beta
         self.sess = tf.Session()
+        # place holders
+        self.t_nloc       = tf.placeholder(tf.int32, [1], name = "t_nloc")
+        self.t_coord      = tf.placeholder(global_tf_float_precision, [None], name='t_coord')
+        self.t_charge     = tf.placeholder(global_tf_float_precision, [None], name='t_charge')
+        self.t_box        = tf.placeholder(global_tf_float_precision, [None], name='t_box')
+        
+        self.t_energy, self.t_force, self.t_virial \
+            = op_module.ewald_recp(self.t_coord, self.t_charge, self.t_nloc, self.t_box, 
+                                   ewald_h = self.hh,
+                                   ewald_beta = self.beta)
 
     def eval(self, 
              coord, 
@@ -37,26 +47,17 @@ def eval(self,
         box = np.array(box)
         nframes = charge.shape[0]
         natoms = charge.shape[1]
-        coord = np.reshape(coord, [nframes, 3*natoms])
-        box = np.reshape(box, [nframes, 9])
-        # place holders
-        t_coord      = tf.placeholder(global_tf_float_precision, [None, natoms * 3], name='t_coord')
-        t_charge     = tf.placeholder(global_tf_float_precision, [None, natoms], name='t_charge')
-        t_box        = tf.placeholder(global_tf_float_precision, [None, 9], name='t_box')
-        t_nloc    = tf.placeholder(tf.int32, [1], name = "t_nloc")
-        
-        t_energy, t_force, t_virial \
-            = op_module.ewald_recp(t_coord, t_charge, t_nloc, t_box, 
-                                   ewald_h = self.hh,
-                                   ewald_beta = self.beta)
+        coord = np.reshape(coord, [nframes * 3 * natoms])
+        charge = np.reshape(charge, [nframes * natoms])
+        box = np.reshape(box, [nframes * 9])
 
         [energy, force, virial] \
-            = self.sess.run([t_energy, t_force, t_virial], 
+            = self.sess.run([self.t_energy, self.t_force, self.t_virial], 
                             feed_dict = {
-                                t_coord:  coord,
-                                t_charge: charge,
-                                t_box:    box,
-                                t_nloc:   [natoms],
+                                self.t_coord:  coord,
+                                self.t_charge: charge,
+                                self.t_box:    box,
+                                self.t_nloc:   [natoms],
                             })
 
         return energy, force, virial

From 4ebc65e2205f414d25331716020cf55697823d7e Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Mon, 11 Nov 2019 18:14:43 +0800
Subject: [PATCH 23/38] revise test_ewald according to new interface

---
 source/tests/test_ewald.py | 48 +++++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/source/tests/test_ewald.py b/source/tests/test_ewald.py
index b6aee7d6b0..4131cfa4df 100644
--- a/source/tests/test_ewald.py
+++ b/source/tests/test_ewald.py
@@ -54,9 +54,9 @@ def setUp(self):
         self.dcoord = np.array(self.dcoord).reshape([self.nframes, 3*self.natoms])
         self.dcharge = np.array(self.dcharge).reshape([self.nframes, self.natoms])
         # place holders
-        self.coord      = tf.placeholder(global_tf_float_precision, [None, self.natoms * 3], name='t_coord')
-        self.charge     = tf.placeholder(global_tf_float_precision, [None, self.natoms], name='t_charge')
-        self.box        = tf.placeholder(global_tf_float_precision, [None, 9], name='t_box')
+        self.coord      = tf.placeholder(global_tf_float_precision, [None], name='t_coord')
+        self.charge     = tf.placeholder(global_tf_float_precision, [None], name='t_charge')
+        self.box        = tf.placeholder(global_tf_float_precision, [None], name='t_box')
         self.nloc    = tf.placeholder(tf.int32, [1], name = "t_nloc")        
 
     def test_py_interface(self):
@@ -69,9 +69,9 @@ def test_py_interface(self):
                                    ewald_beta = self.ewald_beta)
         [e, f, v] = sess.run([t_energy, t_force, t_virial], 
                            feed_dict = {
-                               self.coord:  self.dcoord,
-                               self.charge: self.dcharge,
-                               self.box:    self.dbox,
+                               self.coord:  self.dcoord.reshape([-1]),
+                               self.charge: self.dcharge.reshape([-1]),
+                               self.box:    self.dbox.reshape([-1]),
                                self.nloc:   [self.natoms],
                            })
         er = EwaldRecp(self.ewald_h, self.ewald_beta)
@@ -103,9 +103,9 @@ def test_force(self):
                                    ewald_beta = self.ewald_beta)
         [force] = sess.run([t_force], 
                            feed_dict = {
-                               self.coord:  self.dcoord,
-                               self.charge: self.dcharge,
-                               self.box:    self.dbox,
+                               self.coord:  self.dcoord.reshape([-1]),
+                               self.charge: self.dcharge.reshape([-1]),
+                               self.box:    self.dbox.reshape([-1]),
                                self.nloc:   [self.natoms],
                            })
         for idx in range(self.natoms):
@@ -116,16 +116,16 @@ def test_force(self):
                 dcoordm[:,idx*3+dd] = self.dcoord[:,idx*3+dd] - hh
                 energyp = sess.run([t_energy], 
                                    feed_dict = {
-                                       self.coord:  dcoordp,
-                                       self.charge: self.dcharge,
-                                       self.box:    self.dbox,
+                                       self.coord:  dcoordp.reshape([-1]),
+                                       self.charge: self.dcharge.reshape([-1]),
+                                       self.box:    self.dbox.reshape([-1]),
                                        self.nloc:   [self.natoms],
                                    })                                
                 energym = sess.run([t_energy], 
                                    feed_dict = {
-                                       self.coord:  dcoordm,
-                                       self.charge: self.dcharge,
-                                       self.box:    self.dbox,
+                                       self.coord:  dcoordm.reshape([-1]),
+                                       self.charge: self.dcharge.reshape([-1]),
+                                       self.box:    self.dbox.reshape([-1]),
                                        self.nloc:   [self.natoms],
                                    })
                 c_force = -(energyp[0] - energym[0]) / (2*hh)
@@ -145,9 +145,9 @@ def test_virial(self):
                                    ewald_beta = self.ewald_beta)
         [virial] = sess.run([t_virial], 
                            feed_dict = {
-                               self.coord:  self.dcoord,
-                               self.charge: self.dcharge,
-                               self.box:    self.dbox,
+                               self.coord:  self.dcoord.reshape([-1]),
+                               self.charge: self.dcharge.reshape([-1]),
+                               self.box:    self.dbox.reshape([-1]),
                                self.nloc:   [self.natoms],
                            })
 
@@ -181,16 +181,16 @@ def test_virial(self):
                 dcoordm = np.reshape(dcoord3m, [self.nframes,-1])
                 energyp = sess.run([t_energy],
                                    feed_dict = {
-                                       self.coord:  dcoordp,
-                                       self.charge: self.dcharge,
-                                       self.box:    dboxp,
+                                       self.coord:  dcoordp.reshape([-1]),
+                                       self.charge: self.dcharge.reshape([-1]),
+                                       self.box:    dboxp.reshape([-1]),
                                        self.nloc:   [self.natoms],
                                    })
                 energym = sess.run([t_energy], 
                                    feed_dict = {
-                                       self.coord:  dcoordm,
-                                       self.charge: self.dcharge,
-                                       self.box:    dboxm,
+                                       self.coord:  dcoordm.reshape([-1]),
+                                       self.charge: self.dcharge.reshape([-1]),
+                                       self.box:    dboxm.reshape([-1]),
                                        self.nloc:   [self.natoms],
                                    })
                 num_deriv[:,ii,jj] = -(energyp[0] - energym[0]) / (2.*hh)

From b5be8fecd50208dea438e12a37b31ace7165daf6 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Mon, 11 Nov 2019 18:32:20 +0800
Subject: [PATCH 24/38] expand three-fold loop to enable better multi-thread
 parallelization

---
 source/lib/include/Ewald.h | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/source/lib/include/Ewald.h b/source/lib/include/Ewald.h
index 9b96e9dcde..e115ec3e97 100644
--- a/source/lib/include/Ewald.h
+++ b/source/lib/include/Ewald.h
@@ -201,14 +201,22 @@ EwaldReciprocal(VALUETYPE &			ener,
   // calculate ener, force and virial
   // firstly loop over particles then loop over m  
 #pragma omp parallel for num_threads(nthreads)
-  for (int mm0 = -KK[0]/2; mm0 <= KK[0]/2; ++mm0){
+  for (int mc = 0; mc < totK; ++mc){
     int thread_id = omp_get_thread_num();
-    int shift0 = (mm0 + KK[0]/2) * stride[1] * stride[2];
-    for (int mm1 = -KK[1]/2; mm1 <= KK[1]/2; ++mm1){
-      int shift1 = (mm1 + KK[1]/2) * stride[2];
-      for (int mm2 = -KK[2]/2; mm2 <= KK[2]/2; ++mm2){
+    int mm0 = mc / (stride[1] * stride[2]);
+    int left = mc - mm0 * stride[1] * stride[2];
+    int mm1 = left / stride[2];
+    int mm2 = left - mm1 * stride[2];
+    mm0 -= KK[0]/2;
+    mm1 -= KK[1]/2;
+    mm2 -= KK[2]/2;
+  // for (int mm0 = -KK[0]/2; mm0 <= KK[0]/2; ++mm0){
+  //   int shift0 = (mm0 + KK[0]/2) * stride[1] * stride[2];
+  //   for (int mm1 = -KK[1]/2; mm1 <= KK[1]/2; ++mm1){
+  //     int shift1 = (mm1 + KK[1]/2) * stride[2];
+  //     for (int mm2 = -KK[2]/2; mm2 <= KK[2]/2; ++mm2){
+  // 	int mc = shift0 + shift1 + mm2 + KK[2]/2;
 	if (mm0 == 0 && mm1 == 0 && mm2 == 0) continue;
-	int mc = shift0 + shift1 + mm2 + KK[2]/2;
 	// \bm m and \vert m \vert^2
 	VALUETYPE rm[3] = {0,0,0};	  
 	rm[0] += mm0 * rec_box[0*3+0];
@@ -243,9 +251,9 @@ EwaldReciprocal(VALUETYPE &			ener,
 	  thread_force[thread_id][ii*3+0] -= rm[0] * cc;
 	  thread_force[thread_id][ii*3+1] -= rm[1] * cc;
 	  thread_force[thread_id][ii*3+2] -= rm[2] * cc;
-	}	  
-      }
-    }
+	}
+    //   }
+    // }
   }
   // reduce thread results
   for (int ii = 0; ii < nthreads; ++ii){

From 151d07a4e223e1801115ffd34cd9b61c40bcfbdb Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Sat, 16 Nov 2019 14:03:54 +0800
Subject: [PATCH 25/38] add c++ interface for DeepTensor, not tested

---
 source/lib/include/DeepTensor.h |  43 ++++
 source/lib/include/NNPInter.h   |  49 +----
 source/lib/include/common.h     | 109 +++++++++
 source/lib/src/DeepTensor.cc    | 116 ++++++++++
 source/lib/src/NNPInter.cc      | 379 +-------------------------------
 source/lib/src/common.cc        | 348 +++++++++++++++++++++++++++++
 source/train/Model.py           |   3 +
 7 files changed, 631 insertions(+), 416 deletions(-)
 create mode 100644 source/lib/include/DeepTensor.h
 create mode 100644 source/lib/include/common.h
 create mode 100644 source/lib/src/DeepTensor.cc
 create mode 100644 source/lib/src/common.cc

diff --git a/source/lib/include/DeepTensor.h b/source/lib/include/DeepTensor.h
new file mode 100644
index 0000000000..389177c6e7
--- /dev/null
+++ b/source/lib/include/DeepTensor.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include "NNPInter.h"
+
+class DeepTensor
+{
+public:
+  DeepTensor () ;
+  DeepTensor  (const string & model, const int & gpu_rank = 0);
+  void init (const string & model, const int & gpu_rank = 0);
+  void print_summary(const string &pre) const;
+public:
+  void compute (vector<VALUETYPE> &		value,
+		const vector<VALUETYPE> &	coord,
+		const vector<int> &		atype,
+		const vector<VALUETYPE> &	box,
+		const int			nghost = 0);
+  void compute (vector<VALUETYPE> &		value,
+		const vector<VALUETYPE> &	coord,
+		const vector<int> &		atype,
+		const vector<VALUETYPE> &	box, 
+		const int			nghost,
+		const LammpsNeighborList &	lmp_list);
+  VALUETYPE cutoff () const {assert(inited); return rcut;};
+  int numb_types () const {assert(inited); return ntypes;};
+private:
+  Session* session;
+  int num_intra_nthreads, num_inter_nthreads;
+  GraphDef graph_def;
+  bool inited;
+  VALUETYPE rcut;
+  VALUETYPE cell_size;
+  int ntypes;
+  string model_type;
+  int odim;
+  template<class VT> VT get_scalar(const string & name) const;
+  void run_model (vector<VALUETYPE> &		d_tensor_,
+		  Session *			session, 
+		  const std::vector<std::pair<string, Tensor>> & input_tensors,
+		  const NNPAtomMap<VALUETYPE> &	nnpmap, 
+		  const int			nghost = 0);
+};
+
diff --git a/source/lib/include/NNPInter.h b/source/lib/include/NNPInter.h
index a06faaa3b9..7aa4c0ebe9 100644
--- a/source/lib/include/NNPInter.h
+++ b/source/lib/include/NNPInter.h
@@ -1,53 +1,6 @@
 #pragma once
 
-#include "tensorflow/core/public/session.h"
-#include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/shape_inference.h"
-
-#include <vector>
-#include "version.h"
-
-using namespace tensorflow;
-using namespace std;
-
-#ifdef HIGH_PREC
-typedef double VALUETYPE;
-typedef double ENERGYTYPE;
-#else 
-typedef float  VALUETYPE;
-typedef double ENERGYTYPE;
-#endif
-
-struct LammpsNeighborList 
-{
-  int inum;
-  const int * ilist;
-  const int * numneigh;
-  const int *const* firstneigh;
-  LammpsNeighborList (int inum_, 
-		      const int * ilist_,
-		      const int * numneigh_, 
-		      const int *const* firstneigh_) 
-      : inum(inum_), ilist(ilist_), numneigh(numneigh_), firstneigh(firstneigh_)
-      {
-      }
-};
-
-struct InternalNeighborList 
-{
-  int * pilist;
-  int * pjrange;
-  int * pjlist;
-  vector<int > ilist;
-  vector<int > jrange;
-  vector<int > jlist;
-  void clear () {ilist.clear(); jrange.clear(); jlist.clear();}
-  void make_ptrs () {
-    pilist = &ilist[0]; pjrange = &jrange[0]; pjlist = &jlist[0];
-  }
-};
+#include "common.h"
 
 class NNPInter 
 {
diff --git a/source/lib/include/common.h b/source/lib/include/common.h
new file mode 100644
index 0000000000..3e4ad3c521
--- /dev/null
+++ b/source/lib/include/common.h
@@ -0,0 +1,109 @@
+#pragma once
+
+#include "tensorflow/core/public/session.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+using namespace tensorflow;
+using namespace std;
+
+#include "NNPAtomMap.h"
+#include <vector>
+#include "version.h"
+
+#ifdef HIGH_PREC
+typedef double VALUETYPE;
+typedef double ENERGYTYPE;
+#else 
+typedef float  VALUETYPE;
+typedef double ENERGYTYPE;
+#endif
+
+struct LammpsNeighborList 
+{
+  int inum;
+  const int * ilist;
+  const int * numneigh;
+  const int *const* firstneigh;
+  LammpsNeighborList (int inum_, 
+		      const int * ilist_,
+		      const int * numneigh_, 
+		      const int *const* firstneigh_) 
+      : inum(inum_), ilist(ilist_), numneigh(numneigh_), firstneigh(firstneigh_)
+      {
+      }
+};
+
+struct InternalNeighborList 
+{
+  int * pilist;
+  int * pjrange;
+  int * pjlist;
+  vector<int > ilist;
+  vector<int > jrange;
+  vector<int > jlist;
+  void clear () {ilist.clear(); jrange.clear(); jlist.clear();}
+  void make_ptrs () {
+    pilist = &ilist[0]; pjrange = &jrange[0]; pjlist = &jlist[0];
+  }
+};
+
+void
+convert_nlist_lmp_internal (InternalNeighborList & list,
+			    const LammpsNeighborList & lmp_list);
+
+void
+shuffle_nlist (InternalNeighborList & list, 
+	       const NNPAtomMap<VALUETYPE> & map);
+
+void
+get_env_nthreads(int & num_intra_nthreads,
+		 int & num_inter_nthreads);
+
+void
+checkStatus(const tensorflow::Status& status);
+
+template<class VT>
+VT
+session_get_scalar(Session* session, const string name);
+
+int
+session_input_tensors (std::vector<std::pair<string, Tensor>> & input_tensors,
+		       const vector<VALUETYPE> &	dcoord_,
+		       const int &			ntypes,
+		       const vector<int> &		datype_,
+		       const vector<VALUETYPE> &	dbox, 
+		       const VALUETYPE &		cell_size,
+		       const vector<VALUETYPE> &	fparam_,
+		       const vector<VALUETYPE> &	aparam_,
+		       const NNPAtomMap<VALUETYPE>&	nnpmap,
+		       const int			nghost = 0);
+
+int
+session_input_tensors (std::vector<std::pair<string, Tensor>> & input_tensors,
+		       const vector<VALUETYPE> &	dcoord_,
+		       const int &			ntypes,
+		       const vector<int> &		datype_,
+		       const vector<VALUETYPE> &	dbox,		    
+		       InternalNeighborList &		dlist, 
+		       const vector<VALUETYPE> &	fparam_,
+		       const vector<VALUETYPE> &	aparam_,
+		       const NNPAtomMap<VALUETYPE>&	nnpmap,
+		       const int			nghost);
+
+
+template<class VT>
+VT
+session_get_scalar(Session* session, const string name) 
+{
+  std::vector<Tensor> output_tensors;
+  checkStatus (session->Run(std::vector<std::pair<string, Tensor>> ({}), 
+			    {name.c_str()}, 
+			    {}, 
+			    &output_tensors));
+  Tensor output_rc = output_tensors[0];
+  auto orc = output_rc.flat <VT> ();
+  return orc(0);
+}
diff --git a/source/lib/src/DeepTensor.cc b/source/lib/src/DeepTensor.cc
new file mode 100644
index 0000000000..00eab78f0f
--- /dev/null
+++ b/source/lib/src/DeepTensor.cc
@@ -0,0 +1,116 @@
+#include "DeepTensor.h"
+
+DeepTensor::
+DeepTensor(const string & model, const int & gpu_rank)
+    : inited (false)
+{
+  get_env_nthreads(num_intra_nthreads, num_inter_nthreads);
+  init(model, gpu_rank);  
+}
+
+void
+DeepTensor::
+init (const string & model, const int & gpu_rank)
+{
+  assert (!inited);
+  SessionOptions options;
+  options.config.set_inter_op_parallelism_threads(num_inter_nthreads);
+  options.config.set_intra_op_parallelism_threads(num_intra_nthreads);
+  checkStatus (NewSession(options, &session));
+  checkStatus (ReadBinaryProto(Env::Default(), model, &graph_def));
+  checkStatus (session->Create(graph_def));  
+  rcut = get_scalar<VALUETYPE>("descrpt_attr/rcut");
+  cell_size = rcut;
+  ntypes = get_scalar<int>("descrpt_attr/ntypes");
+  model_type = get_scalar<string>("model_attr/model_type");
+  odim = get_scalar<int>("model_attr/output_dim");
+  inited = true;
+}
+
+template<class VT>
+VT
+DeepTensor::
+get_scalar (const string & name) const
+{
+  return session_get_scalar<VT>(session, name);
+}
+
+void 
+DeepTensor::
+run_model (vector<VALUETYPE> &		d_tensor_,
+	   Session *			session, 
+	   const std::vector<std::pair<string, Tensor>> & input_tensors,
+	   const NNPAtomMap<VALUETYPE> &nnpmap, 
+	   const int			nghost)
+{
+  unsigned nloc = nnpmap.get_type().size();
+  unsigned nall = nloc + nghost;
+  if (nloc == 0) {
+    // no backward map needed
+    d_tensor_.resize(nall * odim);
+    fill(d_tensor_.begin(), d_tensor_.end(), 0.0);
+    return;
+  }
+
+  std::vector<Tensor> output_tensors;
+  checkStatus (session->Run(input_tensors, 
+			    {"o_" + model_type},
+			    {}, 
+			    &output_tensors));
+  
+  Tensor output_t = output_tensors[0];
+
+  auto ot = output_t.flat<VALUETYPE> ();
+
+  vector<VALUETYPE> d_tensor (nall * odim);
+  for (unsigned ii = 0; ii < nall * odim; ++ii){
+    d_tensor[ii] = ot(ii);
+  }
+  d_tensor_ = d_tensor;
+  nnpmap.backward (d_tensor_.begin(), d_tensor.begin(), odim);
+}
+
+void
+DeepTensor::
+compute (vector<VALUETYPE> &		dtensor_,
+	 const vector<VALUETYPE> &	dcoord_,
+	 const vector<int> &		datype_,
+	 const vector<VALUETYPE> &	dbox, 
+	 const int			nghost)
+{
+  int nall = dcoord_.size() / 3;
+  int nloc = nall - nghost;
+  NNPAtomMap<VALUETYPE> nnpmap (datype_.begin(), datype_.begin() + nloc);
+  assert (nloc == nnpmap.get_type().size());
+
+  std::vector<std::pair<string, Tensor>> input_tensors;
+  int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, vector<VALUETYPE>(), vector<VALUETYPE>(), nnpmap, nghost);
+  assert (ret == nloc);
+
+  run_model (dtensor_, session, input_tensors, nnpmap, nghost);
+}
+
+void
+DeepTensor::
+compute (vector<VALUETYPE> &		dtensor_,
+	 const vector<VALUETYPE> &	dcoord_,
+	 const vector<int> &		datype_,
+	 const vector<VALUETYPE> &	dbox, 
+	 const int			nghost,
+	 const LammpsNeighborList &	lmp_list)
+{
+  int nall = dcoord_.size() / 3;
+  int nloc = nall - nghost;
+  NNPAtomMap<VALUETYPE> nnpmap (datype_.begin(), datype_.begin() + nloc);
+  assert (nloc == nnpmap.get_type().size());
+
+  InternalNeighborList nlist;
+  convert_nlist_lmp_internal (nlist, lmp_list);
+  shuffle_nlist (nlist, nnpmap);
+
+  std::vector<std::pair<string, Tensor>> input_tensors;
+  int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, vector<VALUETYPE>(), vector<VALUETYPE>(), nnpmap, nghost);
+  assert (nloc == ret);
+
+  run_model (dtensor_, session, input_tensors, nnpmap, nghost);
+}
diff --git a/source/lib/src/NNPInter.cc b/source/lib/src/NNPInter.cc
index fa1f989b49..b01fe6dc1c 100644
--- a/source/lib/src/NNPInter.cc
+++ b/source/lib/src/NNPInter.cc
@@ -9,329 +9,7 @@
 #include <tensorflow/core/graph/graph_def_builder.h>
 #endif
 
-static
-void
-checkStatus(const tensorflow::Status& status) {
-  if (!status.ok()) {
-    std::cout << status.ToString() << std::endl;
-    exit(1);
-  }
-}
 
-static void
-convert_nlist_lmp_internal (InternalNeighborList & list,
-			    const LammpsNeighborList & lmp_list) 
-{
-  list.clear();
-  int total_num_nei = 0;
-  int inum = lmp_list.inum;
-  for (int ii = 0; ii < inum; ++ii){
-    total_num_nei += lmp_list.numneigh[ii];
-  }
-  list.ilist.resize(inum);
-  list.jrange.resize(inum+1);
-  list.jlist.resize(total_num_nei);
-  memcpy(&list.ilist[0], lmp_list.ilist, inum*sizeof(int));
-  list.jrange[0] = 0;
-  for (int ii = 0; ii < inum; ++ii){
-    int jnum = lmp_list.numneigh[ii];
-    list.jrange[ii+1] = list.jrange[ii] + jnum;
-    const int * jlist = lmp_list.firstneigh[ii];
-    memcpy(&(list.jlist[list.jrange[ii]]), jlist, jnum*sizeof(int));
-  }
-}
-
-static void
-shuffle_nlist (InternalNeighborList & list, 
-	       const NNPAtomMap<VALUETYPE> & map)
-{
-  const vector<int> & fwd_map = map.get_fwd_map();
-  int nloc = fwd_map.size();
-  for (unsigned ii = 0; ii < list.ilist.size(); ++ii){
-    if (list.ilist[ii] < nloc) {
-      list.ilist[ii] = fwd_map[list.ilist[ii]];
-    }
-  }
-  for (unsigned ii = 0; ii < list.jlist.size(); ++ii){
-    if (list.jlist[ii] < nloc) {
-      list.jlist[ii] = fwd_map[list.jlist[ii]];
-    }
-  }
-}
-
-static int
-make_input_tensors (std::vector<std::pair<string, Tensor>> & input_tensors,
-		    const vector<VALUETYPE> &	dcoord_,
-		    const int &			ntypes,
-		    const vector<int> &		datype_,
-		    const vector<VALUETYPE> &	dbox, 
-		    const VALUETYPE &		cell_size,
-    		    const vector<VALUETYPE> &	fparam_,
-    		    const vector<VALUETYPE> &	aparam_,
-		    const NNPAtomMap<VALUETYPE>&nnpmap,
-		    const int			nghost = 0)
-{
-  bool b_ghost = (nghost != 0);
-  
-  assert (dbox.size() == 9);
-
-  int nframes = 1;
-  int nall = dcoord_.size() / 3;
-  int nloc = nall - nghost;
-  assert (nall == datype_.size());
-
-  vector<int > datype = nnpmap.get_type();
-  vector<int > type_count (ntypes, 0);
-  for (unsigned ii = 0; ii < datype.size(); ++ii){
-    type_count[datype[ii]] ++;
-  }
-  datype.insert (datype.end(), datype_.begin() + nloc, datype_.end());
-
-  SimulationRegion<VALUETYPE> region;
-  vector<double > dbox_(9);
-  for (int dd = 0; dd < 9; ++dd) dbox_[dd] = dbox[dd];
-  region.reinitBox (&dbox_[0]);
-  double box_l[3];
-  region.toFaceDistance (box_l);
-  
-  vector<int > ncell (3, 2);
-  for (int dd = 0; dd < 3; ++dd){
-    ncell[dd] = box_l[dd] / cell_size;
-    if (ncell[dd] < 2) ncell[dd] = 2;
-  }
-  vector<int > next(3, 0);
-  for (int dd = 0; dd < 3; ++dd){
-    double cellh = box_l[dd] / ncell[dd];
-    next[dd] = cellh / cell_size;
-    if (next[dd] * cellh < cell_size) next[dd]++;
-    assert (next[dd] * cellh >= cell_size);
-  }
-
-  TensorShape coord_shape ;
-  coord_shape.AddDim (nframes);
-  coord_shape.AddDim (nall * 3);
-  TensorShape type_shape ;
-  type_shape.AddDim (nframes);
-  type_shape.AddDim (nall);
-  TensorShape box_shape ;
-  box_shape.AddDim (nframes);
-  box_shape.AddDim (9);
-  TensorShape mesh_shape ;
-  if (!b_ghost){
-    mesh_shape.AddDim (6);
-  }
-  else {
-    mesh_shape.AddDim (12);
-  }
-  TensorShape natoms_shape ;
-  natoms_shape.AddDim (2 + ntypes);
-  TensorShape fparam_shape ;
-  fparam_shape.AddDim (nframes);
-  fparam_shape.AddDim (fparam_.size());
-  TensorShape aparam_shape ;
-  aparam_shape.AddDim (nframes);
-  aparam_shape.AddDim (aparam_.size());
-  
-#ifdef HIGH_PREC
-  Tensor coord_tensor	(DT_DOUBLE, coord_shape);
-  Tensor box_tensor	(DT_DOUBLE, box_shape);
-  Tensor fparam_tensor  (DT_DOUBLE, fparam_shape);
-  Tensor aparam_tensor  (DT_DOUBLE, aparam_shape);
-#else
-  Tensor coord_tensor	(DT_FLOAT, coord_shape);
-  Tensor box_tensor	(DT_FLOAT, box_shape);
-  Tensor fparam_tensor  (DT_FLOAT, fparam_shape);
-  Tensor aparam_tensor  (DT_FLOAT, aparam_shape);
-#endif
-  Tensor type_tensor	(DT_INT32, type_shape);
-  Tensor mesh_tensor	(DT_INT32, mesh_shape);
-  Tensor natoms_tensor	(DT_INT32, natoms_shape);
-
-  auto coord = coord_tensor.matrix<VALUETYPE> ();
-  auto type = type_tensor.matrix<int> ();
-  auto box = box_tensor.matrix<VALUETYPE> ();
-  auto mesh = mesh_tensor.flat<int> ();
-  auto natoms = natoms_tensor.flat<int> ();  
-  auto fparam = fparam_tensor.matrix<VALUETYPE> ();
-  auto aparam = aparam_tensor.matrix<VALUETYPE> ();
-
-  vector<VALUETYPE> dcoord (dcoord_);
-  nnpmap.forward (dcoord.begin(), dcoord_.begin(), 3);
-  
-  for (int ii = 0; ii < nframes; ++ii){
-    for (int jj = 0; jj < nall * 3; ++jj){
-      coord(ii, jj) = dcoord[jj];
-    }
-    for (int jj = 0; jj < 9; ++jj){
-      box(ii, jj) = dbox[jj];
-    }
-    for (int jj = 0; jj < nall; ++jj){
-      type(ii, jj) = datype[jj];
-    }
-    for (int jj = 0; jj < fparam_.size(); ++jj){
-      fparam(ii, jj) = fparam_[jj];
-    }
-    for (int jj = 0; jj < aparam_.size(); ++jj){
-      aparam(ii, jj) = aparam_[jj];
-    }
-  }
-  mesh (1-1) = 0;
-  mesh (2-1) = 0;
-  mesh (3-1) = 0;
-  mesh (4-1) = ncell[0];
-  mesh (5-1) = ncell[1];
-  mesh (6-1) = ncell[2];
-  if (b_ghost){
-    mesh(7-1) = -next[0];
-    mesh(8-1) = -next[1];
-    mesh(9-1) = -next[2];
-    mesh(10-1) = ncell[0] + next[0];
-    mesh(11-1) = ncell[1] + next[1];
-    mesh(12-1) = ncell[2] + next[2];
-  }
-  natoms (0) = nloc;
-  natoms (1) = nall;
-  for (int ii = 0; ii < ntypes; ++ii) natoms(ii+2) = type_count[ii];
-
-  input_tensors = {
-    {"t_coord",	coord_tensor}, 
-    {"t_type",	type_tensor},
-    {"t_box",	box_tensor},
-    {"t_mesh",	mesh_tensor},
-    {"t_natoms",natoms_tensor},
-  };  
-  if (fparam_.size() > 0) {
-    input_tensors.push_back({"t_fparam", fparam_tensor});
-  }
-  if (aparam_.size() > 0) {
-    input_tensors.push_back({"t_aparam", aparam_tensor});
-  }
-  return nloc;
-}
-
-static int
-make_input_tensors (std::vector<std::pair<string, Tensor>> & input_tensors,
-		    const vector<VALUETYPE> &	dcoord_,
-		    const int &			ntypes,
-		    const vector<int> &		datype_,
-		    const vector<VALUETYPE> &	dbox,		    
-		    InternalNeighborList &	dlist, 
-    		    const vector<VALUETYPE> &	fparam_,
-    		    const vector<VALUETYPE> &	aparam_,
-		    const NNPAtomMap<VALUETYPE>&nnpmap,
-    		    const int			nghost)
-{
-  assert (dbox.size() == 9);
-
-  int nframes = 1;
-  int nall = dcoord_.size() / 3;
-  int nloc = nall - nghost;
-  assert (nall == datype_.size());
-
-  vector<int > datype = nnpmap.get_type();
-  vector<int > type_count (ntypes, 0);
-  for (unsigned ii = 0; ii < datype.size(); ++ii){
-    type_count[datype[ii]] ++;
-  }
-  datype.insert (datype.end(), datype_.begin() + nloc, datype_.end());
-
-  TensorShape coord_shape ;
-  coord_shape.AddDim (nframes);
-  coord_shape.AddDim (nall * 3);
-  TensorShape type_shape ;
-  type_shape.AddDim (nframes);
-  type_shape.AddDim (nall);
-  TensorShape box_shape ;
-  box_shape.AddDim (nframes);
-  box_shape.AddDim (9);
-  TensorShape mesh_shape ;
-  mesh_shape.AddDim (16);
-  TensorShape natoms_shape ;
-  natoms_shape.AddDim (2 + ntypes);
-  TensorShape fparam_shape ;
-  fparam_shape.AddDim (nframes);
-  fparam_shape.AddDim (fparam_.size());
-  TensorShape aparam_shape ;
-  aparam_shape.AddDim (nframes);
-  aparam_shape.AddDim (aparam_.size());
-  
-#ifdef HIGH_PREC
-  Tensor coord_tensor	(DT_DOUBLE, coord_shape);
-  Tensor box_tensor	(DT_DOUBLE, box_shape);
-  Tensor fparam_tensor  (DT_DOUBLE, fparam_shape);
-  Tensor aparam_tensor  (DT_DOUBLE, aparam_shape);
-#else
-  Tensor coord_tensor	(DT_FLOAT, coord_shape);
-  Tensor box_tensor	(DT_FLOAT, box_shape);
-  Tensor fparam_tensor  (DT_FLOAT, fparam_shape);
-  Tensor aparam_tensor  (DT_FLOAT, aparam_shape);
-#endif
-  Tensor type_tensor	(DT_INT32, type_shape);
-  Tensor mesh_tensor	(DT_INT32, mesh_shape);
-  Tensor natoms_tensor	(DT_INT32, natoms_shape);
-
-  auto coord = coord_tensor.matrix<VALUETYPE> ();
-  auto type = type_tensor.matrix<int> ();
-  auto box = box_tensor.matrix<VALUETYPE> ();
-  auto mesh = mesh_tensor.flat<int> ();
-  auto natoms = natoms_tensor.flat<int> ();
-  auto fparam = fparam_tensor.matrix<VALUETYPE> ();
-  auto aparam = aparam_tensor.matrix<VALUETYPE> ();
-
-  vector<VALUETYPE> dcoord (dcoord_);
-  nnpmap.forward (dcoord.begin(), dcoord_.begin(), 3);
-  
-  for (int ii = 0; ii < nframes; ++ii){
-    for (int jj = 0; jj < nall * 3; ++jj){
-      coord(ii, jj) = dcoord[jj];
-    }
-    for (int jj = 0; jj < 9; ++jj){
-      box(ii, jj) = dbox[jj];
-    }
-    for (int jj = 0; jj < nall; ++jj){
-      type(ii, jj) = datype[jj];
-    }
-    for (int jj = 0; jj < fparam_.size(); ++jj){
-      fparam(ii, jj) = fparam_[jj];
-    }
-    for (int jj = 0; jj < aparam_.size(); ++jj){
-      aparam(ii, jj) = aparam_[jj];
-    }
-  }
-  
-  for (int ii = 0; ii < 16; ++ii) mesh(ii) = 0;
-  
-  mesh (0) = sizeof(int *) / sizeof(int);
-  assert (mesh(0) * sizeof(int) == sizeof(int *));
-  const int & stride = mesh(0);
-  mesh (1) = dlist.ilist.size();
-  assert (mesh(1) == nloc);
-  assert (stride <= 4);
-  dlist.make_ptrs();
-  memcpy (&mesh(4), &(dlist.pilist), sizeof(int *));
-  memcpy (&mesh(8), &(dlist.pjrange), sizeof(int *));
-  memcpy (&mesh(12), &(dlist.pjlist), sizeof(int *));
-
-  natoms (0) = nloc;
-  natoms (1) = nall;
-  for (int ii = 0; ii < ntypes; ++ii) natoms(ii+2) = type_count[ii];
-
-  input_tensors = {
-    {"t_coord",	coord_tensor}, 
-    {"t_type",	type_tensor},
-    {"t_box",		box_tensor},
-    {"t_mesh",	mesh_tensor},
-    {"t_natoms",	natoms_tensor},
-  };  
-  if (fparam_.size() > 0) {
-    input_tensors.push_back({"t_fparam", fparam_tensor});
-  }
-  if (aparam_.size() > 0) {
-    input_tensors.push_back({"t_aparam", aparam_tensor});
-  }
-
-  return nloc;
-}
 
 static void 
 run_model (ENERGYTYPE &			dener,
@@ -459,28 +137,6 @@ run_model (ENERGYTYPE &			dener,
   nnpmap.backward (datom_virial_.begin(), datom_virial.begin(), 9);
 }
 
-static void
-get_env_nthreads(int & num_intra_nthreads,
-		 int & num_inter_nthreads)
-{
-  num_intra_nthreads = 0;
-  num_inter_nthreads = 0;
-  const char* env_intra_nthreads = std::getenv("OMP_NUM_THREADS");
-  const char* env_inter_nthreads = std::getenv("TF_INTER_OP_PARALLELISM_THREADS");
-  if (env_intra_nthreads && 
-      string(env_intra_nthreads) != string("") && 
-      atoi(env_intra_nthreads) >= 0
-      ) {
-    num_intra_nthreads = atoi(env_intra_nthreads);
-  }
-  if (env_inter_nthreads && 
-      string(env_inter_nthreads) != string("") &&
-      atoi(env_inter_nthreads) >= 0
-      ) {
-    num_inter_nthreads = atoi(env_inter_nthreads);
-  }
-}
-
 
 NNPInter::
 NNPInter ()
@@ -584,14 +240,7 @@ VT
 NNPInter::
 get_scalar (const string & name) const
 {
-  std::vector<Tensor> output_tensors;
-  checkStatus (session->Run(std::vector<std::pair<string, Tensor>> ({}), 
-			    {name.c_str()}, 
-			    {}, 
-			    &output_tensors));
-  Tensor output_rc = output_tensors[0];
-  auto orc = output_rc.flat <VT> ();
-  return orc(0);
+  return session_get_scalar<VT>(session, name);
 }
 
 void
@@ -627,7 +276,7 @@ compute (ENERGYTYPE &			dener,
   validate_fparam_aparam(nloc, fparam, aparam);
 
   std::vector<std::pair<string, Tensor>> input_tensors;
-  int ret = make_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, fparam, aparam, nnpmap, nghost);
+  int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, fparam, aparam, nnpmap, nghost);
   assert (ret == nloc);
 
   run_model (dener, dforce_, dvirial, session, input_tensors, nnpmap, nghost);
@@ -657,7 +306,7 @@ compute (ENERGYTYPE &			dener,
   shuffle_nlist (nlist, nnpmap);
 
   std::vector<std::pair<string, Tensor>> input_tensors;
-  int ret = make_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, nnpmap, nghost);
+  int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, nnpmap, nghost);
   assert (nloc == ret);
 
   run_model (dener, dforce_, dvirial, session, input_tensors, nnpmap, nghost);
@@ -681,7 +330,7 @@ compute (ENERGYTYPE &			dener,
   validate_fparam_aparam(nnpmap.get_type().size(), fparam, aparam);
 
   std::vector<std::pair<string, Tensor>> input_tensors;
-  int nloc = make_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, fparam, aparam, nnpmap);
+  int nloc = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, fparam, aparam, nnpmap);
 
   run_model (dener, dforce_, dvirial, datom_energy_, datom_virial_, session, input_tensors, nnpmap);
 }
@@ -714,7 +363,7 @@ compute (ENERGYTYPE &			dener,
   shuffle_nlist (nlist, nnpmap);
 
   std::vector<std::pair<string, Tensor>> input_tensors;
-  int ret = make_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, nnpmap, nghost);
+  int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, nnpmap, nghost);
   assert (nloc == ret);
 
   run_model (dener, dforce_, dvirial, datom_energy_, datom_virial_, session, input_tensors, nnpmap, nghost);
@@ -823,18 +472,12 @@ get_scalar(const string name) const
 {
   VT myrcut = 0;
   for (unsigned ii = 0; ii < numb_models; ++ii){
-    std::vector<Tensor> output_tensors;
-    checkStatus (sessions[ii]->Run(std::vector<std::pair<string, Tensor>> ({}), 
-				   {name.c_str()}, 
-				   {}, 
-				   &output_tensors));
-    Tensor output_rc = output_tensors[0];
-    auto orc = output_rc.flat <VT> ();
+    VT ret = session_get_scalar<VT>(sessions[ii], name);
     if (ii == 0){
-      myrcut = orc(0);
+      myrcut = ret;
     }
     else {
-      assert (myrcut == orc(0));
+      assert (myrcut == ret);
     }
   }
   return myrcut;
@@ -872,7 +515,7 @@ compute (ENERGYTYPE &			dener,
   validate_fparam_aparam(nnpmap.get_type().size(), fparam, aparam);
 
   std::vector<std::pair<string, Tensor>> input_tensors;
-  int nloc = make_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, fparam, aparam, nnpmap);
+  int nloc = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, fparam, aparam, nnpmap);
 
   vector<ENERGYTYPE > all_energy (numb_models);
   vector<vector<VALUETYPE > > all_force (numb_models);
@@ -926,7 +569,7 @@ compute (vector<ENERGYTYPE> &		all_energy,
   shuffle_nlist (nlist, nnpmap);
 
   std::vector<std::pair<string, Tensor>> input_tensors;
-  int ret = make_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, nnpmap, nghost);
+  int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, nnpmap, nghost);
   assert (nloc == ret);
 
   all_energy.resize (numb_models);
@@ -965,7 +608,7 @@ compute (vector<ENERGYTYPE> &			all_energy,
   shuffle_nlist (nlist, nnpmap);
 
   std::vector<std::pair<string, Tensor>> input_tensors;
-  int ret = make_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, nnpmap, nghost);
+  int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, nnpmap, nghost);
   assert (nloc == ret);
 
   all_energy.resize (numb_models);
diff --git a/source/lib/src/common.cc b/source/lib/src/common.cc
new file mode 100644
index 0000000000..2b78d4839a
--- /dev/null
+++ b/source/lib/src/common.cc
@@ -0,0 +1,348 @@
+#include "common.h"
+#include "NNPAtomMap.h"
+#include "SimulationRegion.h"
+
+void
+convert_nlist_lmp_internal (InternalNeighborList & list,
+			    const LammpsNeighborList & lmp_list) 
+{
+  list.clear();
+  int total_num_nei = 0;
+  int inum = lmp_list.inum;
+  for (int ii = 0; ii < inum; ++ii){
+    total_num_nei += lmp_list.numneigh[ii];
+  }
+  list.ilist.resize(inum);
+  list.jrange.resize(inum+1);
+  list.jlist.resize(total_num_nei);
+  memcpy(&list.ilist[0], lmp_list.ilist, inum*sizeof(int));
+  list.jrange[0] = 0;
+  for (int ii = 0; ii < inum; ++ii){
+    int jnum = lmp_list.numneigh[ii];
+    list.jrange[ii+1] = list.jrange[ii] + jnum;
+    const int * jlist = lmp_list.firstneigh[ii];
+    memcpy(&(list.jlist[list.jrange[ii]]), jlist, jnum*sizeof(int));
+  }
+}
+
+void
+shuffle_nlist (InternalNeighborList & list, 
+	       const NNPAtomMap<VALUETYPE> & map)
+{
+  const vector<int> & fwd_map = map.get_fwd_map();
+  int nloc = fwd_map.size();
+  for (unsigned ii = 0; ii < list.ilist.size(); ++ii){
+    if (list.ilist[ii] < nloc) {
+      list.ilist[ii] = fwd_map[list.ilist[ii]];
+    }
+  }
+  for (unsigned ii = 0; ii < list.jlist.size(); ++ii){
+    if (list.jlist[ii] < nloc) {
+      list.jlist[ii] = fwd_map[list.jlist[ii]];
+    }
+  }
+}
+
+void
+checkStatus(const tensorflow::Status& status) {
+  if (!status.ok()) {
+    std::cout << status.ToString() << std::endl;
+    exit(1);
+  }
+}
+
+void
+get_env_nthreads(int & num_intra_nthreads,
+		 int & num_inter_nthreads)
+{
+  num_intra_nthreads = 0;
+  num_inter_nthreads = 0;
+  const char* env_intra_nthreads = std::getenv("OMP_NUM_THREADS");
+  const char* env_inter_nthreads = std::getenv("TF_INTER_OP_PARALLELISM_THREADS");
+  if (env_intra_nthreads && 
+      string(env_intra_nthreads) != string("") && 
+      atoi(env_intra_nthreads) >= 0
+      ) {
+    num_intra_nthreads = atoi(env_intra_nthreads);
+  }
+  if (env_inter_nthreads && 
+      string(env_inter_nthreads) != string("") &&
+      atoi(env_inter_nthreads) >= 0
+      ) {
+    num_inter_nthreads = atoi(env_inter_nthreads);
+  }
+}
+
+int
+session_input_tensors (std::vector<std::pair<string, Tensor>> & input_tensors,
+		       const vector<VALUETYPE> &	dcoord_,
+		       const int &			ntypes,
+		       const vector<int> &		datype_,
+		       const vector<VALUETYPE> &	dbox, 
+		       const VALUETYPE &		cell_size,
+		       const vector<VALUETYPE> &	fparam_,
+		       const vector<VALUETYPE> &	aparam_,
+		       const NNPAtomMap<VALUETYPE>&	nnpmap,
+		       const int			nghost)
+{
+  bool b_ghost = (nghost != 0);
+  
+  assert (dbox.size() == 9);
+
+  int nframes = 1;
+  int nall = dcoord_.size() / 3;
+  int nloc = nall - nghost;
+  assert (nall == datype_.size());
+
+  vector<int > datype = nnpmap.get_type();
+  vector<int > type_count (ntypes, 0);
+  for (unsigned ii = 0; ii < datype.size(); ++ii){
+    type_count[datype[ii]] ++;
+  }
+  datype.insert (datype.end(), datype_.begin() + nloc, datype_.end());
+
+  SimulationRegion<VALUETYPE> region;
+  vector<double > dbox_(9);
+  for (int dd = 0; dd < 9; ++dd) dbox_[dd] = dbox[dd];
+  region.reinitBox (&dbox_[0]);
+  double box_l[3];
+  region.toFaceDistance (box_l);
+  
+  vector<int > ncell (3, 2);
+  for (int dd = 0; dd < 3; ++dd){
+    ncell[dd] = box_l[dd] / cell_size;
+    if (ncell[dd] < 2) ncell[dd] = 2;
+  }
+  vector<int > next(3, 0);
+  for (int dd = 0; dd < 3; ++dd){
+    double cellh = box_l[dd] / ncell[dd];
+    next[dd] = cellh / cell_size;
+    if (next[dd] * cellh < cell_size) next[dd]++;
+    assert (next[dd] * cellh >= cell_size);
+  }
+
+  TensorShape coord_shape ;
+  coord_shape.AddDim (nframes);
+  coord_shape.AddDim (nall * 3);
+  TensorShape type_shape ;
+  type_shape.AddDim (nframes);
+  type_shape.AddDim (nall);
+  TensorShape box_shape ;
+  box_shape.AddDim (nframes);
+  box_shape.AddDim (9);
+  TensorShape mesh_shape ;
+  if (!b_ghost){
+    mesh_shape.AddDim (6);
+  }
+  else {
+    mesh_shape.AddDim (12);
+  }
+  TensorShape natoms_shape ;
+  natoms_shape.AddDim (2 + ntypes);
+  TensorShape fparam_shape ;
+  fparam_shape.AddDim (nframes);
+  fparam_shape.AddDim (fparam_.size());
+  TensorShape aparam_shape ;
+  aparam_shape.AddDim (nframes);
+  aparam_shape.AddDim (aparam_.size());
+  
+#ifdef HIGH_PREC
+  Tensor coord_tensor	(DT_DOUBLE, coord_shape);
+  Tensor box_tensor	(DT_DOUBLE, box_shape);
+  Tensor fparam_tensor  (DT_DOUBLE, fparam_shape);
+  Tensor aparam_tensor  (DT_DOUBLE, aparam_shape);
+#else
+  Tensor coord_tensor	(DT_FLOAT, coord_shape);
+  Tensor box_tensor	(DT_FLOAT, box_shape);
+  Tensor fparam_tensor  (DT_FLOAT, fparam_shape);
+  Tensor aparam_tensor  (DT_FLOAT, aparam_shape);
+#endif
+  Tensor type_tensor	(DT_INT32, type_shape);
+  Tensor mesh_tensor	(DT_INT32, mesh_shape);
+  Tensor natoms_tensor	(DT_INT32, natoms_shape);
+
+  auto coord = coord_tensor.matrix<VALUETYPE> ();
+  auto type = type_tensor.matrix<int> ();
+  auto box = box_tensor.matrix<VALUETYPE> ();
+  auto mesh = mesh_tensor.flat<int> ();
+  auto natoms = natoms_tensor.flat<int> ();  
+  auto fparam = fparam_tensor.matrix<VALUETYPE> ();
+  auto aparam = aparam_tensor.matrix<VALUETYPE> ();
+
+  vector<VALUETYPE> dcoord (dcoord_);
+  nnpmap.forward (dcoord.begin(), dcoord_.begin(), 3);
+  
+  for (int ii = 0; ii < nframes; ++ii){
+    for (int jj = 0; jj < nall * 3; ++jj){
+      coord(ii, jj) = dcoord[jj];
+    }
+    for (int jj = 0; jj < 9; ++jj){
+      box(ii, jj) = dbox[jj];
+    }
+    for (int jj = 0; jj < nall; ++jj){
+      type(ii, jj) = datype[jj];
+    }
+    for (int jj = 0; jj < fparam_.size(); ++jj){
+      fparam(ii, jj) = fparam_[jj];
+    }
+    for (int jj = 0; jj < aparam_.size(); ++jj){
+      aparam(ii, jj) = aparam_[jj];
+    }
+  }
+  mesh (1-1) = 0;
+  mesh (2-1) = 0;
+  mesh (3-1) = 0;
+  mesh (4-1) = ncell[0];
+  mesh (5-1) = ncell[1];
+  mesh (6-1) = ncell[2];
+  if (b_ghost){
+    mesh(7-1) = -next[0];
+    mesh(8-1) = -next[1];
+    mesh(9-1) = -next[2];
+    mesh(10-1) = ncell[0] + next[0];
+    mesh(11-1) = ncell[1] + next[1];
+    mesh(12-1) = ncell[2] + next[2];
+  }
+  natoms (0) = nloc;
+  natoms (1) = nall;
+  for (int ii = 0; ii < ntypes; ++ii) natoms(ii+2) = type_count[ii];
+
+  input_tensors = {
+    {"t_coord",	coord_tensor}, 
+    {"t_type",	type_tensor},
+    {"t_box",	box_tensor},
+    {"t_mesh",	mesh_tensor},
+    {"t_natoms",natoms_tensor},
+  };  
+  if (fparam_.size() > 0) {
+    input_tensors.push_back({"t_fparam", fparam_tensor});
+  }
+  if (aparam_.size() > 0) {
+    input_tensors.push_back({"t_aparam", aparam_tensor});
+  }
+  return nloc;
+}
+
+int
+session_input_tensors (std::vector<std::pair<string, Tensor>> & input_tensors,
+		       const vector<VALUETYPE> &	dcoord_,
+		       const int &			ntypes,
+		       const vector<int> &		datype_,
+		       const vector<VALUETYPE> &	dbox,		    
+		       InternalNeighborList &		dlist, 
+		       const vector<VALUETYPE> &	fparam_,
+		       const vector<VALUETYPE> &	aparam_,
+		       const NNPAtomMap<VALUETYPE>&	nnpmap,
+		       const int			nghost)
+{
+  assert (dbox.size() == 9);
+
+  int nframes = 1;
+  int nall = dcoord_.size() / 3;
+  int nloc = nall - nghost;
+  assert (nall == datype_.size());
+
+  vector<int > datype = nnpmap.get_type();
+  vector<int > type_count (ntypes, 0);
+  for (unsigned ii = 0; ii < datype.size(); ++ii){
+    type_count[datype[ii]] ++;
+  }
+  datype.insert (datype.end(), datype_.begin() + nloc, datype_.end());
+
+  TensorShape coord_shape ;
+  coord_shape.AddDim (nframes);
+  coord_shape.AddDim (nall * 3);
+  TensorShape type_shape ;
+  type_shape.AddDim (nframes);
+  type_shape.AddDim (nall);
+  TensorShape box_shape ;
+  box_shape.AddDim (nframes);
+  box_shape.AddDim (9);
+  TensorShape mesh_shape ;
+  mesh_shape.AddDim (16);
+  TensorShape natoms_shape ;
+  natoms_shape.AddDim (2 + ntypes);
+  TensorShape fparam_shape ;
+  fparam_shape.AddDim (nframes);
+  fparam_shape.AddDim (fparam_.size());
+  TensorShape aparam_shape ;
+  aparam_shape.AddDim (nframes);
+  aparam_shape.AddDim (aparam_.size());
+  
+#ifdef HIGH_PREC
+  Tensor coord_tensor	(DT_DOUBLE, coord_shape);
+  Tensor box_tensor	(DT_DOUBLE, box_shape);
+  Tensor fparam_tensor  (DT_DOUBLE, fparam_shape);
+  Tensor aparam_tensor  (DT_DOUBLE, aparam_shape);
+#else
+  Tensor coord_tensor	(DT_FLOAT, coord_shape);
+  Tensor box_tensor	(DT_FLOAT, box_shape);
+  Tensor fparam_tensor  (DT_FLOAT, fparam_shape);
+  Tensor aparam_tensor  (DT_FLOAT, aparam_shape);
+#endif
+  Tensor type_tensor	(DT_INT32, type_shape);
+  Tensor mesh_tensor	(DT_INT32, mesh_shape);
+  Tensor natoms_tensor	(DT_INT32, natoms_shape);
+
+  auto coord = coord_tensor.matrix<VALUETYPE> ();
+  auto type = type_tensor.matrix<int> ();
+  auto box = box_tensor.matrix<VALUETYPE> ();
+  auto mesh = mesh_tensor.flat<int> ();
+  auto natoms = natoms_tensor.flat<int> ();
+  auto fparam = fparam_tensor.matrix<VALUETYPE> ();
+  auto aparam = aparam_tensor.matrix<VALUETYPE> ();
+
+  vector<VALUETYPE> dcoord (dcoord_);
+  nnpmap.forward (dcoord.begin(), dcoord_.begin(), 3);
+  
+  for (int ii = 0; ii < nframes; ++ii){
+    for (int jj = 0; jj < nall * 3; ++jj){
+      coord(ii, jj) = dcoord[jj];
+    }
+    for (int jj = 0; jj < 9; ++jj){
+      box(ii, jj) = dbox[jj];
+    }
+    for (int jj = 0; jj < nall; ++jj){
+      type(ii, jj) = datype[jj];
+    }
+    for (int jj = 0; jj < fparam_.size(); ++jj){
+      fparam(ii, jj) = fparam_[jj];
+    }
+    for (int jj = 0; jj < aparam_.size(); ++jj){
+      aparam(ii, jj) = aparam_[jj];
+    }
+  }
+  
+  for (int ii = 0; ii < 16; ++ii) mesh(ii) = 0;
+  
+  mesh (0) = sizeof(int *) / sizeof(int);
+  assert (mesh(0) * sizeof(int) == sizeof(int *));
+  const int & stride = mesh(0);
+  mesh (1) = dlist.ilist.size();
+  assert (mesh(1) == nloc);
+  assert (stride <= 4);
+  dlist.make_ptrs();
+  memcpy (&mesh(4), &(dlist.pilist), sizeof(int *));
+  memcpy (&mesh(8), &(dlist.pjrange), sizeof(int *));
+  memcpy (&mesh(12), &(dlist.pjlist), sizeof(int *));
+
+  natoms (0) = nloc;
+  natoms (1) = nall;
+  for (int ii = 0; ii < ntypes; ++ii) natoms(ii+2) = type_count[ii];
+
+  input_tensors = {
+    {"t_coord",	coord_tensor}, 
+    {"t_type",	type_tensor},
+    {"t_box",	box_tensor},
+    {"t_mesh",	mesh_tensor},
+    {"t_natoms",natoms_tensor},
+  };  
+  if (fparam_.size() > 0) {
+    input_tensors.push_back({"t_fparam", fparam_tensor});
+  }
+  if (aparam_.size() > 0) {
+    input_tensors.push_back({"t_aparam", aparam_tensor});
+  }
+
+  return nloc;
+}
diff --git a/source/train/Model.py b/source/train/Model.py
index c568dbe9ec..3615440612 100644
--- a/source/train/Model.py
+++ b/source/train/Model.py
@@ -345,6 +345,9 @@ def build (self,
             t_mt = tf.constant(self.model_type, 
                                name = 'model_type', 
                                dtype = tf.string)
+            t_od = tf.constant(self.get_out_size(), 
+                               name = 'output_dim', 
+                               dtype = tf.int32)
 
         coord = tf.reshape (coord_, [-1, natoms[1] * 3])
         atype = tf.reshape (atype_, [-1, natoms[1]])

From d34c055a130e5229b6b3e050d9f432703c07a181 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Wed, 20 Nov 2019 17:57:03 +0800
Subject: [PATCH 26/38] fix bugs in DeepTensor. mv common methods of DeepPot
 and DeepTensor inference to common.h/.cc. fix bugs in DataModifier,
 EwaldRecp, DeepPot and DeepEval.

---
 source/lib/include/DeepTensor.h |  17 +++-
 source/lib/include/NNPInter.h   |  18 +++-
 source/lib/include/common.h     | 106 +++++++++++++++++++++--
 source/lib/src/DeepTensor.cc    |  91 ++++++++++++++++----
 source/lib/src/NNPInter.cc      |  52 ++++++++++--
 source/lib/src/common.cc        | 144 ++++++++++++++++++++++++++++----
 source/scripts/freeze.py        |   3 +-
 source/train/DataModifier.py    |   4 +
 source/train/DeepEval.py        |  34 +++++---
 source/train/DeepPot.py         |  10 ++-
 source/train/EwaldRecp.py       |  23 ++---
 source/train/Fitting.py         |   4 +
 12 files changed, 431 insertions(+), 75 deletions(-)

diff --git a/source/lib/include/DeepTensor.h b/source/lib/include/DeepTensor.h
index 389177c6e7..14f0f69997 100644
--- a/source/lib/include/DeepTensor.h
+++ b/source/lib/include/DeepTensor.h
@@ -5,7 +5,7 @@
 class DeepTensor
 {
 public:
-  DeepTensor () ;
+  DeepTensor ();
   DeepTensor  (const string & model, const int & gpu_rank = 0);
   void init (const string & model, const int & gpu_rank = 0);
   void print_summary(const string &pre) const;
@@ -23,6 +23,8 @@ class DeepTensor
 		const LammpsNeighborList &	lmp_list);
   VALUETYPE cutoff () const {assert(inited); return rcut;};
   int numb_types () const {assert(inited); return ntypes;};
+  int output_dim () const {assert(inited); return odim;};
+  vector<int> sel_types () const {assert(inited); return sel_type;};
 private:
   Session* session;
   int num_intra_nthreads, num_inter_nthreads;
@@ -33,11 +35,24 @@ class DeepTensor
   int ntypes;
   string model_type;
   int odim;
+  vector<int> sel_type;
   template<class VT> VT get_scalar(const string & name) const;
+  template<class VT> vector<VT> get_vector(const string & name) const;
   void run_model (vector<VALUETYPE> &		d_tensor_,
 		  Session *			session, 
 		  const std::vector<std::pair<string, Tensor>> & input_tensors,
 		  const NNPAtomMap<VALUETYPE> &	nnpmap, 
 		  const int			nghost = 0);
+  void compute_inner (vector<VALUETYPE> &	value,
+		      const vector<VALUETYPE> &	coord,
+		      const vector<int> &	atype,
+		      const vector<VALUETYPE> &	box,
+		      const int			nghost = 0);
+  void compute_inner (vector<VALUETYPE> &	value,
+		      const vector<VALUETYPE> &	coord,
+		      const vector<int> &	atype,
+		      const vector<VALUETYPE> &	box, 
+		      const int			nghost,
+		      const InternalNeighborList&lmp_list);
 };
 
diff --git a/source/lib/include/NNPInter.h b/source/lib/include/NNPInter.h
index 7aa4c0ebe9..113304a0b3 100644
--- a/source/lib/include/NNPInter.h
+++ b/source/lib/include/NNPInter.h
@@ -61,8 +61,8 @@ class NNPInter
   GraphDef graph_def;
   bool inited;
   template<class VT> VT get_scalar(const string & name) const;
-  VALUETYPE get_rcut () const;
-  int get_ntypes () const;
+  // VALUETYPE get_rcut () const;
+  // int get_ntypes () const;
   VALUETYPE rcut;
   VALUETYPE cell_size;
   int ntypes;
@@ -71,6 +71,16 @@ class NNPInter
   void validate_fparam_aparam(const int & nloc,
 			      const vector<VALUETYPE> &fparam,
 			      const vector<VALUETYPE> &aparam)const ;
+  void compute_inner (ENERGYTYPE &			ener,
+		vector<VALUETYPE> &		force,
+		vector<VALUETYPE> &		virial,
+		const vector<VALUETYPE> &	coord,
+		const vector<int> &		atype,
+		const vector<VALUETYPE> &	box, 
+		const int			nghost,
+		const InternalNeighborList &	lmp_list,
+		const vector<VALUETYPE>	&	fparam = vector<VALUETYPE>(),
+		const vector<VALUETYPE>	&	aparam = vector<VALUETYPE>());
 };
 
 class NNPInterModelDevi
@@ -136,8 +146,8 @@ class NNPInterModelDevi
   vector<GraphDef> graph_defs;
   bool inited;
   template<class VT> VT get_scalar(const string name) const;
-  VALUETYPE get_rcut () const;
-  int get_ntypes () const;
+  // VALUETYPE get_rcut () const;
+  // int get_ntypes () const;
   VALUETYPE rcut;
   VALUETYPE cell_size;
   int ntypes;
diff --git a/source/lib/include/common.h b/source/lib/include/common.h
index 3e4ad3c521..8b92dba15f 100644
--- a/source/lib/include/common.h
+++ b/source/lib/include/common.h
@@ -54,10 +54,44 @@ void
 convert_nlist_lmp_internal (InternalNeighborList & list,
 			    const LammpsNeighborList & lmp_list);
 
+void
+shuffle_nlist (InternalNeighborList & list, 
+	       const vector<int> & fwd_map);
+
 void
 shuffle_nlist (InternalNeighborList & list, 
 	       const NNPAtomMap<VALUETYPE> & map);
 
+void
+shuffle_nlist_exclude_empty (InternalNeighborList & list, 
+			     const vector<int> & fwd_map);
+
+
+void 
+select_by_type(vector<int> & fwd_map,
+	       vector<int> & bkw_map,
+	       int & nghost_real, 
+	       const vector<VALUETYPE> & dcoord_, 
+	       const vector<int> & datype_,
+	       const int & nghost,
+	       const vector<int> & sel_type_);
+
+void
+select_real_atoms(vector<int> & fwd_map,
+		  vector<int> & bkw_map,
+		  int & nghost_real,
+		  const vector<VALUETYPE> & dcoord_, 
+		  const vector<int> & datype_,
+		  const int & nghost,
+		  const int & ntypes);
+
+template<typename VT>
+void 
+select_map(vector<VT> & out,
+	   const vector<VT > & in,
+	   const vector<int > & fwd_map, 
+	   const int & stride);
+
 void
 get_env_nthreads(int & num_intra_nthreads,
 		 int & num_inter_nthreads);
@@ -65,9 +99,13 @@ get_env_nthreads(int & num_intra_nthreads,
 void
 checkStatus(const tensorflow::Status& status);
 
-template<class VT>
+template<typename VT>
 VT
-session_get_scalar(Session* session, const string name);
+session_get_scalar(Session* session, const string name, const string scope = "");
+
+template<typename VT>
+vector<VT>
+session_get_vector(Session* session, const string name, const string scope = "");
 
 int
 session_input_tensors (std::vector<std::pair<string, Tensor>> & input_tensors,
@@ -79,7 +117,8 @@ session_input_tensors (std::vector<std::pair<string, Tensor>> & input_tensors,
 		       const vector<VALUETYPE> &	fparam_,
 		       const vector<VALUETYPE> &	aparam_,
 		       const NNPAtomMap<VALUETYPE>&	nnpmap,
-		       const int			nghost = 0);
+		       const int			nghost = 0,
+		       const string			scope = "");
 
 int
 session_input_tensors (std::vector<std::pair<string, Tensor>> & input_tensors,
@@ -91,13 +130,18 @@ session_input_tensors (std::vector<std::pair<string, Tensor>> & input_tensors,
 		       const vector<VALUETYPE> &	fparam_,
 		       const vector<VALUETYPE> &	aparam_,
 		       const NNPAtomMap<VALUETYPE>&	nnpmap,
-		       const int			nghost);
+		       const int			nghost,
+		       const string			scope = "");
 
 
-template<class VT>
+template<typename VT>
 VT
-session_get_scalar(Session* session, const string name) 
+session_get_scalar(Session* session, const string name_, const string scope) 
 {
+  string name = name_;
+  if (scope != "") {
+    name = scope + "/" + name;
+  }
   std::vector<Tensor> output_tensors;
   checkStatus (session->Run(std::vector<std::pair<string, Tensor>> ({}), 
 			    {name.c_str()}, 
@@ -107,3 +151,53 @@ session_get_scalar(Session* session, const string name)
   auto orc = output_rc.flat <VT> ();
   return orc(0);
 }
+
+// Evaluate the graph node `name_` (looked up as "scope/name_" when `scope`
+// is non-empty) without feeding any input, and copy its output tensor,
+// asserted to be rank 1, element by element into a vector.
+template<typename VT>
+vector<VT>
+session_get_vector(Session* session, const string name_, const string scope) 
+{
+  const string node = (scope != "") ? (scope + "/" + name_) : name_;
+  std::vector<Tensor> fetched;
+  checkStatus (session->Run(std::vector<std::pair<string, Tensor>> ({}), 
+			    {node.c_str()}, 
+			    {}, 
+			    &fetched));
+  Tensor vec_t = fetched[0];
+  assert(1 == vec_t.shape().dims());
+  const int dof = vec_t.shape().dim_size(0);
+  auto vec_flat = vec_t.flat <VT> ();
+  vector<VT> o_vec(dof);
+  for (int kk = 0; kk < dof; ++kk){
+    o_vec[kk] = vec_flat(kk);
+  }
+  return o_vec;
+}
+
+// Scatter `in` (records of `stride` values) into `out`: record ii goes to slot
+// idx_map[ii], negative slots are skipped.  `out` must be pre-sized by the caller.
+template<typename VT>
+void 
+select_map(vector<VT> & out,
+	   const vector<VT > & in,
+	   const vector<int > & idx_map, 
+	   const int & stride)
+{
+#ifdef DEBUG
+  assert(in.size() % stride == 0 && "in size should be multiples of stride");
+  assert(in.size() / stride <= idx_map.size() && "idx goes over the idx map size");
+#endif
+  for (int ii = 0; ii < in.size() / stride; ++ii){
+    const int to_ii = idx_map[ii];
+    if (to_ii < 0) continue;	// negative slot: this record is dropped
+#ifdef DEBUG
+    assert((to_ii + 1) * stride <= int(out.size()) && "mappped idx goes over the out size");
+#endif
+    for (int dd = 0; dd < stride; ++dd){
+      out[to_ii * stride + dd] = in[ii * stride + dd];
+    }
+  }
+}
+
diff --git a/source/lib/src/DeepTensor.cc b/source/lib/src/DeepTensor.cc
index 00eab78f0f..ea0a7579a8 100644
--- a/source/lib/src/DeepTensor.cc
+++ b/source/lib/src/DeepTensor.cc
@@ -1,5 +1,11 @@
 #include "DeepTensor.h"
 
+DeepTensor::
+DeepTensor()
+    : inited (false)  // no model is loaded by this constructor; init() sets the flag to true
+{
+}
+
 DeepTensor::
 DeepTensor(const string & model, const int & gpu_rank)
     : inited (false)
@@ -24,6 +30,7 @@ init (const string & model, const int & gpu_rank)
   ntypes = get_scalar<int>("descrpt_attr/ntypes");
   model_type = get_scalar<string>("model_attr/model_type");
   odim = get_scalar<int>("model_attr/output_dim");
+  sel_type = get_vector<int>("model_attr/sel_type");
   inited = true;
 }
 
@@ -35,6 +42,14 @@ get_scalar (const string & name) const
   return session_get_scalar<VT>(session, name);
 }
 
+template<class VT>
+vector<VT>
+DeepTensor::
+get_vector (const string & name) const
+{
+  return session_get_vector<VT>(session, name);  // forwards to the session helper, no scope argument passed
+}
+
 void 
 DeepTensor::
 run_model (vector<VALUETYPE> &		d_tensor_,
@@ -46,9 +61,8 @@ run_model (vector<VALUETYPE> &		d_tensor_,
   unsigned nloc = nnpmap.get_type().size();
   unsigned nall = nloc + nghost;
   if (nloc == 0) {
-    // no backward map needed
-    d_tensor_.resize(nall * odim);
-    fill(d_tensor_.begin(), d_tensor_.end(), 0.0);
+    // return empty
+    d_tensor_.clear();
     return;
   }
 
@@ -59,17 +73,19 @@ run_model (vector<VALUETYPE> &		d_tensor_,
 			    &output_tensors));
   
   Tensor output_t = output_tensors[0];
+  assert (output_t.dims() == 1), "dim of output tensor should be 1";
+  int o_size = output_t.dim_size(0);
 
   auto ot = output_t.flat<VALUETYPE> ();
 
-  vector<VALUETYPE> d_tensor (nall * odim);
-  for (unsigned ii = 0; ii < nall * odim; ++ii){
+  vector<VALUETYPE> d_tensor (o_size);
+  for (unsigned ii = 0; ii < o_size; ++ii){
     d_tensor[ii] = ot(ii);
   }
   d_tensor_ = d_tensor;
-  nnpmap.backward (d_tensor_.begin(), d_tensor.begin(), odim);
 }
 
+
 void
 DeepTensor::
 compute (vector<VALUETYPE> &		dtensor_,
@@ -77,6 +93,54 @@ compute (vector<VALUETYPE> &		dtensor_,
 	 const vector<int> &		datype_,
 	 const vector<VALUETYPE> &	dbox, 
 	 const int			nghost)
+{
+  vector<VALUETYPE> dcoord;
+  vector<int> datype, fwd_map, bkw_map;
+  int nghost_real;
+  select_real_atoms(fwd_map, bkw_map, nghost_real, dcoord_, datype_, nghost, ntypes);
+  // size the compacted arrays: bkw_map holds one entry per selected atom
+  dcoord.resize(bkw_map.size() * 3);
+  datype.resize(bkw_map.size());
+  // copy coordinates (3 values per atom) and types of the selected atoms via fwd_map
+  select_map<VALUETYPE>(dcoord, dcoord_, fwd_map, 3);
+  select_map<int>(datype, datype_, fwd_map, 1);
+  compute_inner(dtensor_, dcoord, datype, dbox, nghost_real);
+}
+
+void
+DeepTensor::
+compute (vector<VALUETYPE> &		dtensor_,
+	 const vector<VALUETYPE> &	dcoord_,
+	 const vector<int> &		datype_,
+	 const vector<VALUETYPE> &	dbox, 
+	 const int			nghost,
+	 const LammpsNeighborList &	lmp_list)
+{
+  vector<VALUETYPE> dcoord;
+  vector<int> datype, fwd_map, bkw_map;
+  int nghost_real;
+  select_real_atoms(fwd_map, bkw_map, nghost_real, dcoord_, datype_, nghost, ntypes);
+  // size the compacted arrays: bkw_map holds one entry per selected atom
+  dcoord.resize(bkw_map.size() * 3);
+  datype.resize(bkw_map.size());
+  // copy coordinates (3 values per atom) and types of the selected atoms via fwd_map
+  select_map<VALUETYPE>(dcoord, dcoord_, fwd_map, 3);
+  select_map<int>(datype, datype_, fwd_map, 1);
+  // convert the LAMMPS list to the internal format, then re-index it with fwd_map
+  InternalNeighborList nlist;
+  convert_nlist_lmp_internal(nlist, lmp_list);
+  shuffle_nlist_exclude_empty(nlist, fwd_map);  
+  compute_inner(dtensor_, dcoord, datype, dbox, nghost_real, nlist);
+}
+
+
+void
+DeepTensor::
+compute_inner (vector<VALUETYPE> &		dtensor_,
+	       const vector<VALUETYPE> &	dcoord_,
+	       const vector<int> &		datype_,
+	       const vector<VALUETYPE> &	dbox, 
+	       const int			nghost)
 {
   int nall = dcoord_.size() / 3;
   int nloc = nall - nghost;
@@ -92,20 +156,19 @@ compute (vector<VALUETYPE> &		dtensor_,
 
 void
 DeepTensor::
-compute (vector<VALUETYPE> &		dtensor_,
-	 const vector<VALUETYPE> &	dcoord_,
-	 const vector<int> &		datype_,
-	 const vector<VALUETYPE> &	dbox, 
-	 const int			nghost,
-	 const LammpsNeighborList &	lmp_list)
+compute_inner (vector<VALUETYPE> &		dtensor_,
+	       const vector<VALUETYPE> &	dcoord_,
+	       const vector<int> &		datype_,
+	       const vector<VALUETYPE> &	dbox, 
+	       const int			nghost,
+	       const InternalNeighborList &	nlist_)
 {
   int nall = dcoord_.size() / 3;
   int nloc = nall - nghost;
   NNPAtomMap<VALUETYPE> nnpmap (datype_.begin(), datype_.begin() + nloc);
   assert (nloc == nnpmap.get_type().size());
 
-  InternalNeighborList nlist;
-  convert_nlist_lmp_internal (nlist, lmp_list);
+  InternalNeighborList nlist(nlist_);
   shuffle_nlist (nlist, nnpmap);
 
   std::vector<std::pair<string, Tensor>> input_tensors;
diff --git a/source/lib/src/NNPInter.cc b/source/lib/src/NNPInter.cc
index b01fe6dc1c..149ece4736 100644
--- a/source/lib/src/NNPInter.cc
+++ b/source/lib/src/NNPInter.cc
@@ -184,8 +184,8 @@ init (const string & model, const int & gpu_rank)
   ntypes = get_scalar<int>("descrpt_attr/ntypes");
   dfparam = get_scalar<int>("fitting_attr/dfparam");
   daparam = get_scalar<int>("fitting_attr/daparam");
-  assert(rcut == get_rcut());
-  assert(ntypes == get_ntypes());
+  // assert(rcut == get_rcut());
+  // assert(ntypes == get_ntypes());
   if (dfparam < 0) dfparam = 0;
   if (daparam < 0) daparam = 0;
   inited = true;
@@ -207,8 +207,8 @@ init (const string & model, const int & gpu_rank)
   ntypes = get_scalar<int>("descrpt_attr/ntypes");
   dfparam = get_scalar<int>("fitting_attr/dfparam");
   daparam = get_scalar<int>("fitting_attr/daparam");
-  assert(rcut == get_rcut());
-  assert(ntypes == get_ntypes());
+  // assert(rcut == get_rcut());
+  // assert(ntypes == get_ntypes());
   if (dfparam < 0) dfparam = 0;
   if (daparam < 0) daparam = 0;
   // rcut = get_rcut();
@@ -293,7 +293,44 @@ compute (ENERGYTYPE &			dener,
 	 const int			nghost,
 	 const LammpsNeighborList &	lmp_list,
 	 const vector<VALUETYPE> &	fparam,
-	 const vector<VALUETYPE> &	aparam)
+	 const vector<VALUETYPE> &	aparam_)
+{
+  vector<VALUETYPE> dcoord, dforce, aparam;
+  vector<int> datype, fwd_map, bkw_map;
+  int nghost_real;	// number of ghost atoms that survive the selection
+  select_real_atoms(fwd_map, bkw_map, nghost_real, dcoord_, datype_, nghost, ntypes);
+  // compact coordinates and types: bkw_map has one entry per selected atom
+  dcoord.resize(bkw_map.size() * 3);
+  datype.resize(bkw_map.size());
+  select_map<VALUETYPE>(dcoord, dcoord_, fwd_map, 3);
+  select_map<int>(datype, datype_, fwd_map, 1);
+  if (daparam > 0){
+    // NOTE(review): no daparam factor here, yet the copy stride is daparam -- confirm
+    aparam.resize(bkw_map.size());
+    select_map<VALUETYPE>(aparam, aparam_, fwd_map, daparam);
+  }
+  // internal nlist, re-indexed with fwd_map
+  InternalNeighborList nlist;
+  convert_nlist_lmp_internal(nlist, lmp_list);
+  shuffle_nlist_exclude_empty(nlist, fwd_map);  
+  compute_inner(dener, dforce, dvirial, dcoord, datype, dbox, nghost_real, nlist, fparam, aparam);
+  // select_map never resizes its output: make room for all atoms before the bkw map
+  dforce_.resize(fwd_map.size() * 3);
+  select_map<VALUETYPE>(dforce_, dforce, bkw_map, 3);
+}
+
+void
+NNPInter::
+compute_inner (ENERGYTYPE &			dener,
+	       vector<VALUETYPE> &		dforce_,
+	       vector<VALUETYPE> &		dvirial,
+	       const vector<VALUETYPE> &	dcoord_,
+	       const vector<int> &		datype_,
+	       const vector<VALUETYPE> &	dbox, 
+	       const int			nghost,
+	       const InternalNeighborList &	nlist_,
+	       const vector<VALUETYPE> &	fparam,
+	       const vector<VALUETYPE> &	aparam)
 {
   int nall = dcoord_.size() / 3;
   int nloc = nall - nghost;
@@ -301,9 +338,10 @@ compute (ENERGYTYPE &			dener,
   assert (nloc == nnpmap.get_type().size());
   validate_fparam_aparam(nloc, fparam, aparam);
 
-  InternalNeighborList nlist;
-  convert_nlist_lmp_internal (nlist, lmp_list);
+  InternalNeighborList nlist(nlist_);
   shuffle_nlist (nlist, nnpmap);
+  // convert_nlist_lmp_internal (nlist, lmp_list);
+  // shuffle_nlist (nlist, nnpmap);
 
   std::vector<std::pair<string, Tensor>> input_tensors;
   int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, nnpmap, nghost);
diff --git a/source/lib/src/common.cc b/source/lib/src/common.cc
index 2b78d4839a..708de71b5d 100644
--- a/source/lib/src/common.cc
+++ b/source/lib/src/common.cc
@@ -2,6 +2,64 @@
 #include "NNPAtomMap.h"
 #include "SimulationRegion.h"
 
+// Build the index maps that keep only atoms whose type is listed in sel_type_.
+//   fwd_map[i]  : new index of original atom i, or -1 when the atom is dropped
+//   bkw_map[j]  : original index of the j-th kept atom
+//   nghost_real : number of kept atoms with original index >= nall - nghost
+// where nall = dcoord_.size() / 3.  Kept atoms preserve their relative order.
+void 
+select_by_type(vector<int> & fwd_map,
+	       vector<int> & bkw_map,
+	       int & nghost_real, 
+	       const vector<VALUETYPE> & dcoord_, 
+	       const vector<int> & datype_,
+	       const int & nghost,
+	       const vector<int> & sel_type_)
+{
+  vector<int> sel_type (sel_type_);	// sorted copy for the binary search
+  sort(sel_type.begin(), sel_type.end());
+  // indices below nloc are counted as local atoms, the rest as ghosts
+  int nall = dcoord_.size() / 3;
+  int nloc = nall - nghost;
+  int nloc_real = 0;
+  nghost_real = 0;
+  fwd_map.resize(nall);
+  bkw_map.clear();
+  bkw_map.reserve(nall);
+  int cc = 0;	// next free index among the kept atoms
+  for (int ii = 0; ii < nall; ++ii){
+    // exact membership test: lower_bound() != end() would also accept every
+    // unlisted type that is smaller than the largest selected one
+    if (!binary_search(sel_type.begin(), sel_type.end(), datype_[ii])){
+      fwd_map[ii] = -1;
+      continue;
+    }
+    bkw_map.push_back(ii);
+    if (ii < nloc) nloc_real += 1;
+    else nghost_real += 1;
+    fwd_map[ii] = cc;
+    cc ++;
+  }
+  assert((nloc_real+nghost_real) == bkw_map.size());
+}
+
+
+// Pass the type list 0, 1, ..., ntypes-1 to select_by_type() to build the maps.
+void
+select_real_atoms(vector<int> & fwd_map,
+		  vector<int> & bkw_map,
+		  int & nghost_real,
+		  const vector<VALUETYPE> & dcoord_, 
+		  const vector<int> & datype_,
+		  const int & nghost,
+		  const int & ntypes)
+{
+  vector<int> every_type;	// filled with 0, 1, ..., ntypes-1
+  int tt = 0;
+  while (tt < ntypes) every_type.push_back(tt++);
+  select_by_type(fwd_map, bkw_map, nghost_real, dcoord_, datype_, nghost, every_type);
+}
+
 void
 convert_nlist_lmp_internal (InternalNeighborList & list,
 			    const LammpsNeighborList & lmp_list) 
@@ -30,6 +88,13 @@ shuffle_nlist (InternalNeighborList & list,
 	       const NNPAtomMap<VALUETYPE> & map)
 {
   const vector<int> & fwd_map = map.get_fwd_map();
+  shuffle_nlist(list, fwd_map);
+}
+
+void
+shuffle_nlist (InternalNeighborList & list, 
+	       const vector<int> & fwd_map)
+{
   int nloc = fwd_map.size();
   for (unsigned ii = 0; ii < list.ilist.size(); ++ii){
     if (list.ilist[ii] < nloc) {
@@ -43,6 +108,41 @@ shuffle_nlist (InternalNeighborList & list,
   }
 }
 
+// Re-index the neighbor list through fwd_map (see shuffle_nlist) and remove
+// everything that ended up with a negative index: a center atom dropped from
+// ilist loses its whole neighbor range, a kept center keeps only its
+// non-negative neighbors.  jrange is rebuilt for the compacted ilist.
+void
+shuffle_nlist_exclude_empty (InternalNeighborList & list, 
+			     const vector<int> & fwd_map)
+{
+  shuffle_nlist(list, fwd_map);
+  vector<int> new_ilist, new_jrange, new_jlist;
+  new_ilist.reserve(list.ilist.size());
+  new_jrange.reserve(list.jrange.size());
+  new_jlist.reserve(list.jlist.size());
+  new_jrange.push_back(0);
+  for(int ii = 0; ii < list.ilist.size(); ++ii){
+    // jrange must be indexed by the position in the compacted ilist, not by
+    // ii: a dropped center gets no slot, otherwise the writes run past the
+    // end of new_jrange as soon as one center is removed.
+    if (list.ilist[ii] < 0) continue;
+    new_ilist.push_back(list.ilist[ii]);
+    int js = list.jrange[ii];
+    int je = list.jrange[ii+1];
+    for (int jj = js; jj < je; ++jj){
+      if (list.jlist[jj] >= 0) {
+	new_jlist.push_back(list.jlist[jj]);
+      }
+    }
+    // end offset of this center's neighbors, i.e. start offset of the next
+    new_jrange.push_back(new_jlist.size());
+  }
+  list.ilist = new_ilist;
+  list.jrange = new_jrange;
+  list.jlist = new_jlist;
+}
+
 void
 checkStatus(const tensorflow::Status& status) {
   if (!status.ok()) {
@@ -83,7 +183,8 @@ session_input_tensors (std::vector<std::pair<string, Tensor>> & input_tensors,
 		       const vector<VALUETYPE> &	fparam_,
 		       const vector<VALUETYPE> &	aparam_,
 		       const NNPAtomMap<VALUETYPE>&	nnpmap,
-		       const int			nghost)
+		       const int			nghost, 
+		       const string			scope)
 {
   bool b_ghost = (nghost != 0);
   
@@ -207,18 +308,22 @@ session_input_tensors (std::vector<std::pair<string, Tensor>> & input_tensors,
   natoms (1) = nall;
   for (int ii = 0; ii < ntypes; ++ii) natoms(ii+2) = type_count[ii];
 
+  string prefix = "";
+  if (scope != ""){
+    prefix = scope + "/";
+  }
   input_tensors = {
-    {"t_coord",	coord_tensor}, 
-    {"t_type",	type_tensor},
-    {"t_box",	box_tensor},
-    {"t_mesh",	mesh_tensor},
-    {"t_natoms",natoms_tensor},
+    {prefix+"t_coord",	coord_tensor}, 
+    {prefix+"t_type",	type_tensor},
+    {prefix+"t_box",	box_tensor},
+    {prefix+"t_mesh",	mesh_tensor},
+    {prefix+"t_natoms",	natoms_tensor},
   };  
   if (fparam_.size() > 0) {
-    input_tensors.push_back({"t_fparam", fparam_tensor});
+    input_tensors.push_back({prefix+"t_fparam", fparam_tensor});
   }
   if (aparam_.size() > 0) {
-    input_tensors.push_back({"t_aparam", aparam_tensor});
+    input_tensors.push_back({prefix+"t_aparam", aparam_tensor});
   }
   return nloc;
 }
@@ -233,14 +338,15 @@ session_input_tensors (std::vector<std::pair<string, Tensor>> & input_tensors,
 		       const vector<VALUETYPE> &	fparam_,
 		       const vector<VALUETYPE> &	aparam_,
 		       const NNPAtomMap<VALUETYPE>&	nnpmap,
-		       const int			nghost)
+		       const int			nghost,
+		       const string			scope)
 {
   assert (dbox.size() == 9);
 
   int nframes = 1;
   int nall = dcoord_.size() / 3;
   int nloc = nall - nghost;
-  assert (nall == datype_.size());
+  assert (nall == datype_.size());  
 
   vector<int > datype = nnpmap.get_type();
   vector<int > type_count (ntypes, 0);
@@ -330,18 +436,22 @@ session_input_tensors (std::vector<std::pair<string, Tensor>> & input_tensors,
   natoms (1) = nall;
   for (int ii = 0; ii < ntypes; ++ii) natoms(ii+2) = type_count[ii];
 
+  string prefix = "";
+  if (scope != ""){
+    prefix = scope + "/";
+  }
   input_tensors = {
-    {"t_coord",	coord_tensor}, 
-    {"t_type",	type_tensor},
-    {"t_box",	box_tensor},
-    {"t_mesh",	mesh_tensor},
-    {"t_natoms",natoms_tensor},
+    {prefix+"t_coord",	coord_tensor}, 
+    {prefix+"t_type",	type_tensor},
+    {prefix+"t_box",	box_tensor},
+    {prefix+"t_mesh",	mesh_tensor},
+    {prefix+"t_natoms",natoms_tensor},
   };  
   if (fparam_.size() > 0) {
-    input_tensors.push_back({"t_fparam", fparam_tensor});
+    input_tensors.push_back({prefix+"t_fparam", fparam_tensor});
   }
   if (aparam_.size() > 0) {
-    input_tensors.push_back({"t_aparam", aparam_tensor});
+    input_tensors.push_back({prefix+"t_aparam", aparam_tensor});
   }
 
   return nloc;
diff --git a/source/scripts/freeze.py b/source/scripts/freeze.py
index 4dd51492a4..a7bb8ca11a 100755
--- a/source/scripts/freeze.py
+++ b/source/scripts/freeze.py
@@ -41,7 +41,7 @@ def _make_node_names(model_type = None) :
     elif model_type == 'wfc':
         nodes = "o_wfc,descrpt_attr/rcut,descrpt_attr/ntypes,model_attr/tmap,model_attr/sel_type,model_attr/model_type"
     elif model_type == 'dipole':
-        nodes = "o_dipole,o_rmat,o_rmat_deriv,o_nlist,o_rij,descrpt_attr/rcut,descrpt_attr/ntypes,descrpt_attr/sel,descrpt_attr/ndescrpt,model_attr/tmap,model_attr/sel_type,model_attr/model_type"
+        nodes = "o_dipole,o_rmat,o_rmat_deriv,o_nlist,o_rij,descrpt_attr/rcut,descrpt_attr/ntypes,descrpt_attr/sel,descrpt_attr/ndescrpt,model_attr/tmap,model_attr/sel_type,model_attr/model_type,model_attr/output_dim"
     elif model_type == 'polar':
         nodes = "o_polar,descrpt_attr/rcut,descrpt_attr/ntypes,model_attr/tmap,model_attr/sel_type,model_attr/model_type"
     elif model_type == 'global_polar':
@@ -75,6 +75,7 @@ def freeze_graph(model_folder,
     # We retrieve the protobuf graph definition
     graph = tf.get_default_graph()
     input_graph_def = graph.as_graph_def()
+    # nodes = [n.name for n in input_graph_def.node]
 
     # We start a session and restore the graph weights
     with tf.Session() as sess:
diff --git a/source/train/DataModifier.py b/source/train/DataModifier.py
index 029e64219f..ee2ff0d3cb 100644
--- a/source/train/DataModifier.py
+++ b/source/train/DataModifier.py
@@ -129,8 +129,12 @@ def _build_fv_graph_inner(self):
                                           self.t_natoms,
                                           n_a_sel = self.nnei_a,
                                           n_r_sel = self.nnei_r)
+        force = tf.identity(force, name='o_dm_force')
+        virial = tf.identity(virial, name='o_dm_virial')
+        atom_virial = tf.identity(atom_virial, name='o_dm_av')
         return force, virial, atom_virial
 
+
     def _enrich(self, dipole, dof = 3):
         coll = []                
         sel_start_idx = 0
diff --git a/source/train/DeepEval.py b/source/train/DeepEval.py
index 2c0f613c27..e1156eefa7 100644
--- a/source/train/DeepEval.py
+++ b/source/train/DeepEval.py
@@ -25,24 +25,23 @@ class DeepEval():
     def __init__(self, 
                  model_file) :
         model_file = model_file
-        graph = self.load_graph (model_file)
-        t_mt = graph.get_tensor_by_name('load/model_attr/model_type:0')
-        sess = tf.Session (graph = graph)        
+        self.graph = self._load_graph (model_file)
+        t_mt = self.graph.get_tensor_by_name('load/model_attr/model_type:0')
+        sess = tf.Session (graph = self.graph)
         [mt] = sess.run([t_mt], feed_dict = {})
         self.model_type = mt.decode('utf-8')
 
-    def load_graph(self, 
+    def _load_graph(self, 
                    frozen_graph_filename, 
-                   prefix = 'load'):
+                   prefix = 'load', 
+                   default_tf_graph = True):
         # We load the protobuf file from the disk and parse it to retrieve the 
         # unserialized graph_def
         with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
             graph_def = tf.GraphDef()
             graph_def.ParseFromString(f.read())
 
-        # Then, we can use again a convenient built-in function to import a graph_def into the 
-        # current default Graph
-        with tf.Graph().as_default() as graph:
+        if default_tf_graph:
             tf.import_graph_def(
                 graph_def, 
                 input_map=None, 
@@ -50,6 +49,21 @@ def load_graph(self,
                 name=prefix, 
                 producer_op_list=None
             )
+            graph = tf.get_default_graph()
+        else :
+            # Then, we can use again a convenient built-in function to import a graph_def into the 
+            # current default Graph
+            with tf.Graph().as_default() as graph:
+                tf.import_graph_def(
+                    graph_def,
+                    input_map=None,
+                    return_elements=None,
+                    name=prefix,
+                    producer_op_list=None
+                )
+        # for ii in graph.as_graph_def().node:
+        #     print(ii.name)
+
         return graph
 
 
@@ -100,8 +114,8 @@ def __init__(self,
                  variable_name,                  
                  variable_dof) :
         DeepEval.__init__(self, model_file)
-        self.model_file = model_file
-        self.graph = self.load_graph (self.model_file)
+        # self.model_file = model_file
+        # self.graph = self.load_graph (self.model_file)
         self.variable_name = variable_name
         self.variable_dof = variable_dof
         # checkout input/output tensors from graph
diff --git a/source/train/DeepPot.py b/source/train/DeepPot.py
index dc62e56d8e..92c931377a 100644
--- a/source/train/DeepPot.py
+++ b/source/train/DeepPot.py
@@ -3,13 +3,15 @@
 import os,sys
 import numpy as np
 from deepmd.env import tf
+from deepmd.common import make_default_mesh
 from deepmd.DeepEval import DeepEval
 
 class DeepPot (DeepEval) :
     def __init__(self, 
                  model_file) :
-        self.model_file = model_file
-        self.graph = self.load_graph (self.model_file)
+        DeepEval.__init__(self, model_file)
+        # self.model_file = model_file
+        # self.graph = self.load_graph (self.model_file)
         # checkout input/output tensors from graph
         self.t_ntypes = self.graph.get_tensor_by_name ('load/descrpt_attr/ntypes:0')
         self.t_rcut   = self.graph.get_tensor_by_name ('load/descrpt_attr/rcut:0')
@@ -110,7 +112,7 @@ def eval(self,
         # make natoms_vec and default_mesh
         natoms_vec = self.make_natoms_vec(atom_types)
         assert(natoms_vec[0] == natoms)
-        default_mesh = self.make_default_mesh(cells)
+        default_mesh = make_default_mesh(cells)
 
         # evaluate
         energy = []
@@ -120,7 +122,7 @@ def eval(self,
         av = []
         feed_dict_test = {}
         feed_dict_test[self.t_natoms] = natoms_vec
-        feed_dict_test[self.t_mesh  ] = default_mesh
+        feed_dict_test[self.t_mesh  ] = default_mesh[0]
         feed_dict_test[self.t_type  ] = atom_types
         t_out = [self.t_energy, 
                  self.t_force, 
diff --git a/source/train/EwaldRecp.py b/source/train/EwaldRecp.py
index 619fd0d514..4e48e5a0cf 100644
--- a/source/train/EwaldRecp.py
+++ b/source/train/EwaldRecp.py
@@ -26,17 +26,18 @@ def __init__(self,
                  beta):
         self.hh = hh
         self.beta = beta
-        self.sess = tf.Session()
-        # place holders
-        self.t_nloc       = tf.placeholder(tf.int32, [1], name = "t_nloc")
-        self.t_coord      = tf.placeholder(global_tf_float_precision, [None], name='t_coord')
-        self.t_charge     = tf.placeholder(global_tf_float_precision, [None], name='t_charge')
-        self.t_box        = tf.placeholder(global_tf_float_precision, [None], name='t_box')
-        
-        self.t_energy, self.t_force, self.t_virial \
-            = op_module.ewald_recp(self.t_coord, self.t_charge, self.t_nloc, self.t_box, 
-                                   ewald_h = self.hh,
-                                   ewald_beta = self.beta)
+        with tf.Graph().as_default() as graph:
+            # place holders
+            self.t_nloc       = tf.placeholder(tf.int32, [1], name = "t_nloc")
+            self.t_coord      = tf.placeholder(global_tf_float_precision, [None], name='t_coord')
+            self.t_charge     = tf.placeholder(global_tf_float_precision, [None], name='t_charge')
+            self.t_box        = tf.placeholder(global_tf_float_precision, [None], name='t_box')
+            # output            
+            self.t_energy, self.t_force, self.t_virial \
+                = op_module.ewald_recp(self.t_coord, self.t_charge, self.t_nloc, self.t_box, 
+                                       ewald_h = self.hh,
+                                       ewald_beta = self.beta)
+        self.sess = tf.Session(graph=graph)
 
     def eval(self, 
              coord, 
diff --git a/source/train/Fitting.py b/source/train/Fitting.py
index cd230d5012..8ee8c80f32 100644
--- a/source/train/Fitting.py
+++ b/source/train/Fitting.py
@@ -539,6 +539,9 @@ def __init__ (self, jdata, descrpt) :
     def get_sel_type(self):
         return self.sel_type
 
+    def get_out_size(self):
+        return 3  # NOTE(review): presumably the number of output components per atom -- confirm
+
     def build (self, 
                input_d,
                rot_mat,
@@ -587,3 +590,4 @@ def build (self,
             count += 1
 
         return tf.reshape(outs, [-1])
+        # return tf.reshape(outs, [tf.shape(inputs)[0] * natoms[0] * 3 // 3])

From 72e9933455398312fa26f9d6d3e505d28f52fa21 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Thu, 21 Nov 2019 18:09:46 +0800
Subject: [PATCH 27/38] fix bug in data modifier: order of fcorr should be
 mapped back

---
 source/train/DataModifier.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/source/train/DataModifier.py b/source/train/DataModifier.py
index ee2ff0d3cb..b55a0af53a 100644
--- a/source/train/DataModifier.py
+++ b/source/train/DataModifier.py
@@ -259,6 +259,8 @@ def eval_fv(self, coords, cells, atom_types, ext_f) :
             = self.sess.run([self.force, self.virial, self.av],
                             feed_dict = feed_dict_test)
         # print('fout: ', fout.shape, fout)
+        fout = self.reverse_map(np.reshape(fout, [nframes,-1,3]), imap)
+        fout = np.reshape(fout, [nframes, -1])
         return fout, vout, avout
 
 

From 44bb07ac587ef7881e38c0deba9ba28590131216 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Fri, 22 Nov 2019 18:16:29 +0800
Subject: [PATCH 28/38] first working version of force modification

---
 source/lib/include/DataModifier.h |  46 ++++++
 source/lib/include/DeepTensor.h   |   2 +-
 source/lib/include/NNPAtomMap.h   |   1 +
 source/lib/include/common.h       |  11 +-
 source/lib/src/DataModifier.cc    | 234 ++++++++++++++++++++++++++++++
 source/lib/src/DeepTensor.cc      |   8 +-
 source/lib/src/common.cc          |   1 +
 7 files changed, 292 insertions(+), 11 deletions(-)
 create mode 100644 source/lib/include/DataModifier.h
 create mode 100644 source/lib/src/DataModifier.cc

diff --git a/source/lib/include/DataModifier.h b/source/lib/include/DataModifier.h
new file mode 100644
index 0000000000..bfa4f31fac
--- /dev/null
+++ b/source/lib/include/DataModifier.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include "NNPInter.h"
+
+class DataModifier
+{
+public:
+  DataModifier ();
+  DataModifier  (const string & model, const int & gpu_rank = 0);
+  ~DataModifier () {};
+  void init (const string & model, const int & gpu_rank = 0);
+  void print_summary(const string &pre) const;
+public:
+  void compute (vector<VALUETYPE> &		dfcorr_,
+		vector<VALUETYPE> &		dvcorr_,
+		const vector<VALUETYPE> &	dcoord_,
+		const vector<int> &		datype_,
+		const vector<VALUETYPE> &	dbox, 
+		const vector<pair<int,int>> &	pairs,
+		const vector<VALUETYPE> &	delef_, 
+		const int			nghost,
+		const LammpsNeighborList &	lmp_list);
+  VALUETYPE cutoff () const {assert(inited); return rcut;};
+  int numb_types () const {assert(inited); return ntypes;};
+  vector<int> sel_types () const {assert(inited); return sel_type;};
+private:
+  Session* session;
+  string name_scope, name_prefix;
+  int num_intra_nthreads, num_inter_nthreads;
+  GraphDef graph_def;
+  bool inited;
+  VALUETYPE rcut;
+  VALUETYPE cell_size;
+  int ntypes;
+  string model_type;
+  vector<int> sel_type;
+  template<class VT> VT get_scalar(const string & name) const;
+  template<class VT> void get_vector(vector<VT> & vec, const string & name) const;
+  void run_model (vector<VALUETYPE> &		dforce,
+		  vector<VALUETYPE> &		dvirial,
+		  Session *			session,
+		  const std::vector<std::pair<string, Tensor>> & input_tensors,
+		  const NNPAtomMap<VALUETYPE> &	nnpmap,
+		  const int			nghost);
+};
+
diff --git a/source/lib/include/DeepTensor.h b/source/lib/include/DeepTensor.h
index 14f0f69997..e0c0392692 100644
--- a/source/lib/include/DeepTensor.h
+++ b/source/lib/include/DeepTensor.h
@@ -37,7 +37,7 @@ class DeepTensor
   int odim;
   vector<int> sel_type;
   template<class VT> VT get_scalar(const string & name) const;
-  template<class VT> vector<VT> get_vector(const string & name) const;
+  template<class VT> void get_vector (vector<VT> & vec, const string & name) const;
   void run_model (vector<VALUETYPE> &		d_tensor_,
 		  Session *			session, 
 		  const std::vector<std::pair<string, Tensor>> & input_tensors,
diff --git a/source/lib/include/NNPAtomMap.h b/source/lib/include/NNPAtomMap.h
index e09b3186b6..e07f7e7e6e 100644
--- a/source/lib/include/NNPAtomMap.h
+++ b/source/lib/include/NNPAtomMap.h
@@ -18,6 +18,7 @@ class NNPAtomMap
 		 const int stride = 1) const ;
   const vector<int > & get_type () const {return atype;}
   const vector<int > & get_fwd_map () const {return fwd_idx_map;}
+  const vector<int > & get_bkw_map () const {return idx_map;}
 private:
   vector<int> idx_map;
   vector<int> fwd_idx_map;
diff --git a/source/lib/include/common.h b/source/lib/include/common.h
index 8b92dba15f..837f4b0e49 100644
--- a/source/lib/include/common.h
+++ b/source/lib/include/common.h
@@ -104,8 +104,8 @@ VT
 session_get_scalar(Session* session, const string name, const string scope = "");
 
 template<typename VT>
-vector<VT>
-session_get_vector(Session* session, const string name, const string scope = "");
+void
+session_get_vector(vector<VT> & o_vec, Session* session, const string name_, const string scope = "");
 
 int
 session_input_tensors (std::vector<std::pair<string, Tensor>> & input_tensors,
@@ -153,8 +153,8 @@ session_get_scalar(Session* session, const string name_, const string scope)
 }
 
 template<typename VT>
-vector<VT>
-session_get_vector(Session* session, const string name_, const string scope) 
+void
+session_get_vector(vector<VT> & o_vec, Session* session, const string name_, const string scope) 
 {
   string name = name_;
   if (scope != "") {
@@ -168,12 +168,11 @@ session_get_vector(Session* session, const string name_, const string scope)
   Tensor output_rc = output_tensors[0];
   assert(1 == output_rc.shape().dims());
   int dof = output_rc.shape().dim_size(0);
-  vector<VT> o_vec(dof);
+  o_vec.resize(dof);
   auto orc = output_rc.flat <VT> ();
   for (int ii = 0; ii < dof; ++ii){
     o_vec[ii] = orc(ii);
   }  
-  return o_vec;
 }
 
 
diff --git a/source/lib/src/DataModifier.cc b/source/lib/src/DataModifier.cc
new file mode 100644
index 0000000000..09d370a915
--- /dev/null
+++ b/source/lib/src/DataModifier.cc
@@ -0,0 +1,234 @@
+#include "DataModifier.h"
+
+DataModifier::
+DataModifier()
+    : inited (false)
+{
+  name_scope = "load";
+}
+
+DataModifier::
+DataModifier(const string & model, const int & gpu_rank)
+    : inited (false)
+{
+  get_env_nthreads(num_intra_nthreads, num_inter_nthreads);
+  init(model, gpu_rank);  
+}
+
+void
+DataModifier::
+init (const string & model, const int & gpu_rank)
+{
+  assert (!inited);
+  SessionOptions options;
+  options.config.set_inter_op_parallelism_threads(num_inter_nthreads);
+  options.config.set_intra_op_parallelism_threads(num_intra_nthreads);
+  checkStatus(NewSession(options, &session));
+  checkStatus(ReadBinaryProto(Env::Default(), model, &graph_def));
+  checkStatus(session->Create(graph_def));  
+  // int nnodes = graph_def.node_size();
+  // for (int ii = 0; ii < nnodes; ++ii){
+  //   cout << ii << " \t " << graph_def.node(ii).name() << endl;
+  // }
+  rcut = get_scalar<VALUETYPE>("descrpt_attr/rcut");
+  cell_size = rcut;
+  ntypes = get_scalar<int>("descrpt_attr/ntypes");
+  model_type = get_scalar<string>("model_attr/model_type");
+  get_vector<int>(sel_type, "model_attr/sel_type");
+  sort(sel_type.begin(), sel_type.end());
+  inited = true;
+}
+
+template<class VT>
+VT
+DataModifier::
+get_scalar (const string & name) const
+{
+  return session_get_scalar<VT>(session, name, name_scope);
+}
+
+template<class VT>
+void
+DataModifier::
+get_vector (vector<VT> & vec, const string & name) const
+{
+  session_get_vector<VT>(vec, session, name, name_scope);
+}
+// Run the graph once and copy the force (nall*3) and virial (9) outputs to dforce / dvirial.
+void 
+DataModifier::
+run_model (vector<VALUETYPE> &		dforce,
+	   vector<VALUETYPE> &		dvirial,
+	   Session *			session, 
+	   const std::vector<std::pair<string, Tensor>> & input_tensors,
+	   const NNPAtomMap<VALUETYPE> &nnpmap, 
+	   const int			nghost)
+{
+  const int nloc = static_cast<int>(nnpmap.get_type().size());
+  const int nall = nloc + nghost;
+  if (nloc == 0) {
+    dforce.clear();
+    dvirial.clear();
+    return;
+  }
+
+  std::vector<Tensor> output_tensors;
+  checkStatus (session->Run(input_tensors, 
+			    {"o_dm_force", "o_dm_virial", "o_dm_av"},
+			    {}, 
+			    &output_tensors));
+  // outputs are indexed in the order the fetch names are listed above
+  const Tensor & output_f = output_tensors[0];
+  const Tensor & output_v = output_tensors[1];
+  const Tensor & output_av = output_tensors[2];
+  assert (output_f.dims() == 2 && "dim of output tensor should be 2");
+  assert (output_v.dims() == 2 && "dim of output tensor should be 2");
+  assert (output_av.dims() == 2 && "dim of output tensor should be 2");
+  const int nframes = output_f.dim_size(0);
+  const int natoms = output_f.dim_size(1) / 3;
+  assert (nframes == 1 && "nframes should be 1");
+  assert (natoms == nall && "natoms should be nall");
+  assert (output_v.dim_size(0) == nframes && "nframes should match");
+  assert (output_v.dim_size(1) == 9 && "dof of virial should be 9");
+  assert (output_av.dim_size(0) == nframes && "nframes should match");
+  assert (output_av.dim_size(1) == natoms * 9 && "dof of atom virial should be 9 * natoms");
+  // output_av is only shape-checked here; it is not copied to any output argument
+  auto of = output_f.flat<VALUETYPE> ();
+  auto ov = output_v.flat<VALUETYPE> ();
+  // a single frame is asserted above, so the flat index is the per-frame index
+  dforce.resize(nall*3);
+  dvirial.resize(9);
+  for (int ii = 0; ii < nall * 3; ++ii){
+    dforce[ii] = of(ii);
+  }
+  for (int ii = 0; ii < 9; ++ii){
+    dvirial[ii] = ov(ii);
+  }
+}
+
+// Compute the force correction dfcorr_ (nall*3 values, original atom order) and the
+// virial correction dvcorr_ (9 values) for one frame from the field delef_ and the pairs.
+void
+DataModifier::
+compute (vector<VALUETYPE> &		dfcorr_,
+	 vector<VALUETYPE> &		dvcorr_,
+	 const vector<VALUETYPE> &	dcoord_,
+	 const vector<int> &		datype_,
+	 const vector<VALUETYPE> &	dbox, 
+	 const vector<pair<int,int>> &	pairs,
+	 const vector<VALUETYPE> &	delef_, 
+	 const int			nghost,
+	 const LammpsNeighborList &	lmp_list)
+{
+  // firstly do selection
+  int nall = datype_.size();
+  int nloc = nall - nghost;
+  int nghost_real;
+  vector<int > real_fwd_map, real_bkw_map;
+  select_real_atoms(real_fwd_map, real_bkw_map, nghost_real, dcoord_, datype_, nghost, ntypes);  
+  int nall_real = real_bkw_map.size();
+  int nloc_real = nall_real - nghost_real;
+  // resize to nall_real
+  vector<VALUETYPE> dcoord_real;
+  vector<VALUETYPE> delef_real;
+  vector<int> datype_real;
+  dcoord_real.resize(nall_real * 3);
+  delef_real.resize(nall_real * 3);
+  datype_real.resize(nall_real);
+  // fwd map
+  select_map<VALUETYPE>(dcoord_real, dcoord_, real_fwd_map, 3);
+  select_map<VALUETYPE>(delef_real, delef_, real_fwd_map, 3);
+  select_map<int>(datype_real, datype_, real_fwd_map, 1);
+  // internal nlist
+  InternalNeighborList nlist_;
+  convert_nlist_lmp_internal(nlist_, lmp_list);
+  shuffle_nlist_exclude_empty(nlist_, real_fwd_map);  
+  // sort atoms
+  NNPAtomMap<VALUETYPE> nnpmap (datype_real.begin(), datype_real.begin() + nloc_real);
+  assert (nloc_real == nnpmap.get_type().size());
+  const vector<int> & sort_fwd_map(nnpmap.get_fwd_map());
+  const vector<int> & sort_bkw_map(nnpmap.get_bkw_map());
+  // shuffle nlist
+  InternalNeighborList nlist(nlist_);
+  shuffle_nlist (nlist, nnpmap);
+  // make input tensors
+  std::vector<std::pair<string, Tensor>> input_tensors;
+  int ret = session_input_tensors (input_tensors, dcoord_real, ntypes, datype_real, dbox, nlist, vector<VALUETYPE>(), vector<VALUETYPE>(), nnpmap, nghost_real, name_scope);
+  assert (nloc_real == ret);
+  // make bond idx map
+  vector<int > bd_idx(nall, -1);
+  for (int ii = 0; ii < pairs.size(); ++ii){
+    bd_idx[pairs[ii].first] = pairs[ii].second;
+  }
+  // make extf by bond idx map
+  vector<int > dtype_sort_loc = nnpmap.get_type();
+  vector<VALUETYPE> dextf;
+  for(int ii = 0; ii < dtype_sort_loc.size(); ++ii){
+    if (binary_search(sel_type.begin(), sel_type.end(), dtype_sort_loc[ii])){
+      // selected atom (sel_type is sorted in init, so binary_search is a valid membership test)
+      int first_idx = real_bkw_map[sort_bkw_map[ii]];
+      int second_idx = bd_idx[first_idx];
+      assert(second_idx >= 0);
+      dextf.push_back(delef_[second_idx*3+0]);
+      dextf.push_back(delef_[second_idx*3+1]);
+      dextf.push_back(delef_[second_idx*3+2]);
+    }
+  }
+  // dextf should be loc and virtual
+  assert(dextf.size() == (nloc - nloc_real)*3);
+  // make tensor for extf
+  int nframes = 1;
+  TensorShape extf_shape ;
+  extf_shape.AddDim (nframes);
+  extf_shape.AddDim (dextf.size());
+#ifdef HIGH_PREC
+  Tensor extf_tensor	(DT_DOUBLE, extf_shape);
+#else
+  Tensor extf_tensor	(DT_FLOAT, extf_shape);
+#endif
+  auto extf = extf_tensor.matrix<VALUETYPE> ();
+  for (int ii = 0; ii < nframes; ++ii){
+    for (int jj = 0; jj < extf.size(); ++jj){
+      extf(ii,jj) = dextf[jj];
+    }
+  }
+  // append extf to input tensor
+  input_tensors.push_back({"t_ef", extf_tensor});  
+  // run model
+  vector<VALUETYPE> dfcorr, dvcorr;
+  run_model (dfcorr, dvcorr, session, input_tensors, nnpmap, nghost_real);
+  assert(dfcorr.size() == nall_real * 3);
+  // back map force
+  vector<VALUETYPE> dfcorr_1 = dfcorr;
+  nnpmap.backward (dfcorr_1.begin(), dfcorr.begin(), 3);
+  assert(dfcorr_1.size() == nall_real * 3);
+  // resize to all and clear
+  vector<VALUETYPE> dfcorr_2(nall*3);
+  fill(dfcorr_2.begin(), dfcorr_2.end(), 0.0);
+  // back map to original position
+  for (int ii = 0; ii < nall_real; ++ii){
+    for (int dd = 0; dd < 3; ++dd){
+      dfcorr_2[real_bkw_map[ii]*3+dd] += dfcorr_1[ii*3+dd];
+    }
+  }
+  // self correction of bonded force
+  for (int ii = 0; ii < pairs.size(); ++ii){
+    for (int dd = 0; dd < 3; ++dd){
+      dfcorr_2[pairs[ii].first*3+dd] += delef_[pairs[ii].second*3+dd];
+    }    
+  }
+  // add ele contribution
+  dfcorr_ = dfcorr_2;
+  // for (int ii = 0; ii < nloc; ++ii){
+  //   for (int dd = 0; dd < 3; ++dd){
+  //     dfcorr_[ii*3+dd] += delef_[ii*3+dd];
+  //   }
+  // }  
+  for (int ii = 0; ii < real_bkw_map.size(); ++ii){
+    int oii = real_bkw_map[ii];
+    for (int dd = 0; dd < 3; ++dd){
+      dfcorr_[oii*3+dd] += delef_[oii*3+dd];
+    }    
+  }
+  dvcorr_ = dvcorr;
+}
diff --git a/source/lib/src/DeepTensor.cc b/source/lib/src/DeepTensor.cc
index ea0a7579a8..4e6172a25f 100644
--- a/source/lib/src/DeepTensor.cc
+++ b/source/lib/src/DeepTensor.cc
@@ -30,7 +30,7 @@ init (const string & model, const int & gpu_rank)
   ntypes = get_scalar<int>("descrpt_attr/ntypes");
   model_type = get_scalar<string>("model_attr/model_type");
   odim = get_scalar<int>("model_attr/output_dim");
-  sel_type = get_vector<int>("model_attr/sel_type");
+  get_vector<int>(sel_type, "model_attr/sel_type");
   inited = true;
 }
 
@@ -43,11 +43,11 @@ get_scalar (const string & name) const
 }
 
 template<class VT>
-vector<VT>
+void
 DeepTensor::
-get_vector (const string & name) const
+get_vector (vector<VT> & vec, const string & name) const
 {
-  return session_get_vector<VT>(session, name);
+  session_get_vector<VT>(vec, session, name);
 }
 
 void 
diff --git a/source/lib/src/common.cc b/source/lib/src/common.cc
index 708de71b5d..9d1f1f0ff7 100644
--- a/source/lib/src/common.cc
+++ b/source/lib/src/common.cc
@@ -127,6 +127,7 @@ shuffle_nlist_exclude_empty (InternalNeighborList & list,
   new_jrange.resize(new_ilist.size()+1);
   new_jrange[0] = 0;
   for(int ii = 0; ii < list.ilist.size(); ++ii){
+    if (list.ilist[ii] < 0) continue;
     int js = list.jrange[ii];
     int je = list.jrange[ii+1];
     int cc = 0;

From 06ea446aa9a39c1622b752437dcac3d406004ff0 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Fri, 22 Nov 2019 19:27:21 +0800
Subject: [PATCH 29/38] fix bug of over-boundary back map

---
 source/lib/src/DataModifier.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/lib/src/DataModifier.cc b/source/lib/src/DataModifier.cc
index 09d370a915..8eb3dd26f9 100644
--- a/source/lib/src/DataModifier.cc
+++ b/source/lib/src/DataModifier.cc
@@ -224,7 +224,7 @@ compute (vector<VALUETYPE> &		dfcorr_,
   //     dfcorr_[ii*3+dd] += delef_[ii*3+dd];
   //   }
   // }  
-  for (int ii = 0; ii < real_bkw_map.size(); ++ii){
+  for (int ii = 0; ii < nloc_real; ++ii){
     int oii = real_bkw_map[ii];
     for (int dd = 0; dd < 3; ++dd){
       dfcorr_[oii*3+dd] += delef_[oii*3+dd];

From 98eef651005f861339297d30d138ec82a27e2d24 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Mon, 25 Nov 2019 14:51:48 +0800
Subject: [PATCH 30/38] handle the case of nloc_real == 0

---
 source/lib/src/DataModifier.cc | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/source/lib/src/DataModifier.cc b/source/lib/src/DataModifier.cc
index 8eb3dd26f9..bf53e2a7c8 100644
--- a/source/lib/src/DataModifier.cc
+++ b/source/lib/src/DataModifier.cc
@@ -128,6 +128,13 @@ compute (vector<VALUETYPE> &		dfcorr_,
   select_real_atoms(real_fwd_map, real_bkw_map, nghost_real, dcoord_, datype_, nghost, ntypes);  
   int nall_real = real_bkw_map.size();
   int nloc_real = nall_real - nghost_real;
+  if (nloc_real == 0){
+    dfcorr_.resize(nall * 3);
+    dvcorr_.resize(9);
+    fill(dfcorr_.begin(), dfcorr_.end(), 0.0);
+    fill(dvcorr_.begin(), dvcorr_.end(), 0.0);
+    return;
+  }
   // resize to nall_real
   vector<VALUETYPE> dcoord_real;
   vector<VALUETYPE> delef_real;

From 68f740d8ff1ae6549b164e82f2e0f8cec14a03ad Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Tue, 26 Nov 2019 00:22:38 +0800
Subject: [PATCH 31/38] fix bug of excluding empty from nlist

---
 source/lib/src/common.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/source/lib/src/common.cc b/source/lib/src/common.cc
index 9d1f1f0ff7..4c20869f1e 100644
--- a/source/lib/src/common.cc
+++ b/source/lib/src/common.cc
@@ -126,6 +126,7 @@ shuffle_nlist_exclude_empty (InternalNeighborList & list,
   }
   new_jrange.resize(new_ilist.size()+1);
   new_jrange[0] = 0;
+  int ci = 0;
   for(int ii = 0; ii < list.ilist.size(); ++ii){
     if (list.ilist[ii] < 0) continue;
     int js = list.jrange[ii];
@@ -137,7 +138,8 @@ shuffle_nlist_exclude_empty (InternalNeighborList & list,
 	cc++;
       }      
     }
-    new_jrange[ii+1] = new_jrange[ii] + cc;
+    new_jrange[ci+1] = new_jrange[ci] + cc;
+    ci ++;
   }
   list.ilist = new_ilist;
   list.jrange = new_jrange;

From c655b6e58637a34bd94e56ba46a61164868e485a Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Tue, 26 Nov 2019 13:03:00 +0800
Subject: [PATCH 32/38] pass name_scope at the interface of DeepTensor and
 DataModifier

---
 source/lib/include/DataModifier.h | 10 +++++++---
 source/lib/include/DeepTensor.h   | 11 ++++++++---
 source/lib/include/common.h       |  2 ++
 source/lib/src/DataModifier.cc    | 14 +++++++++-----
 source/lib/src/DeepTensor.cc      | 21 +++++++++++++--------
 source/lib/src/common.cc          | 10 ++++++++++
 6 files changed, 49 insertions(+), 19 deletions(-)

diff --git a/source/lib/include/DataModifier.h b/source/lib/include/DataModifier.h
index bfa4f31fac..838b1463ec 100644
--- a/source/lib/include/DataModifier.h
+++ b/source/lib/include/DataModifier.h
@@ -5,10 +5,14 @@
 class DataModifier
 {
 public:
-  DataModifier ();
-  DataModifier  (const string & model, const int & gpu_rank = 0);
+  DataModifier();
+  DataModifier(const string & model, 
+	       const int & gpu_rank = 0, 
+	       const string & name_scope = "");
   ~DataModifier () {};
-  void init (const string & model, const int & gpu_rank = 0);
+  void init (const string & model, 
+	     const int & gpu_rank = 0, 
+	     const string & name_scope = "");
   void print_summary(const string &pre) const;
 public:
   void compute (vector<VALUETYPE> &		dfcorr_,
diff --git a/source/lib/include/DeepTensor.h b/source/lib/include/DeepTensor.h
index e0c0392692..bfc969d7eb 100644
--- a/source/lib/include/DeepTensor.h
+++ b/source/lib/include/DeepTensor.h
@@ -5,9 +5,13 @@
 class DeepTensor
 {
 public:
-  DeepTensor ();
-  DeepTensor  (const string & model, const int & gpu_rank = 0);
-  void init (const string & model, const int & gpu_rank = 0);
+  DeepTensor();
+  DeepTensor(const string & model, 
+	     const int & gpu_rank = 0, 
+	     const string &name_scope = "");
+  void init (const string & model, 
+	     const int & gpu_rank = 0, 
+	     const string &name_scope = "");
   void print_summary(const string &pre) const;
 public:
   void compute (vector<VALUETYPE> &		value,
@@ -27,6 +31,7 @@ class DeepTensor
   vector<int> sel_types () const {assert(inited); return sel_type;};
 private:
   Session* session;
+  string name_scope;
   int num_intra_nthreads, num_inter_nthreads;
   GraphDef graph_def;
   bool inited;
diff --git a/source/lib/include/common.h b/source/lib/include/common.h
index 837f4b0e49..2f8e4ba949 100644
--- a/source/lib/include/common.h
+++ b/source/lib/include/common.h
@@ -99,6 +99,8 @@ get_env_nthreads(int & num_intra_nthreads,
 void
 checkStatus(const tensorflow::Status& status);
 
+string name_prefix(const string & name_scope);
+
 template<typename VT>
 VT
 session_get_scalar(Session* session, const string name, const string scope = "");
diff --git a/source/lib/src/DataModifier.cc b/source/lib/src/DataModifier.cc
index bf53e2a7c8..ff15486281 100644
--- a/source/lib/src/DataModifier.cc
+++ b/source/lib/src/DataModifier.cc
@@ -4,13 +4,14 @@ DataModifier::
 DataModifier()
     : inited (false)
 {
-  name_scope = "load";
 }
 
 DataModifier::
-DataModifier(const string & model, const int & gpu_rank)
-    : inited (false)
+DataModifier(const string & model, 
+	     const int & gpu_rank, 
+	     const string &name_scope_)
+    : inited (false), name_scope(name_scope_)
 {
   get_env_nthreads(num_intra_nthreads, num_inter_nthreads);
-  init(model, gpu_rank);  
+  init(model, gpu_rank, name_scope_);
 }
@@ -17,9 +18,12 @@ DataModifier(const string & model, const int & gpu_rank)
 
 void
 DataModifier::
-init (const string & model, const int & gpu_rank)
-{
+init (const string & model, 
+      const int & gpu_rank, 
+      const string &name_scope_)
+{  
   assert (!inited);
+  name_scope = name_scope_;
   SessionOptions options;
   options.config.set_inter_op_parallelism_threads(num_inter_nthreads);
   options.config.set_intra_op_parallelism_threads(num_intra_nthreads);
diff --git a/source/lib/src/DeepTensor.cc b/source/lib/src/DeepTensor.cc
index 4e6172a25f..bd3a0ea138 100644
--- a/source/lib/src/DeepTensor.cc
+++ b/source/lib/src/DeepTensor.cc
@@ -7,9 +7,11 @@ DeepTensor()
 }
 
 DeepTensor::
-DeepTensor(const string & model, const int & gpu_rank)
-    : inited (false)
+DeepTensor(const string & model, 
+	   const int & gpu_rank, 
+	   const string &name_scope_)
+    : inited (false), name_scope(name_scope_)
 {
   get_env_nthreads(num_intra_nthreads, num_inter_nthreads);
-  init(model, gpu_rank);  
+  init(model, gpu_rank, name_scope_);
 }
@@ -16,9 +18,12 @@ DeepTensor(const string & model, const int & gpu_rank)
 
 void
 DeepTensor::
-init (const string & model, const int & gpu_rank)
+init (const string & model, 
+      const int & gpu_rank, 
+      const string &name_scope_)
 {
   assert (!inited);
+  name_scope = name_scope_;
   SessionOptions options;
   options.config.set_inter_op_parallelism_threads(num_inter_nthreads);
   options.config.set_intra_op_parallelism_threads(num_intra_nthreads);
@@ -39,7 +44,7 @@ VT
 DeepTensor::
 get_scalar (const string & name) const
 {
-  return session_get_scalar<VT>(session, name);
+  return session_get_scalar<VT>(session, name, name_scope);
 }
 
 template<class VT>
@@ -47,7 +52,7 @@ void
 DeepTensor::
 get_vector (vector<VT> & vec, const string & name) const
 {
-  session_get_vector<VT>(vec, session, name);
+  session_get_vector<VT>(vec, session, name, name_scope);
 }
 
 void 
@@ -68,7 +73,7 @@ run_model (vector<VALUETYPE> &		d_tensor_,
 
   std::vector<Tensor> output_tensors;
   checkStatus (session->Run(input_tensors, 
-			    {"o_" + model_type},
+			    {name_prefix(name_scope) + "o_" + model_type},
 			    {}, 
 			    &output_tensors));
   
@@ -148,7 +153,7 @@ compute_inner (vector<VALUETYPE> &		dtensor_,
   assert (nloc == nnpmap.get_type().size());
 
   std::vector<std::pair<string, Tensor>> input_tensors;
-  int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, vector<VALUETYPE>(), vector<VALUETYPE>(), nnpmap, nghost);
+  int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, vector<VALUETYPE>(), vector<VALUETYPE>(), nnpmap, nghost, name_scope);
   assert (ret == nloc);
 
   run_model (dtensor_, session, input_tensors, nnpmap, nghost);
@@ -172,7 +177,7 @@ compute_inner (vector<VALUETYPE> &		dtensor_,
   shuffle_nlist (nlist, nnpmap);
 
   std::vector<std::pair<string, Tensor>> input_tensors;
-  int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, vector<VALUETYPE>(), vector<VALUETYPE>(), nnpmap, nghost);
+  int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, vector<VALUETYPE>(), vector<VALUETYPE>(), nnpmap, nghost, name_scope);
   assert (nloc == ret);
 
   run_model (dtensor_, session, input_tensors, nnpmap, nghost);
diff --git a/source/lib/src/common.cc b/source/lib/src/common.cc
index 4c20869f1e..97d3c7fa9a 100644
--- a/source/lib/src/common.cc
+++ b/source/lib/src/common.cc
@@ -176,6 +176,16 @@ get_env_nthreads(int & num_intra_nthreads,
   }
 }
 
+string
+name_prefix(const string & scope)
+{
+  string prefix = "";
+  if (scope != ""){
+    prefix = scope + "/";
+  }
+  return prefix;
+}
+
 int
 session_input_tensors (std::vector<std::pair<string, Tensor>> & input_tensors,
 		       const vector<VALUETYPE> &	dcoord_,

From 2a47c3ee5df356cea872515935098f1fd14e83d0 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Mon, 2 Dec 2019 21:05:37 +0800
Subject: [PATCH 33/38] fix bug of make_default_mesh

---
 source/train/DeepEval.py |  3 +--
 source/train/DeepPot.py  |  3 +--
 source/train/common.py   | 30 +++++++++++++++++++-----------
 3 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/source/train/DeepEval.py b/source/train/DeepEval.py
index e1156eefa7..ba383f245f 100644
--- a/source/train/DeepEval.py
+++ b/source/train/DeepEval.py
@@ -170,7 +170,6 @@ def eval(self,
         # make natoms_vec and default_mesh
         natoms_vec = self.make_natoms_vec(atom_types)
         assert(natoms_vec[0] == natoms)
-        default_mesh = make_default_mesh(cells)
 
         # evaluate
         tensor = []
@@ -181,7 +180,7 @@ def eval(self,
         for ii in range(nframes) :
             feed_dict_test[self.t_coord] = np.reshape(coords[ii:ii+1, :], [-1])
             feed_dict_test[self.t_box  ] = np.reshape(cells [ii:ii+1, :], [-1])
-            feed_dict_test[self.t_mesh ] = default_mesh[ii]
+            feed_dict_test[self.t_mesh ] = make_default_mesh(cells[ii:ii+1, :])
             v_out = self.sess.run (t_out, feed_dict = feed_dict_test)
             tensor.append(v_out[0])
 
diff --git a/source/train/DeepPot.py b/source/train/DeepPot.py
index 92c931377a..9430216c02 100644
--- a/source/train/DeepPot.py
+++ b/source/train/DeepPot.py
@@ -112,7 +112,6 @@ def eval(self,
         # make natoms_vec and default_mesh
         natoms_vec = self.make_natoms_vec(atom_types)
         assert(natoms_vec[0] == natoms)
-        default_mesh = make_default_mesh(cells)
 
         # evaluate
         energy = []
@@ -122,7 +121,6 @@ def eval(self,
         av = []
         feed_dict_test = {}
         feed_dict_test[self.t_natoms] = natoms_vec
-        feed_dict_test[self.t_mesh  ] = default_mesh[0]
         feed_dict_test[self.t_type  ] = atom_types
         t_out = [self.t_energy, 
                  self.t_force, 
@@ -133,6 +131,7 @@ def eval(self,
         for ii in range(nframes) :
             feed_dict_test[self.t_coord] = np.reshape(coords[ii:ii+1, :], [-1])
             feed_dict_test[self.t_box  ] = np.reshape(cells [ii:ii+1, :], [-1])
+            feed_dict_test[self.t_mesh ] = make_default_mesh(cells[ii:ii+1, :])
             if self.has_fparam:
                 feed_dict_test[self.t_fparam] = np.reshape(fparam[ii:ii+1, :], [-1])
             if self.has_aparam:
diff --git a/source/train/common.py b/source/train/common.py
index c322f0e9c4..7eb6e19381 100644
--- a/source/train/common.py
+++ b/source/train/common.py
@@ -27,18 +27,26 @@ def select_idx_map(atom_type,
     return idx_map
 
 
-def make_default_mesh(test_box, cell_size = 3) :
+def make_default_mesh(test_box, cell_size = 3.0) :
+    # nframes = test_box.shape[0]
+    # default_mesh = np.zeros([nframes, 6], dtype = np.int32)
+    # for ff in range(nframes):
+    #     ncell = np.ones (3, dtype=np.int32)
+    #     for ii in range(3) :
+    #         ncell[ii] = int ( np.linalg.norm(test_box[ff][ii]) / cell_size )
+    #         if (ncell[ii] < 2) : ncell[ii] = 2
+    #     default_mesh[ff][3] = ncell[0]
+    #     default_mesh[ff][4] = ncell[1]
+    #     default_mesh[ff][5] = ncell[2]
+    # return default_mesh
     nframes = test_box.shape[0]
-    default_mesh = np.zeros([nframes, 6], dtype = np.int32)
-    for ff in range(nframes):
-        ncell = np.ones (3, dtype=np.int32)
-        for ii in range(3) :
-            ncell[ii] = int ( np.linalg.norm(test_box[ff][ii]) / cell_size )
-            if (ncell[ii] < 2) : ncell[ii] = 2
-        default_mesh[ff][3] = ncell[0]
-        default_mesh[ff][4] = ncell[1]
-        default_mesh[ff][5] = ncell[2]
-    return default_mesh
+    lboxv = np.linalg.norm(test_box.reshape([-1, 3, 3]), axis = 2)
+    avg_lboxv = np.average(lboxv, axis = 0)
+    ncell = (avg_lboxv / cell_size).astype(np.int32)
+    ncell[ncell < 2] = 2
+    default_mesh = np.zeros (6, dtype = np.int32)
+    default_mesh[3:6] = ncell
+    return default_mesh    
 
 
 class ClassArg () : 

From 33f9e13380f85623266197d8728b492dd175c5df Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Mon, 2 Dec 2019 22:32:55 +0800
Subject: [PATCH 34/38] simplify atype for eval_modify. sort in eval_modify

---
 source/tests/test_data_modifier.py |  3 ++-
 source/train/DataModifier.py       | 17 ++++++++++-------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/source/tests/test_data_modifier.py b/source/tests/test_data_modifier.py
index dd41a27863..cdf9828a4d 100644
--- a/source/tests/test_data_modifier.py
+++ b/source/tests/test_data_modifier.py
@@ -99,7 +99,7 @@ def _test_fv (self):
                                    0.25)
         data = Data()
         coord, box, atype = data.get_data()
-
+        atype = atype[0]
         ve, vf, vv = dcm.eval_modify(coord, box, atype)
 
         hh = global_default_fv_hh
@@ -109,6 +109,7 @@ def _test_fv (self):
         nframes = coord.shape[0]
         ndof = coord.shape[1]
         natoms = ndof // 3
+        vf = np.reshape(vf, [nframes, -1])
         for ii in range(ndof):
             coordp = np.copy(coord)
             coordm = np.copy(coord)
diff --git a/source/train/DataModifier.py b/source/train/DataModifier.py
index b55a0af53a..4adf3928c1 100644
--- a/source/train/DataModifier.py
+++ b/source/train/DataModifier.py
@@ -164,16 +164,17 @@ def _slice_descrpt_deriv(self, deriv):
 
 
     def eval_modify(self, coord, box, atype, eval_fv = True):
+        coord, atype, imap = self.sort_input(coord, atype)
         natoms = coord.shape[1] // 3
         nframes = coord.shape[0]
         box = np.reshape(box, [nframes, 9])
-        atype = np.reshape(atype, [nframes, natoms])
-        sel_idx_map = select_idx_map(atype[0], self.sel_type)
+        atype = np.reshape(atype, [natoms])
+        sel_idx_map = select_idx_map(atype, self.sel_type)
         nsel = len(sel_idx_map)
         # setup charge
         charge = np.zeros([natoms])
         for ii in range(natoms):
-            charge[ii] = self.sys_charge_map[atype[0][ii]]
+            charge[ii] = self.sys_charge_map[atype[ii]]
         charge = np.tile(charge, [nframes, 1])
 
         # add wfcc
@@ -205,7 +206,7 @@ def eval_modify(self, coord, box, atype, eval_fv = True):
             corr_v = []
             corr_av = []
             for ii in range(0,nframes,batch_size):
-                f, v, av = self.eval_fv(coord[ii:ii+batch_size], box[ii:ii+batch_size], atype[0], ext_f[ii:ii+batch_size])
+                f, v, av = self.eval_fv(coord[ii:ii+batch_size], box[ii:ii+batch_size], atype, ext_f[ii:ii+batch_size])
                 corr_f.append(f)
                 corr_v.append(v)
                 corr_av.append(av)
@@ -216,6 +217,7 @@ def eval_modify(self, coord, box, atype, eval_fv = True):
             for ii in range(nsel):            
                 orig_idx = sel_idx_map[ii]            
                 tot_f[:,orig_idx*3:orig_idx*3+3] += ext_f[:,ii*3:ii*3+3]                
+            tot_f = self.reverse_map(np.reshape(tot_f, [nframes,-1,3]), imap)
             # compute v
             dipole3 = np.reshape(dipole, [nframes, nsel, 3])
             ext_f3 = np.reshape(ext_f, [nframes, nsel, 3])
@@ -268,20 +270,20 @@ def _extend_system(self, coord, box, atype, charge):
         natoms = coord.shape[1] // 3
         nframes = coord.shape[0]
         # sel atoms and setup ref coord
-        sel_idx_map = select_idx_map(atype[0], self.sel_type)
+        sel_idx_map = select_idx_map(atype, self.sel_type)
         nsel = len(sel_idx_map)
         coord3 = coord.reshape([nframes, natoms, 3])
         ref_coord = coord3[:,sel_idx_map,:]
         ref_coord = np.reshape(ref_coord, [nframes, nsel * 3])
         
-        dipole = self.eval(coord, box, atype[0])
+        dipole = self.eval(coord, box, atype)
         dipole = np.reshape(dipole, [nframes, nsel * 3])
         
         wfcc_coord = ref_coord + dipole
         # wfcc_coord = dipole
         wfcc_charge = np.zeros([nsel])
         for ii in range(nsel):
-            orig_idx = self.sel_type.index(atype[0][sel_idx_map[ii]])
+            orig_idx = self.sel_type.index(atype[sel_idx_map[ii]])
             wfcc_charge[ii] = self.model_charge_map[orig_idx]
         wfcc_charge = np.tile(wfcc_charge, [nframes, 1])
 
@@ -303,6 +305,7 @@ def modify(self,
         coord = data['coord'][:get_nframes,:]
         box = data['box'][:get_nframes,:]
         atype = data['type'][:get_nframes,:]
+        atype = atype[0]
         nframes = coord.shape[0]
 
         tot_e, tot_f, tot_v = self.eval_modify(coord, box, atype)

From 76a4f855519bd2f339f81aecfb67c2348854c1d8 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Tue, 3 Dec 2019 17:33:45 +0800
Subject: [PATCH 35/38] fix bug of DataModifier: increase start index of
 descriptor slicing. add unit test for shuffled data modifier

---
 source/tests/test_data_modifier_shuffle.py | 209 +++++++++++++++++++++
 source/train/DataModifier.py               |   4 +-
 2 files changed, 211 insertions(+), 2 deletions(-)
 create mode 100644 source/tests/test_data_modifier_shuffle.py

diff --git a/source/tests/test_data_modifier_shuffle.py b/source/tests/test_data_modifier_shuffle.py
new file mode 100644
index 0000000000..d70bd94b3b
--- /dev/null
+++ b/source/tests/test_data_modifier_shuffle.py
@@ -0,0 +1,209 @@
+import os,sys,platform,json
+import numpy as np
+import unittest
+import dpdata
+from deepmd.env import tf
+
+from deepmd.common import j_must_have, data_requirement
+from deepmd.RunOptions import RunOptions
+from deepmd.Trainer import NNPTrainer
+from deepmd.DataSystem import DeepmdDataSystem
+from deepmd.RunOptions import global_tf_float_precision
+from deepmd.RunOptions import global_np_float_precision
+from deepmd.RunOptions import global_ener_float_precision
+from deepmd.EwaldRecp import EwaldRecp
+from deepmd.DataModifier import DipoleChargeModifier
+from deepmd.DeepDipole import DeepDipole
+
+from common import Data
+
+if global_np_float_precision == np.float32 :
+    global_default_fv_hh = 1e-2
+    global_default_dw_hh = 1e-2
+    global_default_places = 3
+else :
+    global_default_fv_hh = 1e-6
+    global_default_dw_hh = 1e-4
+    global_default_places = 5
+
+modifier_datapath = 'data_modifier'
+
+class Args() :
+    # INPUT = os.path.join(modifier_datapath, 'dipole.json')
+    restart = None
+    init_model = None
+    inter_threads = 0
+
+class TestDataModifier (unittest.TestCase) :
+
+    def setUp(self):
+        # with tf.variable_scope('load', reuse = False) :
+        tf.reset_default_graph()        
+        self._setUp()
+
+    def tearDown(self):
+        tf.reset_default_graph()        
+
+    def _setUp(self):
+        args = Args()
+        run_opt = RunOptions(args, False)
+        jdata = self._setUp_jdata()
+        self._setUp_data()
+
+        # init model
+        model = NNPTrainer (jdata, run_opt = run_opt)
+        rcut = model.model.get_rcut()
+
+        # init data system
+        systems = j_must_have(jdata['training'], 'systems')
+        set_pfx = j_must_have(jdata['training'], 'set_prefix')
+        batch_size = j_must_have(jdata['training'], 'batch_size')
+        test_size = j_must_have(jdata['training'], 'numb_test')    
+        data = DeepmdDataSystem(systems, 
+                                batch_size, 
+                                test_size, 
+                                rcut, 
+                                set_prefix=set_pfx, 
+                                run_opt=run_opt)
+        data.add_dict(data_requirement)
+
+        # clear the default graph
+        tf.reset_default_graph()
+
+        # build the model with stats from the first system
+        model.build (data)
+        
+        # freeze the graph
+        with tf.Session() as sess:
+            init_op = tf.global_variables_initializer()
+            sess.run(init_op)
+            graph = tf.get_default_graph()
+            input_graph_def = graph.as_graph_def()
+            nodes = "o_dipole,o_rmat,o_rmat_deriv,o_nlist,o_rij,descrpt_attr/rcut,descrpt_attr/ntypes,descrpt_attr/sel,descrpt_attr/ndescrpt,model_attr/tmap,model_attr/sel_type,model_attr/model_type"
+            output_graph_def = tf.graph_util.convert_variables_to_constants(
+                sess,
+                input_graph_def,
+                nodes.split(",") 
+            )
+            output_graph = os.path.join(modifier_datapath, 'dipole.pb')
+            with tf.gfile.GFile(output_graph, "wb") as f:
+                f.write(output_graph_def.SerializeToString())
+
+    def _setUp_data(self):
+        """Write system 0 to disk and build system 1 as an atom-permuted copy of it."""
+        jdata = self._setUp_jdata()
+        # sys0: random coordinates and dipoles in a cubic box of edge `scale`
+        self.atom_types0 = np.array([0, 3, 2, 1, 3, 4, 1, 4], dtype = int)
+        self.natoms = len(self.atom_types0)
+        self.nframes = 1
+        scale = 10.0
+        self.sel_type = jdata['model']['fitting_net']['dipole_type']
+        self.nsel = 0
+        for ii in self.sel_type:
+            self.nsel += np.sum(self.atom_types0 == ii)
+        self.coords0 = np.random.random([self.nframes, self.natoms * 3]) * scale
+        self.dipoles0 = np.random.random([self.nframes, self.nsel * 3])
+        self.box0 = np.tile(np.reshape(np.eye(3) * scale, [-1, 9]), [self.nframes, 1])
+        self._write_sys_data('data_modifier/sys_test_0',
+                             self.atom_types0, self.coords0, self.dipoles0, self.box0)
+        # sys1: atom ii of sys1 is atom idx_map[ii] of sys0; sel_idx_map reorders the selected atoms
+        self.idx_map = np.array([6, 7, 1, 0, 5, 2, 4, 3], dtype = int)
+        self.sel_idx_map = np.array([3, 0, 2, 1], dtype = int)
+        self.atom_types1 = self.atom_types0[self.idx_map]
+        self.coords1 = np.reshape(self.coords0, [self.nframes, -1, 3])[:,self.idx_map,:]
+        self.coords1 = np.reshape(self.coords1, [self.nframes, self.natoms*3])
+        # permute whole 3-vectors per selected atom, not single columns of the flat array
+        self.dipoles1 = np.reshape(np.reshape(self.dipoles0, [self.nframes, -1, 3])[:,self.sel_idx_map,:], [self.nframes, -1])
+        self.box1 = self.box0
+
+    def _write_sys_data(self, dirname, atom_types, coords, dipoles, box):
+        os.makedirs(dirname, exist_ok = True)
+        os.makedirs(dirname+'/set.0', exist_ok = True)
+        np.savetxt(os.path.join(dirname, 'type.raw'), atom_types, fmt = '%d')
+        np.save(os.path.join(dirname, 'set.0', 'coord.npy'), coords)
+        np.save(os.path.join(dirname, 'set.0', 'dipole.npy'), dipoles)
+        np.save(os.path.join(dirname, 'set.0', 'box.npy'), box)
+
+    def _setUp_jdata(self):
+        aa = {"a":[1,2,3]}
+        jdata = {
+            "model":{
+	        "type_map":		["A", "B", "C", "D", "E"],
+	        "descriptor" :{
+	            "type":		"se_a",
+	            "sel":              [50, 50, 50, 50, 50],
+	            "rcut_smth":	3.80,
+	            "rcut":		4.00,
+	            "neuron":		[2, 4],
+	            "resnet_dt":	False,
+	            "axis_neuron":	4,
+	            "seed":		1,
+	        },
+	        "fitting_net": {
+	            "type":		"dipole",
+	            "dipole_type":	[1, 3],
+	            "neuron":		[10],
+	            "resnet_dt":	True,
+	            "seed":		1,
+	        },
+            },
+            "learning_rate" :{
+	        "type":		"exp",
+	        "start_lr":	0.01,
+	        "decay_steps":	5000,
+	        "decay_rate":	0.95,
+            },
+            "training": {
+	        "systems":	["data_modifier/sys_test_0"], 
+	        "set_prefix":	"set",    
+	        "stop_batch":	1000000,
+	        "batch_size":	1,
+	        "numb_test":	2,
+            },
+        }
+        return jdata
+
+
+    def test_z_dipole(self):
+        """Dipoles of the shuffled system must equal the reordered dipoles of system 0."""
+        dd = DeepDipole(os.path.join(modifier_datapath, "dipole.pb"))
+        dv0 = dd.eval(self.coords0, self.box0, self.atom_types0)
+        dv1 = dd.eval(self.coords1, self.box1, self.atom_types1)
+
+        dv01 = dv0.reshape([self.nframes, -1, 3])
+        dv01 = dv01[:,self.sel_idx_map, :]
+        dv01 = dv01.reshape([self.nframes, -1])
+        dv1 = dv1.reshape([self.nframes, -1])
+
+        for ii in range(self.nframes):
+            for jj in range(dv01.shape[1]):  # every component, not only the first nsel
+                self.assertAlmostEqual(
+                    dv01[ii][jj], dv1[ii][jj],
+                    msg = "dipole [%d,%d] does not match" % (ii, jj))
+
+
+    def test_modify(self):
+        """Energy, virial and (reordered) forces must not depend on the atom order."""
+        dcm = DipoleChargeModifier(os.path.join(modifier_datapath, "dipole.pb"),
+                                   [-1, -3],
+                                   [1, 1, 1, 1, 1],
+                                   1,
+                                   0.25)
+        ve0, vf0, vv0 = dcm.eval_modify(self.coords0, self.box0, self.atom_types0)
+        ve1, vf1, vv1 = dcm.eval_modify(self.coords1, self.box1, self.atom_types1)
+        vf01 = vf0[:,self.idx_map, :]
+        for ii in range(self.nframes):
+            self.assertAlmostEqual(ve0[ii], ve1[ii],
+                                   msg = 'energy %d should match' % ii)
+        for ii in range(self.nframes):
+            for jj in range(9):
+                self.assertAlmostEqual(vv0[ii][jj], vv1[ii][jj],
+                                       msg = 'virial [%d,%d] should match' % (ii,jj))
+        for ii in range(self.nframes):
+            for jj in range(self.natoms):
+                for dd in range(3):
+                    self.assertAlmostEqual(
+                        vf01[ii][jj][dd], vf1[ii][jj][dd],
+                        msg = "force [%d,%d,%d] does not match" % (ii,jj,dd))
+                    
+        
diff --git a/source/train/DataModifier.py b/source/train/DataModifier.py
index 4adf3928c1..4fa57d9d86 100644
--- a/source/train/DataModifier.py
+++ b/source/train/DataModifier.py
@@ -152,14 +152,14 @@ def _enrich(self, dipole, dof = 3):
 
     def _slice_descrpt_deriv(self, deriv):
         coll = []
-        start_idx = 0        
+        start_idx = 0
         for type_i in range(self.ntypes):
             if type_i in self.sel_type:
                 di = tf.slice(deriv, 
                               [ 0, start_idx               * self.ndescrpt],
                               [-1, self.t_natoms[2+type_i] * self.ndescrpt])
                 coll.append(di)
-                start_idx += self.t_natoms[2+type_i]
+            start_idx += self.t_natoms[2+type_i]
         return tf.concat(coll, axis = 1)        
 
 

From 9ec7cf49c5a8c3267a7967cd33635af74af105b5 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Fri, 6 Dec 2019 12:23:45 +0800
Subject: [PATCH 36/38] fix bug of binary search. add source files pppm_dplr,
 fix_dplr

---
 source/lib/include/DeepTensor.h |   2 +-
 source/lib/src/DataModifier.cc  |   2 +-
 source/lib/src/common.cc        |   3 +-
 source/lmp/fix_dplr.cpp         | 470 ++++++++++++++++++++++++++++++++
 source/lmp/fix_dplr.h           |  53 ++++
 source/lmp/pppm_dplr.cpp        | 401 +++++++++++++++++++++++++++
 source/lmp/pppm_dplr.h          |  54 ++++
 7 files changed, 981 insertions(+), 4 deletions(-)
 create mode 100644 source/lmp/fix_dplr.cpp
 create mode 100644 source/lmp/fix_dplr.h
 create mode 100644 source/lmp/pppm_dplr.cpp
 create mode 100644 source/lmp/pppm_dplr.h

diff --git a/source/lib/include/DeepTensor.h b/source/lib/include/DeepTensor.h
index bfc969d7eb..867cff37cc 100644
--- a/source/lib/include/DeepTensor.h
+++ b/source/lib/include/DeepTensor.h
@@ -28,7 +28,7 @@ class DeepTensor
   VALUETYPE cutoff () const {assert(inited); return rcut;};
   int numb_types () const {assert(inited); return ntypes;};
   int output_dim () const {assert(inited); return odim;};
-  vector<int> sel_types () const {assert(inited); return sel_type;};
+  const vector<int> & sel_types () const {assert(inited); return sel_type;};
 private:
   Session* session;
   string name_scope;
diff --git a/source/lib/src/DataModifier.cc b/source/lib/src/DataModifier.cc
index ff15486281..65b3f85dd4 100644
--- a/source/lib/src/DataModifier.cc
+++ b/source/lib/src/DataModifier.cc
@@ -175,7 +175,7 @@ compute (vector<VALUETYPE> &		dfcorr_,
   vector<int > dtype_sort_loc = nnpmap.get_type();
   vector<VALUETYPE> dextf;
   for(int ii = 0; ii < dtype_sort_loc.size(); ++ii){
-    if (lower_bound(sel_type.begin(), sel_type.end(), dtype_sort_loc[ii]) != sel_type.end()){
+    if (binary_search(sel_type.begin(), sel_type.end(), dtype_sort_loc[ii])){
       // selected atom
       int first_idx = real_bkw_map[sort_bkw_map[ii]];
       int second_idx = bd_idx[first_idx];
diff --git a/source/lib/src/common.cc b/source/lib/src/common.cc
index 97d3c7fa9a..2f739ea196 100644
--- a/source/lib/src/common.cc
+++ b/source/lib/src/common.cc
@@ -24,8 +24,7 @@ select_by_type(vector<int> & fwd_map,
   for (int ii = 0; ii < nall; ++ii){
     // exclude virtual sites
     // select the type with id < ntypes
-    if (lower_bound(sel_type.begin(), sel_type.end(), datype_[ii]) !=
-	sel_type.end()){
+    if (binary_search(sel_type.begin(), sel_type.end(), datype_[ii])){
       bkw_map.push_back(ii);
       if (ii < nloc) {
 	nloc_real += 1;
diff --git a/source/lmp/fix_dplr.cpp b/source/lmp/fix_dplr.cpp
new file mode 100644
index 0000000000..481da66840
--- /dev/null
+++ b/source/lmp/fix_dplr.cpp
@@ -0,0 +1,470 @@
+#include <iostream>
+#include <iomanip>
+#include <limits>
+#include "atom.h"
+#include "domain.h"
+#include "comm.h"
+#include "force.h"
+#include "update.h"
+#include "error.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "fix.h"
+#include "fix_dplr.h"
+#include "pppm_dplr.h"
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+using namespace std;
+
+static bool 
+is_key (const string& input) 
+{
+  // Returns true when input is one of the keywords accepted on the fix dplr
+  // command line. The table is static so no vector<string> is built per call.
+  static const char * const keys[] = {"model", "type_associate", "bond_type"};
+  const int nkeys = sizeof(keys) / sizeof(keys[0]);
+  for (int ii = 0; ii < nkeys; ++ii){
+    if (input == keys[ii]) {
+      return true;
+    }
+  }
+  return false;
+}
+
+
+FixDPLR::FixDPLR(LAMMPS *lmp, int narg, char **arg) 
+    :Fix(lmp, narg, arg)
+{
+  // keywords, in any order: model <file>, type_associate <ints>, bond_type <ints>
+  virial_flag = 1;
+
+  if (strcmp(update->unit_style,"metal") != 0) {
+    error->all(FLERR,"Fix dplr requires metal unit, please set it by \"units metal\"");
+  }
+
+  int iarg = 3;
+  vector<int> map_vec;
+  bond_type.clear();
+  while (iarg < narg) {
+    const string key(arg[iarg]);
+    if (! is_key(key)) {
+      error->all(FLERR,"Illegal fix dplr command\nwrong number of parameters\n");
+    }
+    // one keyword per pass: the loop condition re-checks iarg before arg[iarg] is read
+    if (key == "model") {
+      if (iarg+1 >= narg) error->all(FLERR,"Illegal fix dplr command");
+      model = string(arg[iarg+1]);
+      iarg += 2;
+    }
+    else if (key == "type_associate") {
+      int iend = iarg+1;
+      while (iend < narg && (! is_key(arg[iend]) )) {
+        map_vec.push_back(atoi(arg[iend])-1);
+        iend ++;
+      }
+      iarg = iend;
+    }
+    else if (key == "bond_type") {
+      int iend = iarg+1;
+      while (iend < narg && (! is_key(arg[iend]) )) {
+        bond_type.push_back(atoi(arg[iend])-1);
+        iend ++;
+      }
+      sort(bond_type.begin(), bond_type.end());
+      iarg = iend;
+    }
+  }
+  if (map_vec.size() % 2 != 0) error->all(FLERR,"number of ints provided by type_associate should be even");
+  for (size_t ii = 0; ii < map_vec.size()/2; ++ii){
+    type_asso[map_vec[ii*2+0]] = map_vec[ii*2+1];
+    bk_type_asso[map_vec[ii*2+1]] = map_vec[ii*2+0];
+  }
+
+  dpt.init(model, 0, "load");
+  dtm.init(model, 0, "load");
+
+  sel_type = dpt.sel_types();
+  sort(sel_type.begin(), sel_type.end());
+  dpl_type.clear();
+  for (size_t ii = 0; ii < sel_type.size(); ++ii){
+    dpl_type.push_back(type_asso[sel_type[ii]]);
+  }
+  // get_valid_pairs runs binary_search on dpl_type, which requires a sorted range
+  sort(dpl_type.begin(), dpl_type.end());
+
+  pair_nnp = (PairNNP *) force->pair_match("deepmd",1);
+  if (!pair_nnp) {
+    error->all(FLERR,"pair_style deepmd should be set before this fix\n");
+  }
+
+  // set comm size needed by this fix
+  comm_reverse = 3;
+}
+
+int FixDPLR::setmask()
+{
+  int mask = 0;
+  mask |= POST_INTEGRATE;
+  mask |= PRE_FORCE;
+  mask |= POST_FORCE;
+  return mask;
+}
+
+void FixDPLR::init()
+{
+  // double **xx = atom->x;
+  // double **vv = atom->v;
+  // int nlocal = atom->nlocal;
+  // for (int ii = 0; ii < nlocal; ++ii){
+  //   cout << xx[ii][0] << " " 
+  // 	 << xx[ii][1] << " " 
+  // 	 << xx[ii][2] << "   " 
+  // 	 << vv[ii][0] << " " 
+  // 	 << vv[ii][1] << " " 
+  // 	 << vv[ii][2] << " " 
+  // 	 << endl;
+  // }
+}
+
+void FixDPLR::setup(int vflag)
+{
+  // if (strstr(update->integrate_style,"verlet"))
+  //   post_force(vflag);
+  // else {
+  //   error->all(FLERR, "respa is not supported by this fix");
+  // }
+  if (vflag) {
+    v_setup(vflag);
+  }
+  else {
+    evflag = 0;
+  }
+}
+
+
+void
+FixDPLR::get_valid_pairs(vector<pair<int,int> >& pairs)
+{
+  pairs.clear();
+  
+  int nlocal = atom->nlocal;
+  int nghost = atom->nghost;
+  int nall = nlocal + nghost;
+  vector<int > dtype (nall);
+  // get type
+  {
+    int *type = atom->type;
+    for (int ii = 0; ii < nall; ++ii){
+      dtype[ii] = type[ii] - 1;
+    }
+  }
+
+  int **bondlist = neighbor->bondlist;
+  int nbondlist = neighbor->nbondlist;
+  for (int ii = 0; ii < nbondlist; ++ii){
+    int idx0=-1, idx1=-1;
+    int bd_type = bondlist[ii][2] - 1;
+    if ( ! binary_search(bond_type.begin(), bond_type.end(), bd_type) ){
+      continue;
+    }
+    if (binary_search(sel_type.begin(), sel_type.end(), dtype[bondlist[ii][0]]) 
+	&& 
+	binary_search(dpl_type.begin(), dpl_type.end(), dtype[bondlist[ii][1]])
+	){
+      idx0 = bondlist[ii][0];
+      idx1 = bondlist[ii][1];
+    }
+    else if (binary_search(sel_type.begin(), sel_type.end(), dtype[bondlist[ii][1]])
+	     &&
+	     binary_search(dpl_type.begin(), dpl_type.end(), dtype[bondlist[ii][0]])
+	){
+      idx0 = bondlist[ii][1];
+      idx1 = bondlist[ii][0];
+    }
+    else {
+      error->all(FLERR, "find a bonded pair the types of which are not associated");
+    }
+    if ( ! (idx0 < nlocal && idx1 < nlocal) ){
+      error->all(FLERR, "find a bonded pair that is not on the same processor, something should not happen");
+    }
+    pairs.push_back(pair<int,int>(idx0, idx1));
+  }
+}
+
+void FixDPLR::post_integrate()
+{
+  // For every pair returned by get_valid_pairs (first: atom of a sel_type,
+  // second: its bonded partner of a dpl_type), overwrite the position and
+  // the velocity of the second atom with those of the first one.
+  double **x = atom->x;
+  double **v = atom->v;
+
+  vector<pair<int,int> > valid_pairs;
+  get_valid_pairs(valid_pairs);
+
+  for (size_t ii = 0; ii < valid_pairs.size(); ++ii){
+    const int idx0 = valid_pairs[ii].first;
+    const int idx1 = valid_pairs[ii].second;
+    for (int dd = 0; dd < 3; ++dd){
+      // the partner simply follows its reference atom
+      x[idx1][dd] = x[idx0][dd] ;
+      v[idx1][dd] = v[idx0][dd] ;
+      // v[idx1][dd] = 0.0;
+    }
+  }
+}
+
+void FixDPLR::pre_force(int vflag)
+{
+  double **x = atom->x;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  int nghost = atom->nghost;
+  int nall = nlocal + nghost;
+
+  // if (eflag_atom) {
+  //   error->all(FLERR,"atomic energy calculation is not supported by this fix\n");
+  // }
+  
+  // declear inputs
+  vector<int > dtype (nall);
+  vector<FLOAT_PREC > dbox (9, 0) ;
+  vector<FLOAT_PREC > dcoord (nall * 3, 0.);
+  // get type
+  for (int ii = 0; ii < nall; ++ii){
+    dtype[ii] = type[ii] - 1;
+  }  
+  // get box
+  dbox[0] = domain->h[0];	// xx
+  dbox[4] = domain->h[1];	// yy
+  dbox[8] = domain->h[2];	// zz
+  dbox[7] = domain->h[3];	// zy
+  dbox[6] = domain->h[4];	// zx
+  dbox[3] = domain->h[5];	// yx
+  // get coord
+  for (int ii = 0; ii < nall; ++ii){
+    for (int dd = 0; dd < 3; ++dd){
+      dcoord[ii*3+dd] = x[ii][dd] - domain->boxlo[dd];
+    }
+  }
+  // get lammps nlist
+  NeighList * list = pair_nnp->list;
+  LammpsNeighborList lmp_list (list->inum, list->ilist, list->numneigh, list->firstneigh);
+  // declear output
+  vector<FLOAT_PREC> tensor;
+  // compute
+  dpt.compute(tensor, dcoord, dtype, dbox, nghost, lmp_list);
+  // cout << "tensor of size " << tensor.size() << endl;
+  // cout << "nghost " << nghost << endl;
+  // cout << "nall " << dtype.size() << endl;
+  // cout << "nloc " << nlocal << endl;
+  // for (int ii = 0; ii < tensor.size(); ++ii){
+  //   if (ii%3 == 0){
+  //     cout << endl;
+  //   }
+  //   cout << tensor[ii] << "\t";
+  // }
+  // cout << endl;
+  // for (int ii = 0; ii < nlocal * 3; ++ii){
+  //   if (ii%3 == 0){
+  //     cout << endl;
+  //   }
+  //   cout << dcoord[ii] << "\t";
+  // }
+  // int max_type = 0;
+  // for (int ii = 0; ii < dtype.size(); ++ii){
+  //   if (dtype[ii] > max_type) {
+  //     max_type = dtype[ii];
+  //   }
+  // }
+
+  // selected type
+  vector<int> dpl_type;
+  for (int ii = 0; ii < sel_type.size(); ++ii){
+    dpl_type.push_back(type_asso[sel_type[ii]]);
+  }
+  vector<int> sel_fwd, sel_bwd;
+  int sel_nghost;
+  select_by_type(sel_fwd, sel_bwd, sel_nghost, dcoord, dtype, nghost, sel_type);
+  int sel_nall = sel_bwd.size();
+  int sel_nloc = sel_nall - sel_nghost;
+  vector<int> sel_type(sel_bwd.size());
+  select_map<int>(sel_type, dtype, sel_fwd, 1);
+  
+  NNPAtomMap<FLOAT_PREC> nnp_map(sel_type.begin(), sel_type.begin() + sel_nloc);
+  const vector<int> & sort_fwd_map(nnp_map.get_fwd_map());
+
+  vector<pair<int,int> > valid_pairs;
+  get_valid_pairs(valid_pairs);  
+  
+  int odim = dpt.output_dim();
+  assert(odim == 3);
+  dipole_recd.resize(nall * 3);
+  fill(dipole_recd.begin(), dipole_recd.end(), 0.0);
+  for (int ii = 0; ii < valid_pairs.size(); ++ii){
+    int idx0 = valid_pairs[ii].first;
+    int idx1 = valid_pairs[ii].second;
+    assert(idx0 < sel_fwd.size() && sel_fwd[idx0] < sort_fwd_map.size());
+    int res_idx = sort_fwd_map[sel_fwd[idx0]];
+    // int ret_idx = dpl_bwd[res_idx];
+    for (int dd = 0; dd < 3; ++dd){
+      x[idx1][dd] = x[idx0][dd] + tensor[res_idx * 3 + dd];
+      // res_buff[idx1 * odim + dd] = tensor[res_idx * odim + dd];
+      dipole_recd[idx0*3+dd] = tensor[res_idx * 3 + dd];
+    }
+  }
+  // cout << "-------------------- fix/dplr: pre force " << endl;
+  // for (int ii = 0; ii < nlocal; ++ii){
+  //   cout << ii << "    ";
+  //   for (int dd = 0; dd < 3; ++dd){
+  //     cout << x[ii][dd] << " " ;
+  //   }
+  //   cout << endl;
+  // }
+}
+
+
+void FixDPLR::post_force(int vflag)
+{
+  if (vflag) {
+    v_setup(vflag);
+  }
+  else {
+    evflag = 0;
+  }
+  if (vflag_atom) {
+    error->all(FLERR,"atomic virial calculation is not supported by this fix\n");
+  }
+
+  PPPMDPLR * pppm_dplr = (PPPMDPLR*) force->kspace_match("pppm/dplr", 1);
+  if (!pppm_dplr) {
+    error->all(FLERR,"kspace_style pppm/dplr should be set before this fix\n");
+  }
+  const vector<double > & dfele_(pppm_dplr->get_fele());
+  int nlocal = atom->nlocal;
+  int nghost = atom->nghost;
+  int nall = nlocal + nghost;
+  vector<FLOAT_PREC> dcoord(nall*3, 0.0), dbox(9, 0.0), dfele(nlocal*3, 0.0);
+  vector<int> dtype(nall, 0);
+  {
+    int *type = atom->type;
+    for (int ii = 0; ii < nall; ++ii){
+      dtype[ii] = type[ii] - 1;
+    }
+    dbox[0] = domain->h[0];	// xx
+    dbox[4] = domain->h[1];	// yy
+    dbox[8] = domain->h[2];	// zz
+    dbox[7] = domain->h[3];	// zy
+    dbox[6] = domain->h[4];	// zx
+    dbox[3] = domain->h[5];	// yx
+    // get coord
+    double ** x = atom->x;
+    for (int ii = 0; ii < nall; ++ii){
+      for (int dd = 0; dd < 3; ++dd){
+	dcoord[ii*3+dd] = x[ii][dd] - domain->boxlo[dd];
+      }
+    }
+    assert(dfele_.size() == nlocal * 3);
+    for (int ii = 0; ii < nlocal*3; ++ii){
+      dfele[ii] = dfele_[ii];
+    }
+  }
+  // lmp nlist
+  NeighList * list = pair_nnp->list;
+  LammpsNeighborList lmp_list (list->inum, list->ilist, list->numneigh, list->firstneigh);
+  // bonded pairs
+  vector<pair<int,int> > valid_pairs;
+  get_valid_pairs(valid_pairs);  
+  // output vects
+  vector<FLOAT_PREC> dfcorr, dvcorr;
+  // compute
+  dtm.compute(dfcorr, dvcorr, dcoord, dtype, dbox, valid_pairs, dfele, nghost, lmp_list);
+  assert(dfcorr.size() == dcoord.size());
+  assert(dfcorr.size() == nall * 3);
+  // backward communication of fcorr
+  dfcorr_buff.resize(dfcorr.size());
+  copy(dfcorr.begin(), dfcorr.end(), dfcorr_buff.begin());
+  comm->reverse_comm_fix(this,3);
+  copy(dfcorr_buff.begin(), dfcorr_buff.end(), dfcorr.begin());
+  // // check and print
+  // cout << "-------------------- fix/dplr: post force " << endl;
+  // cout << "dfcorr.size() " << dfcorr.size() << endl;
+  // cout << "dcoord.size() " << dcoord.size() << endl;
+  // for (int ii = 0; ii < nlocal; ++ii){
+  //   cout << ii << "\t x: ";
+  //   for (int dd = 0; dd < 3; ++dd){
+  //     cout << dcoord[ii*3+dd] << " \t " ;
+  //   }
+  //   cout << ii << "\t f: ";
+  //   for (int dd = 0; dd < 3; ++dd){
+  //     cout << dfcorr[ii*3+dd] << " \t " ;
+  //   }
+  //   cout << endl;
+  // }
+  // apply the force correction
+  double ** f = atom->f;
+  for (int ii = 0; ii < nlocal; ++ii){
+    for(int dd = 0; dd < 3; ++dd){
+      f[ii][dd] += dfcorr[ii*3+dd];
+    }
+  }
+  // cout << "virial corr1 ";
+  // for (int ii = 0; ii < 9; ++ii){
+  //   cout << dvcorr[ii] << " " ;
+  // }
+  // cout << endl;
+  for (int ii = 0; ii < valid_pairs.size(); ++ii){
+    int idx0 = valid_pairs[ii].first;
+    int idx1 = valid_pairs[ii].second;
+    for (int dd0 = 0; dd0 < 3; ++dd0){
+      for (int dd1 = 0; dd1 < 3; ++dd1){
+	dvcorr[dd0*3+dd1] -= dfele[idx1*3+dd0] * dipole_recd[idx0*3+dd1];
+      }
+    }    
+  }
+  // cout << "virial corr2 ";
+  // for (int ii = 0; ii < 9; ++ii){
+  //   cout << dvcorr[ii] << " " ;
+  // }
+  // cout << endl;
+  if (evflag){
+    double vv[6] = {0.0};
+    vv[0] += dvcorr[0];
+    vv[1] += dvcorr[4];
+    vv[2] += dvcorr[8];
+    vv[3] += dvcorr[3];
+    vv[4] += dvcorr[6];
+    vv[5] += dvcorr[7];
+    v_tally(0, vv);
+  }
+}
+
+
+int FixDPLR::pack_reverse_comm(int n, int first, double *buf)
+{
+  int m = 0;
+  int last = first + n;
+  for (int i = first; i < last; i++) {
+    buf[m++] = dfcorr_buff[3*i+0];
+    buf[m++] = dfcorr_buff[3*i+1];
+    buf[m++] = dfcorr_buff[3*i+2];
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixDPLR::unpack_reverse_comm(int n, int *list, double *buf)
+{
+  int m = 0;
+  for (int i = 0; i < n; i++) {
+    int j = list[i];
+    dfcorr_buff[3*j+0] += buf[m++];
+    dfcorr_buff[3*j+1] += buf[m++];
+    dfcorr_buff[3*j+2] += buf[m++];
+  }
+}
+
+
+
diff --git a/source/lmp/fix_dplr.h b/source/lmp/fix_dplr.h
new file mode 100644
index 0000000000..edc9204874
--- /dev/null
+++ b/source/lmp/fix_dplr.h
@@ -0,0 +1,53 @@
+#ifdef FIX_CLASS
+
+FixStyle(dplr,FixDPLR)
+
+#else
+
+#ifndef LMP_FIX_DPLR_H
+#define LMP_FIX_DPLR_H
+
+#include <stdio.h>
+#include "fix.h"
+#include "pair_nnp.h"
+#include "DeepTensor.h"
+#include "DataModifier.h"
+
+#ifdef HIGH_PREC
+#define FLOAT_PREC double
+#else
+#define FLOAT_PREC float
+#endif
+
+namespace LAMMPS_NS {
+  class FixDPLR : public Fix {
+public:
+    FixDPLR(class LAMMPS *, int, char **);
+    virtual ~FixDPLR() {};
+    int setmask();
+    void init();
+    void setup(int);
+    void post_integrate();
+    void pre_force(int);
+    void post_force(int);
+    int pack_reverse_comm(int, int, double *);
+    void unpack_reverse_comm(int, int *, double *);
+private:
+    PairNNP * pair_nnp;
+    DeepTensor dpt;
+    DataModifier dtm;
+    string model;
+    int ntypes;
+    vector<int > sel_type;
+    vector<int > dpl_type;
+    vector<int > bond_type;
+    map<int,int > type_asso;
+    map<int,int > bk_type_asso;
+    vector<FLOAT_PREC> dipole_recd;
+    vector<double> dfcorr_buff;
+    void get_valid_pairs(vector<pair<int,int> >& pairs);
+  };
+}
+
+#endif // LMP_FIX_DPLR_H
+#endif // FIX_CLASS
diff --git a/source/lmp/pppm_dplr.cpp b/source/lmp/pppm_dplr.cpp
new file mode 100644
index 0000000000..f706320f79
--- /dev/null
+++ b/source/lmp/pppm_dplr.cpp
@@ -0,0 +1,401 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include <math.h>
+#include "pppm_dplr.h"
+#include "atom.h"
+#include "domain.h"
+#include "force.h"
+#include "memory.h"
+#include "error.h"
+#include "math_const.h"
+#include "pppm.h"
+#include "gridcomm.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+enum{REVERSE_RHO};
+enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
+
+#define OFFSET 16384
+
+#ifdef FFT_SINGLE
+#define ZEROF 0.0f
+#define ONEF  1.0f
+#else
+#define ZEROF 0.0
+#define ONEF  1.0
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+PPPMDPLR::PPPMDPLR(LAMMPS *lmp, int narg, char **arg)
+  : PPPM(lmp, narg, arg)
+{
+  triclinic_support = 1;  // NOTE(review): presumably marks triclinic boxes as supported — confirm in KSpace
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PPPMDPLR::init()
+{
+  // pppm/dplr refuses to run with newton off (forces on ghost atoms
+  // are computed, per the original author's note)
+  if (!force->newton) {
+    error->all(FLERR,"Kspace style pppm/dplr requires newton on");
+  }
+
+  PPPM::init();
+
+  // start with a zeroed force buffer: three components per local atom
+  const int nfele = 3 * atom->nlocal;
+  fele.assign(nfele, 0.0);
+}
+
+
+/* ----------------------------------------------------------------------
+   compute the PPPM long-range force, energy, virial (the self-term is not subtracted from the global energy)
+------------------------------------------------------------------------- */
+
+void PPPMDPLR::compute(int eflag, int vflag)
+{
+  int i,j;
+
+  // set energy/virial flags
+  // invoke allocate_peratom() if needed for first time
+
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else evflag = evflag_atom = eflag_global = vflag_global =
+         eflag_atom = vflag_atom = 0;
+
+  if (evflag_atom && !peratom_allocate_flag) {
+    allocate_peratom();
+    cg_peratom->ghost_notify();
+    cg_peratom->setup();
+  }
+
+  // if atom count has changed, update qsum and qsqsum
+
+  if (atom->natoms != natoms_original) {
+    qsum_qsq();
+    natoms_original = atom->natoms;
+  }
+
+  // return if there are no charges
+
+  if (qsqsum == 0.0) return;
+
+  // convert atoms from box to lamda coords
+
+  if (triclinic == 0) boxlo = domain->boxlo;
+  else {
+    boxlo = domain->boxlo_lamda;
+    domain->x2lamda(atom->nlocal);
+  }
+
+  // extend size of per-atom arrays if necessary
+
+  if (atom->nmax > nmax) {
+    memory->destroy(part2grid);
+    nmax = atom->nmax;
+    memory->create(part2grid,nmax,3,"pppm:part2grid");
+  }
+
+  // find grid points for all my particles
+  // map my particle charge onto my local 3d density grid
+
+  particle_map();
+  make_rho();
+
+  // all procs communicate density values from their ghost cells
+  //   to fully sum contribution in their 3d bricks
+  // remap from 3d decomposition to FFT decomposition
+
+  cg->reverse_comm(this,REVERSE_RHO);
+  brick2fft();
+
+  // compute potential gradient on my FFT grid and
+  //   portion of e_long on this proc's FFT grid
+  // return gradients (electric fields) in 3d brick decomposition
+  // also performs per-atom calculations via poisson_peratom()
+
+  poisson();
+
+  // all procs communicate E-field values
+  // to fill ghost cells surrounding their 3d bricks
+
+  if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
+  else cg->forward_comm(this,FORWARD_IK);
+
+  // extra per-atom energy/virial communication
+
+  if (evflag_atom) {
+    if (differentiation_flag == 1 && vflag_atom)
+      cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
+    else if (differentiation_flag == 0)
+      cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
+  }
+
+  // calculate the force on my particles
+
+  fieldforce();   // NOTE(review): presumably dispatches to fieldforce_ik()/fieldforce_ad() — confirm in PPPM
+
+  // extra per-atom energy/virial communication
+
+  if (evflag_atom) fieldforce_peratom();
+
+  // sum global energy across procs and add in volume-dependent term
+
+  const double qscale = qqrd2e * scale;
+
+  if (eflag_global) {
+    double energy_all;
+    MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
+    energy = energy_all;
+
+    energy *= 0.5*volume;
+    // do not add self-term, for neutral systems qsum == 0
+    // energy -= g_ewald*qsqsum/MY_PIS +
+    //   MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
+    energy *= qscale;
+  }
+
+  // sum global virial across procs
+
+  if (vflag_global) {
+    double virial_all[6];
+    MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
+    for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
+  }
+  // std::cout<< "energy in pppm -------------------" << std::endl;
+  // std::cout << energy << " " 
+  // 	    << std::endl;
+  // std::cout<< "virial in pppm -------------------" << std::endl;
+  // for (int ii = 0; ii < 6; ++ii){
+  //   std::cout << virial[ii] << " " ;
+  // }
+  // std::cout << std::endl;
+
+  // per-atom energy/virial
+  // per-atom energy includes self-energy correction (the global energy above does not)
+  // ntotal accounts for TIP4P tallying eatom/vatom for ghost atoms
+
+  if (evflag_atom) {
+    double *q = atom->q;
+    int nlocal = atom->nlocal;
+    int ntotal = nlocal;
+    if (tip4pflag) ntotal += atom->nghost;
+
+    if (eflag_atom) {
+      for (i = 0; i < nlocal; i++) {
+        eatom[i] *= 0.5;
+        eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
+          (g_ewald*g_ewald*volume);
+        eatom[i] *= qscale;
+      }
+      for (i = nlocal; i < ntotal; i++) eatom[i] *= 0.5*qscale;
+    }
+
+    if (vflag_atom) {
+      for (i = 0; i < ntotal; i++)
+        for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
+    }
+  }
+
+  // 2d slab correction
+
+  if (slabflag == 1) slabcorr();
+
+  // convert atoms back from lamda to box coords
+
+  if (triclinic) domain->lamda2x(atom->nlocal);
+}
+
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get electric field & force on my particles for ik; forces are stored in fele, not atom->f
+------------------------------------------------------------------------- */
+void PPPMDPLR::fieldforce_ik()
+{
+  int i,l,m,n,nx,ny,nz,mx,my,mz;
+  FFT_SCALAR dx,dy,dz,x0,y0,z0;
+  FFT_SCALAR ekx,eky,ekz;
+
+  // loop over my charges, interpolate electric field from nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+  // ek = 3 components of E-field on particle
+
+  double *q = atom->q;
+  double **x = atom->x;
+  // double **f = atom->f;   (unused: forces are written to fele below)
+
+  int nlocal = atom->nlocal;
+  int nghost = atom->nghost;
+  int nall = nlocal + nghost;   // nall is referenced only by the commented-out block at the end
+
+  fele.resize(nlocal*3);   // three force components per local atom, zeroed on every call
+  fill(fele.begin(), fele.end(), 0.0);
+
+  for (i = 0; i < nlocal; i++) {
+    nx = part2grid[i][0];
+    ny = part2grid[i][1];
+    nz = part2grid[i][2];
+    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+    compute_rho1d(dx,dy,dz);
+
+    ekx = eky = ekz = ZEROF;
+    for (n = nlower; n <= nupper; n++) {
+      mz = n+nz;
+      z0 = rho1d[2][n];
+      for (m = nlower; m <= nupper; m++) {
+        my = m+ny;
+        y0 = z0*rho1d[1][m];
+        for (l = nlower; l <= nupper; l++) {
+          mx = l+nx;
+          x0 = y0*rho1d[0][l];
+          ekx -= x0*vdx_brick[mz][my][mx];
+          eky -= x0*vdy_brick[mz][my][mx];
+          ekz -= x0*vdz_brick[mz][my][mx];
+        }
+      }
+    }
+
+    // convert E-field to force
+
+    const double qfactor = qqrd2e * scale * q[i];
+    fele[i*3+0] += qfactor*ekx;
+    fele[i*3+1] += qfactor*eky;
+    if (slabflag != 2) fele[i*3+2] += qfactor*ekz;   // z component is left at zero when slabflag == 2
+  }
+
+  // vector<FLOAT_PREC> dcoord(nall*3), dbox(9);
+  // vector<int> dtype(nall);
+  // {
+  //   double ** xx = atom->x;
+  //   for(int ii = 0; ii < nall; ++ii){
+  //     for (int dd = 0; dd < 3; ++dd){
+  // 	dcoord[ii*3+dd] = xx[ii][dd];
+  //     }
+  //   }
+  //   int *type = atom->type;
+  //   for (int ii = 0; ii < nall; ++ii){
+  //     dtype[ii] = type[ii] - 1;
+  //   }
+  // }
+}
+
+/* ----------------------------------------------------------------------
+   interpolate from grid to get electric field & force on my particles for ad
+------------------------------------------------------------------------- */
+
+void PPPMDPLR::fieldforce_ad()
+{
+  // Per-atom forces are written to the member buffer fele (three
+  // components per local atom, the layout fieldforce_ik() uses) so that
+  // get_fele() exposes them; atom->f is left untouched.
+
+  int i,l,m,n,nx,ny,nz,mx,my,mz;
+  FFT_SCALAR dx,dy,dz;
+  FFT_SCALAR ekx,eky,ekz;
+  double s1,s2,s3;
+  double sf = 0.0;
+  double *prd;
+
+  prd = domain->prd;
+  double xprd = prd[0];
+  double yprd = prd[1];
+  double zprd = prd[2];
+
+  double hx_inv = nx_pppm/xprd;
+  double hy_inv = ny_pppm/yprd;
+  double hz_inv = nz_pppm/zprd;
+
+  // loop over my charges, interpolate electric field from nearby grid points
+  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
+  // (dx,dy,dz) = distance to "lower left" grid pt
+  // (mx,my,mz) = global coords of moving stencil pt
+  // ek = 3 components of E-field on particle
+
+  double *q = atom->q;
+  double **x = atom->x;
+  // double **f = atom->f;
+
+  int nlocal = atom->nlocal;
+
+  // reset the member buffer; a function-local vector here would shadow it
+  // and, sized nlocal instead of 3*nlocal, be overrun by the writes below
+  fele.assign(nlocal*3, 0.0);
+
+  for (i = 0; i < nlocal; i++) {
+    nx = part2grid[i][0];
+    ny = part2grid[i][1];
+    nz = part2grid[i][2];
+    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
+    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
+    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+
+    compute_rho1d(dx,dy,dz);
+    compute_drho1d(dx,dy,dz);
+
+    ekx = eky = ekz = ZEROF;
+    for (n = nlower; n <= nupper; n++) {
+      mz = n+nz;
+      for (m = nlower; m <= nupper; m++) {
+        my = m+ny;
+        for (l = nlower; l <= nupper; l++) {
+          mx = l+nx;
+          ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
+          eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
+          ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
+        }
+      }
+    }
+    ekx *= hx_inv;
+    eky *= hy_inv;
+    ekz *= hz_inv;
+
+    // convert E-field to force and subtract self forces
+
+    const double qfactor = qqrd2e * scale;
+
+    s1 = x[i][0]*hx_inv;
+    s2 = x[i][1]*hy_inv;
+    s3 = x[i][2]*hz_inv;
+    // x component
+    sf = sf_coeff[0]*sin(2*MY_PI*s1);
+    sf += sf_coeff[1]*sin(4*MY_PI*s1);
+    sf *= 2*q[i]*q[i];
+    fele[i*3+0] += qfactor*(ekx*q[i] - sf);
+
+    // y component
+    sf = sf_coeff[2]*sin(2*MY_PI*s2);
+    sf += sf_coeff[3]*sin(4*MY_PI*s2);
+    sf *= 2*q[i]*q[i];
+    fele[i*3+1] += qfactor*(eky*q[i] - sf);
+
+    // z component (left at zero when slabflag == 2)
+    sf = sf_coeff[4]*sin(2*MY_PI*s3);
+    sf += sf_coeff[5]*sin(4*MY_PI*s3);
+    sf *= 2*q[i]*q[i];
+    if (slabflag != 2) fele[i*3+2] += qfactor*(ekz*q[i] - sf);
+  }
+}
+
+
+
diff --git a/source/lmp/pppm_dplr.h b/source/lmp/pppm_dplr.h
new file mode 100644
index 0000000000..884d41bce2
--- /dev/null
+++ b/source/lmp/pppm_dplr.h
@@ -0,0 +1,54 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef KSPACE_CLASS
+
+KSpaceStyle(pppm/dplr,PPPMDPLR)
+
+#else
+
+#ifndef LMP_PPPM_DPLR_H
+#define LMP_PPPM_DPLR_H
+
+#ifdef HIGH_PREC
+#define FLOAT_PREC double
+#else
+#define FLOAT_PREC float
+#endif
+
+#include "pppm.h"
+#include <iostream>
+#include <vector>
+using namespace std;
+
+namespace LAMMPS_NS {
+
+  class PPPMDPLR : public PPPM {   // registered above as kspace style "pppm/dplr"
+public:
+    PPPMDPLR(class LAMMPS *, int, char **);
+    virtual ~PPPMDPLR () {};
+    void init();   // checks newton is on, runs PPPM::init(), zeroes fele
+    const vector<double > & get_fele() const {return fele;};   // read-only access to the force buffer filled by fieldforce_ik()
+protected:
+    virtual void compute(int, int);
+    virtual void fieldforce_ik();   // writes forces to fele instead of atom->f
+    virtual void fieldforce_ad();    
+private:
+    vector<double > fele;   // three force components per local atom (see init() and fieldforce_ik())
+  };
+
+}
+
+#endif
+#endif
+

From cd7bc372abe83a935748c785d4eaaba8f6b460dc Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Fri, 6 Dec 2019 13:10:40 +0800
Subject: [PATCH 37/38] remove tmp files after test

---
 source/tests/test_data_modifier_shuffle.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/source/tests/test_data_modifier_shuffle.py b/source/tests/test_data_modifier_shuffle.py
index d70bd94b3b..5148a251b9 100644
--- a/source/tests/test_data_modifier_shuffle.py
+++ b/source/tests/test_data_modifier_shuffle.py
@@ -1,4 +1,4 @@
-import os,sys,platform,json
+import os,sys,platform,json,shutil
 import numpy as np
 import unittest
 import dpdata
@@ -43,6 +43,10 @@ def setUp(self):
 
     def tearDown(self):
         tf.reset_default_graph()        
+        if os.path.isdir(os.path.join(modifier_datapath, 'sys_test_0')):
+            shutil.rmtree(os.path.join(modifier_datapath, 'sys_test_0'))
+        if os.path.isfile(os.path.join(modifier_datapath, 'dipole.pb')):
+            os.remove(os.path.join(modifier_datapath, 'dipole.pb'))
 
     def _setUp(self):
         args = Args()
@@ -104,7 +108,7 @@ def _setUp_data(self):
         self.dipoles0 = np.random.random([self.nframes, self.nsel * 3]) 
         self.box0 = np.reshape(np.eye(3) * scale, [-1, 9])
         self.box0 = np.tile(self.box0, [self.nframes, 1])
-        self._write_sys_data('data_modifier/sys_test_0', 
+        self._write_sys_data(os.path.join(modifier_datapath, 'sys_test_0'), 
                              self.atom_types0, self.coords0, self.dipoles0, self.box0)
         # sys1
         self.idx_map = np.array([6, 7, 1, 0, 5, 2, 4, 3], dtype = int)

From 188c918e5167b66e5a5b2c65d9b16a6f2db9110d Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Sun, 8 Dec 2019 18:52:52 +0800
Subject: [PATCH 38/38] change the DataModification: freeze dipole part with
 name dipole_charge. make the freeze and test work

---
 source/lmp/fix_dplr.cpp                    |  4 +-
 source/scripts/freeze.py                   | 12 +++-
 source/tests/test_data_modifier.py         | 10 ++--
 source/tests/test_data_modifier_shuffle.py |  4 +-
 source/train/Data.py                       |  4 +-
 source/train/DataModifier.py               | 65 ++++++++++++++++------
 source/train/DeepDipole.py                 |  5 +-
 source/train/DeepEval.py                   | 32 ++++++-----
 source/train/DeepPot.py                    | 39 ++++++++++++-
 source/train/test.py                       | 19 +++++--
 source/train/train.py                      |  2 +-
 11 files changed, 142 insertions(+), 54 deletions(-)

diff --git a/source/lmp/fix_dplr.cpp b/source/lmp/fix_dplr.cpp
index 481da66840..c0f0bb930c 100644
--- a/source/lmp/fix_dplr.cpp
+++ b/source/lmp/fix_dplr.cpp
@@ -83,8 +83,8 @@ FixDPLR::FixDPLR(LAMMPS *lmp, int narg, char **arg)
 
   // dpt.init(model);
   // dtm.init("frozen_model.pb");
-  dpt.init(model, 0, "load");
-  dtm.init(model, 0, "load");
+  dpt.init(model, 0, "dipole_charge");
+  dtm.init(model, 0, "dipole_charge");
 
   sel_type = dpt.sel_types();
   sort(sel_type.begin(), sel_type.end());
diff --git a/source/scripts/freeze.py b/source/scripts/freeze.py
index a7bb8ca11a..c83e7e569c 100755
--- a/source/scripts/freeze.py
+++ b/source/scripts/freeze.py
@@ -35,7 +35,7 @@
 import deepmd._soft_min_force_grad
 import deepmd._soft_min_virial_grad
 
-def _make_node_names(model_type = None) : 
+def _make_node_names(model_type, modifier_type = None) :
     if model_type == 'ener':
         nodes = "o_energy,o_force,o_virial,o_atom_energy,o_atom_virial,descrpt_attr/rcut,descrpt_attr/ntypes,fitting_attr/dfparam,fitting_attr/daparam,model_attr/tmap,model_attr/model_type"
     elif model_type == 'wfc':
@@ -48,6 +48,8 @@ def _make_node_names(model_type = None) :
         nodes = "o_global_polar,descrpt_attr/rcut,descrpt_attr/ntypes,model_attr/tmap,model_attr/sel_type,model_attr/model_type"
     else:
         raise RuntimeError('unknow model type ' + model_type)
+    if modifier_type == 'dipole_charge':
+        nodes += ",modifier_attr/type,modifier_attr/mdl_name,modifier_attr/mdl_charge_map,modifier_attr/sys_charge_map,modifier_attr/ewald_h,modifier_attr/ewald_beta,dipole_charge/descrpt_attr/rcut,dipole_charge/descrpt_attr/ntypes,dipole_charge/model_attr/tmap,dipole_charge/model_attr/model_type,o_dm_force,dipole_charge/model_attr/sel_type,dipole_charge/o_dipole,dipole_charge/model_attr/output_dim,o_dm_virial,o_dm_av"
     return nodes
 
 def freeze_graph(model_folder, 
@@ -75,14 +77,18 @@ def freeze_graph(model_folder,
     # We retrieve the protobuf graph definition
     graph = tf.get_default_graph()
     input_graph_def = graph.as_graph_def()
-    # nodes = [n.name for n in input_graph_def.node]
+    nodes = [n.name for n in input_graph_def.node]
 
     # We start a session and restore the graph weights
     with tf.Session() as sess:
         saver.restore(sess, input_checkpoint)
         model_type = sess.run('model_attr/model_type:0', feed_dict = {}).decode('utf-8')
+        if 'modifier_attr/type' in nodes:
+            modifier_type = sess.run('modifier_attr/type:0', feed_dict = {}).decode('utf-8')
+        else:
+            modifier_type = None
         if output_node_names is None :
-            output_node_names = _make_node_names(model_type)
+            output_node_names = _make_node_names(model_type, modifier_type)
         print('The following nodes will be frozen: %s' % output_node_names)
 
         # We use a built-in TF helper to export variables to constants
diff --git a/source/tests/test_data_modifier.py b/source/tests/test_data_modifier.py
index cdf9828a4d..134e31095e 100644
--- a/source/tests/test_data_modifier.py
+++ b/source/tests/test_data_modifier.py
@@ -100,7 +100,7 @@ def _test_fv (self):
         data = Data()
         coord, box, atype = data.get_data()
         atype = atype[0]
-        ve, vf, vv = dcm.eval_modify(coord, box, atype)
+        ve, vf, vv = dcm.eval(coord, box, atype)
 
         hh = global_default_fv_hh
         hh=1e-4
@@ -115,8 +115,8 @@ def _test_fv (self):
             coordm = np.copy(coord)
             coordp[:,ii] += hh
             coordm[:,ii] -= hh
-            ep, _, __ = dcm.eval_modify(coordp, box, atype, eval_fv = False)
-            em, _, __ = dcm.eval_modify(coordm, box, atype, eval_fv = False)
+            ep, _, __ = dcm.eval(coordp, box, atype, eval_fv = False)
+            em, _, __ = dcm.eval(coordm, box, atype, eval_fv = False)
             num_f = -(ep - em) / (2.*hh)
             for ff in range(nframes):
                 self.assertAlmostEqual(vf[ff,ii], num_f[ff], 
@@ -140,8 +140,8 @@ def _test_fv (self):
                 coord3m = np.matmul(rcoord3, box3m)
                 coordp = np.reshape(coord3p, [nframes,-1])
                 coordm = np.reshape(coord3m, [nframes,-1])
-                ep, _, __ = dcm.eval_modify(coordp, boxp, atype, eval_fv = False)
-                em, _, __ = dcm.eval_modify(coordm, boxm, atype, eval_fv = False)
+                ep, _, __ = dcm.eval(coordp, boxp, atype, eval_fv = False)
+                em, _, __ = dcm.eval(coordm, boxm, atype, eval_fv = False)
                 num_deriv[:,ii,jj] = -(ep - em) / (2.*hh)
         # box3t = np.transpose(box3, [0,2,1])
         # t_esti = np.matmul(num_deriv, box3t)
diff --git a/source/tests/test_data_modifier_shuffle.py b/source/tests/test_data_modifier_shuffle.py
index 5148a251b9..81d36ed8ab 100644
--- a/source/tests/test_data_modifier_shuffle.py
+++ b/source/tests/test_data_modifier_shuffle.py
@@ -192,8 +192,8 @@ def test_modify(self):
                                    [1, 1, 1, 1, 1],
                                    1,
                                    0.25)
-        ve0, vf0, vv0 = dcm.eval_modify(self.coords0, self.box0, self.atom_types0)
-        ve1, vf1, vv1 = dcm.eval_modify(self.coords1, self.box1, self.atom_types1)
+        ve0, vf0, vv0 = dcm.eval(self.coords0, self.box0, self.atom_types0)
+        ve1, vf1, vv1 = dcm.eval(self.coords1, self.box1, self.atom_types1)
         vf01 = vf0[:,self.idx_map, :]
 
         for ii in range(self.nframes):
diff --git a/source/train/Data.py b/source/train/Data.py
index 6c0672afc2..d5a91f6b7b 100644
--- a/source/train/Data.py
+++ b/source/train/Data.py
@@ -121,7 +121,7 @@ def get_batch(self, batch_size) :
             self.set_count += 1
             set_size = self.batch_set["coord"].shape[0]
             if self.modifier is not None:
-                self.modifier.modify(self.batch_set)
+                self.modifier.modify_data(self.batch_set)
         iterator_1 = self.iterator + batch_size
         if iterator_1 >= set_size :
             iterator_1 = set_size
@@ -141,7 +141,7 @@ def get_test (self, ntests = -1) :
             idx = np.arange(ntests_)
         ret = self._get_subdata(self.test_set, idx = idx)
         if self.modifier is not None:
-            self.modifier.modify(ret)
+            self.modifier.modify_data(ret)
         return ret
 
     def get_type_map(self) :
diff --git a/source/train/DataModifier.py b/source/train/DataModifier.py
index 4fa57d9d86..27f3f93b60 100644
--- a/source/train/DataModifier.py
+++ b/source/train/DataModifier.py
@@ -1,6 +1,6 @@
 import os,platform
 import numpy as np
-from deepmd import DeepDipole
+from deepmd.DeepDipole import DeepDipole
 from deepmd.env import tf
 from deepmd.common import select_idx_map, make_default_mesh
 from deepmd.EwaldRecp import EwaldRecp
@@ -29,15 +29,22 @@ def __init__(self,
                  sys_charge_map, 
                  ewald_h = 1, 
                  ewald_beta = 1):
-        DeepDipole.__init__(self, model_name)
-        self.er = EwaldRecp(ewald_h, ewald_beta)
+        # the dipole model is loaded with prefix 'dipole_charge'
+        self.modifier_prefix = 'dipole_charge'
+        # init dipole model
+        DeepDipole.__init__(self, model_name, load_prefix = self.modifier_prefix)
+        self.model_name = model_name
         self.model_charge_map = model_charge_map
         self.sys_charge_map = sys_charge_map
         self.sel_type = list(self.get_sel_type())
+        # init ewald recp
+        self.ewald_h = ewald_h
+        self.ewald_beta = ewald_beta
+        self.er = EwaldRecp(self.ewald_h, self.ewald_beta)
         # dimension of dipole
         self.ext_dim = 3
-        self.t_ndesc  = self.graph.get_tensor_by_name ('load/descrpt_attr/ndescrpt:0')
-        self.t_sela  = self.graph.get_tensor_by_name ('load/descrpt_attr/sel:0')
+        self.t_ndesc  = self.graph.get_tensor_by_name(os.path.join(self.modifier_prefix, 'descrpt_attr/ndescrpt:0'))
+        self.t_sela  = self.graph.get_tensor_by_name(os.path.join(self.modifier_prefix, 'descrpt_attr/sel:0'))
         [self.ndescrpt, self.sel_a] = self.sess.run([self.t_ndesc, self.t_sela])
         self.sel_r = [ 0 for ii in range(len(self.sel_a)) ]
         self.nnei_a = np.cumsum(self.sel_a)[-1]
@@ -50,6 +57,25 @@ def __init__(self,
         self.ntypes = len(self.sel_a)
 
     def build_fv_graph(self):
+        with tf.variable_scope('modifier_attr') :
+            t_mdl_name = tf.constant(self.model_name, 
+                                     name = 'mdl_name', 
+                                     dtype = tf.string)
+            t_modi_type = tf.constant(self.modifier_prefix, 
+                                      name = 'type', 
+                                      dtype = tf.string)
+            t_mdl_charge_map = tf.constant(' '.join([str(ii) for ii in self.model_charge_map]),
+                                            name = 'mdl_charge_map', 
+                                            dtype = tf.string)
+            t_sys_charge_map = tf.constant(' '.join([str(ii) for ii in self.sys_charge_map]),
+                                            name = 'sys_charge_map', 
+                                            dtype = tf.string)
+            t_ewald_h = tf.constant(self.ewald_h,
+                                    name = 'ewald_h', 
+                                    dtype = tf.float64)
+            t_ewald_b = tf.constant(self.ewald_beta,
+                                    name = 'ewald_beta',
+                                    dtype = tf.float64)
         with self.graph.as_default():
             return self._build_fv_graph_inner()        
 
@@ -66,10 +92,10 @@ def _build_fv_graph_inner(self):
         # (nframes x natoms_sel) x 1 x 3
         self.t_ef_reshape = tf.reshape(self.t_ef, [nfxnas, 1, 3])
         # (nframes x natoms) x ndescrpt
-        self.descrpt = self.graph.get_tensor_by_name('load/o_rmat:0')
-        self.descrpt_deriv = self.graph.get_tensor_by_name('load/o_rmat_deriv:0')
-        self.nlist = self.graph.get_tensor_by_name('load/o_nlist:0')
-        self.rij = self.graph.get_tensor_by_name('load/o_rij:0')
+        self.descrpt = self.graph.get_tensor_by_name(os.path.join(self.modifier_prefix, 'o_rmat:0'))
+        self.descrpt_deriv = self.graph.get_tensor_by_name(os.path.join(self.modifier_prefix, 'o_rmat_deriv:0'))
+        self.nlist = self.graph.get_tensor_by_name(os.path.join(self.modifier_prefix, 'o_nlist:0'))
+        self.rij = self.graph.get_tensor_by_name(os.path.join(self.modifier_prefix, 'o_rij:0'))
         # self.descrpt_reshape = tf.reshape(self.descrpt, [nf, 192 * self.ndescrpt])
         # self.descrpt_deriv = tf.reshape(self.descrpt_deriv, [nf, 192 * self.ndescrpt * 3])
 
@@ -163,7 +189,7 @@ def _slice_descrpt_deriv(self, deriv):
         return tf.concat(coll, axis = 1)        
 
 
-    def eval_modify(self, coord, box, atype, eval_fv = True):
+    def eval(self, coord, box, atype, eval_fv = True):
         coord, atype, imap = self.sort_input(coord, atype)
         natoms = coord.shape[1] // 3
         nframes = coord.shape[0]
@@ -194,6 +220,8 @@ def eval_modify(self, coord, box, atype, eval_fv = True):
         all_f = np.concatenate(all_f, axis = 0)
         all_v = np.concatenate(all_v, axis = 0)
         # print('finish  er')
+        # reshape
+        tot_e.reshape([nframes,1])
 
         tot_f = None
         tot_v = None
@@ -218,6 +246,8 @@ def eval_modify(self, coord, box, atype, eval_fv = True):
                 orig_idx = sel_idx_map[ii]            
                 tot_f[:,orig_idx*3:orig_idx*3+3] += ext_f[:,ii*3:ii*3+3]                
             tot_f = self.reverse_map(np.reshape(tot_f, [nframes,-1,3]), imap)
+            # reshape
+            tot_f = tot_f.reshape([nframes,natoms,3])
             # compute v
             dipole3 = np.reshape(dipole, [nframes, nsel, 3])
             ext_f3 = np.reshape(ext_f, [nframes, nsel, 3])
@@ -228,6 +258,8 @@ def eval_modify(self, coord, box, atype, eval_fv = True):
             fd_corr_v = -np.matmul(ext_f3, dipole3).reshape([nframes, 9])
             # print(all_v, '\n', corr_v, '\n', fd_corr_v)
             tot_v = all_v + corr_v + fd_corr_v
+            # reshape
+            tot_v = tot_v.reshape([nframes,9])
 
         return tot_e, tot_f, tot_v
 
@@ -276,7 +308,7 @@ def _extend_system(self, coord, box, atype, charge):
         ref_coord = coord3[:,sel_idx_map,:]
         ref_coord = np.reshape(ref_coord, [nframes, nsel * 3])
         
-        dipole = self.eval(coord, box, atype)
+        dipole = DeepDipole.eval(self, coord, box, atype)
         dipole = np.reshape(dipole, [nframes, nsel * 3])
         
         wfcc_coord = ref_coord + dipole
@@ -296,8 +328,7 @@ def _extend_system(self, coord, box, atype, charge):
         return all_coord, all_charge, dipole
 
 
-    def modify(self, 
-               data):
+    def modify_data(self, data):
         if 'find_energy' not in data and 'find_force' not in data and 'find_virial' not in data:
             return
 
@@ -308,16 +339,16 @@ def modify(self,
         atype = atype[0]
         nframes = coord.shape[0]
 
-        tot_e, tot_f, tot_v = self.eval_modify(coord, box, atype)
+        tot_e, tot_f, tot_v = self.eval(coord, box, atype)
 
         # print(tot_f[:,0])
         
         if 'find_energy' in data and data['find_energy'] == 1.0 :
-            data['energy'] -= tot_e.reshape([nframes, 1])
+            data['energy'] -= tot_e.reshape(data['energy'].shape)
         if 'find_force' in data and data['find_force'] == 1.0 :
-            data['force'] -= tot_f
+            data['force'] -= tot_f.reshape(data['force'].shape)
         if 'find_virial' in data and data['find_virial'] == 1.0 :
-            data['virial'] -= tot_v.reshape([nframes, 9])
+            data['virial'] -= tot_v.reshape(data['virial'].shape)
 
 
                            
diff --git a/source/train/DeepDipole.py b/source/train/DeepDipole.py
index 3ef107b4a2..31b166284e 100644
--- a/source/train/DeepDipole.py
+++ b/source/train/DeepDipole.py
@@ -6,6 +6,7 @@
 
 class DeepDipole (DeepTensor) :
     def __init__(self, 
-                 model_file) :
-        DeepTensor.__init__(self, model_file, 'dipole', 3)
+                 model_file, 
+                 load_prefix = 'load') :
+        DeepTensor.__init__(self, model_file, 'dipole', 3, load_prefix = load_prefix)
 
diff --git a/source/train/DeepEval.py b/source/train/DeepEval.py
index ba383f245f..386d68a4e2 100644
--- a/source/train/DeepEval.py
+++ b/source/train/DeepEval.py
@@ -23,10 +23,11 @@ class DeepEval():
     common methods for DeepPot, DeepWFC, DeepPolar, ...
     """
     def __init__(self, 
-                 model_file) :
+                 model_file, 
+                 load_prefix = 'load') :
         model_file = model_file
-        self.graph = self._load_graph (model_file)
-        t_mt = self.graph.get_tensor_by_name('load/model_attr/model_type:0')
+        self.graph = self._load_graph (model_file, prefix = load_prefix)
+        t_mt = self.graph.get_tensor_by_name(os.path.join(load_prefix, 'model_attr/model_type:0'))
         sess = tf.Session (graph = self.graph)
         [mt] = sess.run([t_mt], feed_dict = {})
         self.model_type = mt.decode('utf-8')
@@ -112,25 +113,26 @@ class DeepTensor(DeepEval) :
     def __init__(self, 
                  model_file, 
                  variable_name,                  
-                 variable_dof) :
-        DeepEval.__init__(self, model_file)
+                 variable_dof, 
+                 load_prefix = 'load') :
+        DeepEval.__init__(self, model_file, load_prefix = load_prefix)
         # self.model_file = model_file
         # self.graph = self.load_graph (self.model_file)
         self.variable_name = variable_name
         self.variable_dof = variable_dof
         # checkout input/output tensors from graph
-        self.t_ntypes = self.graph.get_tensor_by_name ('load/descrpt_attr/ntypes:0')
-        self.t_rcut   = self.graph.get_tensor_by_name ('load/descrpt_attr/rcut:0')
-        self.t_tmap   = self.graph.get_tensor_by_name ('load/model_attr/tmap:0')
-        self.t_sel_type= self.graph.get_tensor_by_name ('load/model_attr/sel_type:0')
+        self.t_ntypes = self.graph.get_tensor_by_name (os.path.join(load_prefix, 'descrpt_attr/ntypes:0'))
+        self.t_rcut   = self.graph.get_tensor_by_name (os.path.join(load_prefix, 'descrpt_attr/rcut:0'))
+        self.t_tmap   = self.graph.get_tensor_by_name (os.path.join(load_prefix, 'model_attr/tmap:0'))
+        self.t_sel_type= self.graph.get_tensor_by_name (os.path.join(load_prefix, 'model_attr/sel_type:0'))
         # inputs
-        self.t_coord  = self.graph.get_tensor_by_name ('load/t_coord:0')
-        self.t_type   = self.graph.get_tensor_by_name ('load/t_type:0')
-        self.t_natoms = self.graph.get_tensor_by_name ('load/t_natoms:0')
-        self.t_box    = self.graph.get_tensor_by_name ('load/t_box:0')
-        self.t_mesh   = self.graph.get_tensor_by_name ('load/t_mesh:0')
+        self.t_coord  = self.graph.get_tensor_by_name (os.path.join(load_prefix, 't_coord:0'))
+        self.t_type   = self.graph.get_tensor_by_name (os.path.join(load_prefix, 't_type:0'))
+        self.t_natoms = self.graph.get_tensor_by_name (os.path.join(load_prefix, 't_natoms:0'))
+        self.t_box    = self.graph.get_tensor_by_name (os.path.join(load_prefix, 't_box:0'))
+        self.t_mesh   = self.graph.get_tensor_by_name (os.path.join(load_prefix, 't_mesh:0'))
         # outputs
-        self.t_tensor = self.graph.get_tensor_by_name ('load/o_%s:0' % self.variable_name)
+        self.t_tensor = self.graph.get_tensor_by_name (os.path.join(load_prefix, 'o_%s:0' % self.variable_name))
         # start a tf session associated to the graph
         self.sess = tf.Session (graph = self.graph)        
         [self.ntypes, self.rcut, self.tmap, self.tselt] = self.sess.run([self.t_ntypes, self.t_rcut, self.t_tmap, self.t_sel_type])
diff --git a/source/train/DeepPot.py b/source/train/DeepPot.py
index 9430216c02..88bf92f63d 100644
--- a/source/train/DeepPot.py
+++ b/source/train/DeepPot.py
@@ -5,6 +5,7 @@
 from deepmd.env import tf
 from deepmd.common import make_default_mesh
 from deepmd.DeepEval import DeepEval
+from deepmd.DataModifier import DipoleChargeModifier
 
 class DeepPot (DeepEval) :
     def __init__(self, 
@@ -46,6 +47,22 @@ def __init__(self,
         self.sess = tf.Session (graph = self.graph)        
         [self.ntypes, self.rcut, self.dfparam, self.daparam, self.tmap] = self.sess.run([self.t_ntypes, self.t_rcut, self.t_dfparam, self.t_daparam, self.t_tmap])
         self.tmap = self.tmap.decode('UTF-8').split()
+        # setup modifier
+        try:
+            t_modifier_type = self.graph.get_tensor_by_name('load/modifier_attr/type:0')
+            self.modifier_type = self.sess.run(t_modifier_type).decode('UTF-8')
+        except ValueError:
+            self.modifier_type = None
+        if self.modifier_type == 'dipole_charge':
+            t_mdl_name = self.graph.get_tensor_by_name('load/modifier_attr/mdl_name:0')
+            t_mdl_charge_map = self.graph.get_tensor_by_name('load/modifier_attr/mdl_charge_map:0')
+            t_sys_charge_map = self.graph.get_tensor_by_name('load/modifier_attr/sys_charge_map:0')
+            t_ewald_h = self.graph.get_tensor_by_name('load/modifier_attr/ewald_h:0')
+            t_ewald_beta = self.graph.get_tensor_by_name('load/modifier_attr/ewald_beta:0')
+            [mdl_name, mdl_charge_map, sys_charge_map, ewald_h, ewald_beta] = self.sess.run([t_mdl_name, t_mdl_charge_map, t_sys_charge_map, t_ewald_h, t_ewald_beta])
+            mdl_charge_map = [int(ii) for ii in mdl_charge_map.decode('UTF-8').split()]
+            sys_charge_map = [int(ii) for ii in sys_charge_map.decode('UTF-8').split()]
+            self.dm = DipoleChargeModifier(mdl_name, mdl_charge_map, sys_charge_map, ewald_h = ewald_h, ewald_beta = ewald_beta)
 
 
     def get_ntypes(self) :
@@ -63,8 +80,27 @@ def get_dim_aparam(self) :
     def get_type_map(self):
         return self.tmap
 
-
     def eval(self,
+             coords,
+             cells,
+             atom_types,
+             fparam = None,
+             aparam = None,
+             atomic = False) :
+        if atomic :
+            if self.modifier_type is not None:
+                raise RuntimeError('modifier does not support atomic modification')
+            return self.eval_inner(coords, cells, atom_types, fparam = fparam, aparam = aparam, atomic = atomic)
+        else :
+            e, f, v = self.eval_inner(coords, cells, atom_types, fparam = fparam, aparam = aparam, atomic = atomic)
+            if self.modifier_type is not None:
+                me, mf, mv = self.dm.eval(coords, cells, atom_types)
+                e += me.reshape(e.shape)
+                f += mf.reshape(f.shape)
+                v += mv.reshape(v.shape)
+            return e, f, v
+
+    def eval_inner(self,
              coords, 
              cells, 
              atom_types, 
@@ -160,3 +196,4 @@ def eval(self,
         else :
             return energy, force, virial
 
+
diff --git a/source/train/test.py b/source/train/test.py
index 83f882e32b..29b66932dd 100755
--- a/source/train/test.py
+++ b/source/train/test.py
@@ -53,12 +53,24 @@ def test_ener (args) :
         aparam = test_data["aparam"][:numb_test] 
     else :
         aparam = None
-    energy, force, virial, ae, av = dp.eval(coord, box, atype, fparam = fparam, aparam = aparam, atomic = True)
+    detail_file = args.detail_file
+    if detail_file is not None:
+        atomic = True
+    else:
+        atomic = False
+
+    ret = dp.eval(coord, box, atype, fparam = fparam, aparam = aparam, atomic = atomic)
+    energy = ret[0]
+    force  = ret[1]
+    virial = ret[2]
     energy = energy.reshape([numb_test,1])
     force = force.reshape([numb_test,-1])
     virial = virial.reshape([numb_test,9])
-    ae = ae.reshape([numb_test,-1])
-    av = av.reshape([numb_test,-1])
+    if atomic:
+        ae = ret[3]
+        av = ret[4]
+        ae = ae.reshape([numb_test,-1])
+        av = av.reshape([numb_test,-1])
 
     l2e = (l2err (energy - test_data["energy"][:numb_test].reshape([-1,1])))
     l2f = (l2err (force  - test_data["force"] [:numb_test]))
@@ -74,7 +86,6 @@ def test_ener (args) :
     print ("Virial L2err        : %e eV" % l2v)
     print ("Virial L2err/Natoms : %e eV" % l2va)
 
-    detail_file = args.detail_file
     if detail_file is not None :
         pe = np.concatenate((np.reshape(test_data["energy"][:numb_test], [-1,1]),
                              np.reshape(energy, [-1,1])), 
diff --git a/source/train/train.py b/source/train/train.py
index 9fd4663057..874d66fb6a 100755
--- a/source/train/train.py
+++ b/source/train/train.py
@@ -110,7 +110,7 @@ def _do_work(jdata, run_opt):
        ipt_type_map = type_map
     # data modifier
     modifier = None
-    modi_data = jdata['training'].get("data_modifier", None)
+    modi_data = jdata['model'].get("modifier", None)
     if modi_data is not None:
        if modi_data['type'] == 'dipole_charge':
           modifier = DipoleChargeModifier(modi_data['model_name'],