diff --git a/.gitignore b/.gitignore index d62d5d5a93..28934b59eb 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,9 @@ *.pyc CMakeCache.txt CMakeFiles +*.bk +build +_skbuild +deepmd.egg-info +dist + diff --git a/README.md b/README.md index 8bbb53a6d0..722d0ff8c9 100644 --- a/README.md +++ b/README.md @@ -10,9 +10,12 @@ - [Deep Potential in a nutshell](#deep-potential-in-a-nutshell) - [Download and install](#download-and-install) - [Easy installation methods](#easy-installation-methods) - - [Install DeePMD-kit from scratch](#install-deepmd-kit-from-scratch) - - [Install tensorflow](#install-tensorflow) - - [Install DeePMD-kit](#install-deepmd-kit) + - [Install the python interface](#install-the-python-interface) + - [Install the Tensorflow's python interface](#install-the-tensorflows-python-interface) + - [Install the DeePMD-kit's python interface](#install-the-deepmd-kits-python-interface) + - [Install the C++ interface](#install-the-c-interface) + - [Install the Tensorflow's C++ interface](#install-the-tensorflows-c-interface) + - [Install the DeePMD-kit's C++ interface](#install-the-deepmd-kits-c-interface) - [Install LAMMPS's DeePMD-kit module](#install-lammpss-deepmd-kit-module) - [Build DeePMD-kit with GPU support](#build-deepmd-kit-with-gpu-support) - [Use DeePMD-kit](#use-deepmd-kit) @@ -21,11 +24,11 @@ - [The DeePMD model](#the-deepmd-model) - [The DeepPot-SE model](#the-deeppot-se-model) - [Freeze and test a model](#freeze-and-test-a-model) + - [Model inference](#model-inference) - [Run MD with Lammps](#run-md-with-lammps) - [Include deepmd in the pair style](#include-deepmd-in-the-pair-style) - [Long-range interaction](#long-range-interaction) - [Run path-integral MD with i-PI](#run-path-integral-md-with-i-pi) - - [Run MD with native code](#run-md-with-native-code) - [Troubleshooting](#troubleshooting) # About DeePMD-kit @@ -83,15 +86,32 @@ Please follow our [github](https://github.com/deepmodeling/deepmd-kit) webpage t ## Easy installation 
methods A docker for installing the DeePMD-kit on CentOS 7 is available [here](https://github.com/frankhan91/deepmd-kit_docker). We are currently working on installation methods using the `conda` package management system and `pip` tools. Hope these will come out soon. -## Install DeePMD-kit from scratch -Installing DeePMD-kit from scratch is lengthy, but do not be panic. Just follow step by step. Wish you good luck.. +## Install the python interface -### Install tensorflow -We tested two tensorflow installation options. You may follow either [tf-1.8](doc/install-tf.1.8.md) or [tf-1.12](doc/install-tf.1.12.md). Click one of the links and follow the instructions therein. Of course, other installation options are not forbidden. +### Install the Tensorflow's python interface +We follow the virtual environment approach to install the tensorflow's Python interface. The full instruction can be found on [the tensorflow's official website](https://www.tensorflow.org/install/pip). Now we assume that the Python interface will be installed to virtual environment directory `$tensorflow_venv` +```bash +virtualenv -p python3 $tensorflow_venv +source $tensorflow_venv/bin/activate +pip install --upgrade pip +pip install --upgrade tensorflow==1.8.0 +``` +If one needs the GPU support of deepmd-kit, the GPU version of tensorflow should be installed by +```bash +pip install --upgrade tensorflow-gpu==1.8.0 +``` +To verify the installation, +```bash +python -c "import tensorflow as tf; sess=tf.Session(); print(sess.run(tf.reduce_sum(tf.random_normal([1000, 1000]))))" +``` +One should remember to activate the virtual environment every time he/she uses deepmd-kit. -### Install DeePMD-kit -The DeePMD-kit was tested with compiler gcc >= 4.9. 
+One may also need the following dependencies that are installed by +```bash +pip install --upgrade cmake scikit-build dpdata +``` +### Install the DeePMD-kit's python interface Firstly clone the DeePMD-kit source code ```bash cd /some/workspace @@ -102,7 +122,50 @@ If one downloads the .zip file from the github, then the default folder of sourc cd deepmd-kit deepmd_source_dir=`pwd` ``` -Then goto the source code directory and make a build directory. +Then goto the source code directory and execute +```bash +cd $deepmd_source_dir/source +python setup.py install +``` +To test the installation, one may execute +```bash +python -m deepmd -h +``` +It will print the help information like +```text +usage: __main__.py [-h] {config,train,freeze,test} ... + +deepmd-kit + +optional arguments: + -h, --help show this help message and exit + +Valid subcommands: + {config,train,freeze,test} + config fast configuration of parameter file for smooth model + train train a model + freeze freeze the model + test test the model +``` + +## Install the C++ interface + +### Install the Tensorflow's C++ interface + +If one does not need to use DeePMD-kit with Lammps or i-PI, then the python interface does everything and he/she can safely skip this section. + +The C++ interface of DeePMD-kit was tested with compiler gcc >= 4.9. + +Firstly the C++ interface of Tensorflow should be installed. It is noted that the version of Tensorflow C++ interface should be consistent with that of the python interface. We assume that you have followed our instruction and installed tensorflow python interface 1.8.0, i.e. +```bash +pip install --upgrade tensorflow==1.8.0 +``` +then you may follow [the instruction here](doc/install-tf.1.8.md) to install the corresponding C++ interface. + +If you have installed Tensorflow's python interface 1.12.0, you may follow [here](doc/install-tf.1.12.md) to install the corresponding C++ interface. 
+ +### Install the DeePMD-kit's C++ interface +Now goto the source code directory of DeePMD-kit and make a build place. ```bash cd $deepmd_source_dir/source mkdir build @@ -110,18 +173,19 @@ cd build ``` I assume you want to install DeePMD-kit into path `$deepmd_root`, then execute cmake ```bash -cmake -DTF_GOOGLE_BIN=true -DTENSORFLOW_ROOT=$tensorflow_root \ --DCMAKE_INSTALL_PREFIX=$deepmd_root .. +cmake -DTENSORFLOW_ROOT=$tensorflow_root -DCMAKE_INSTALL_PREFIX=$deepmd_root .. ``` -If you built the tensorflow's Python interface by gcc>=5.0, then remove the option `-DTF_GOOGLE_BIN=true`. If the cmake has executed successfully, then +If the cmake has executed successfully, then ```bash make make install ``` -If everything works fine, you will have the following executables installed in `$deepmd_root/bin` +If everything works fine, you will have the following executable and libraries installed in `$deepmd_root/bin` and `$deepmd_root/lib` ```bash $ ls $deepmd_root/bin -dp_frz dp_ipi dp_test dp_train +dp_ipi +$ ls $deepmd_root/lib +libdeepmd_ipi.so libdeepmd_op.so libdeepmd.so ``` ### Install LAMMPS's DeePMD-kit module @@ -234,9 +298,9 @@ It generates three sets `set.000`, `set.001` and `set.002`, with each set contai The method of training is explained in our [DeePMD paper][2]. With the source code we provide a small training dataset taken from 400 frames generated by NVT ab-initio water MD trajectory with 300 frames for training and 100 for testing. [An example training parameter file](./examples/train/water.json) is provided. One can try with the training by ```bash $ cd $deepmd_source_dir/examples/train/ -$ $deepmd_root/bin/dp_train water.json +$ python -m deepmd train water.json ``` -`$deepmd_root/bin/dp_train` is the training program, and `water.json` is the `json` format parameter file that controls the training. The components of the `water.json` are +`water.json` is the `json` format parameter file that controls the training. 
The components of the `water.json` are ```json { "_comment": " model parameters", @@ -292,7 +356,7 @@ The option **`axis_rule`** specifies how to make the axis for the local coordina The option **`fitting_neuron`** (deprecated name **`n_neuron`**) is an integer vector that determines the shape the neural network. The size of the vector is identical to the number of hidden layers of the network. From left to right the members denote the sizes of each hidden layers from input end to the output end, respectively. If two neighboring layers are of the same size, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is build between them. If the option **`fitting_resnet_dt`** is set `true`, then a timestep is used in the ResNet. -The option **`systems`** provide location of the systems (path to `set.*` and `type.raw`). It is a vector, thus DeePMD-kit allows you to provide multiple systems. DeePMD-kit will train the model with the systems in the vector one by one in a cyclic manner. +The option **`systems`** provide location of the systems (path to `set.*` and `type.raw`). It is a vector, thus DeePMD-kit allows you to provide multiple systems. DeePMD-kit will train the model with the systems in the vector one by one in a cyclic manner. **It is warned that the example water data (in folder `examples/data/water`) is of very limited amount, is provided only for testing purpose, and should not be used to train a productive model.** The option **`batch_size`** specifies the number of frames in each batch. The option **`stop_batch`** specifies the total number of batches will be used in the training. @@ -309,13 +373,22 @@ Since we do not have virial data, the virial prefactors `start_pref_v` and `limi The option **`seed`** specifies the random seed for neural network initialization. If not provided, the `seed` will be initialized with `None`. 
-During the training, the error of the model is tested every **`disp_freq`** batches with **`numb_test`** frames from the last set in the **`systems`** directory on the fly, and the results are output to **`disp_file`**. +During the training, the error of the model is tested every **`disp_freq`** batches with **`numb_test`** frames from the last set in the **`systems`** directory on the fly, and the results are output to **`disp_file`**. A typical `disp_file` looks like +```bash +# batch l2_tst l2_trn l2_e_tst l2_e_trn l2_f_tst l2_f_trn lr + 0 2.67e+01 2.57e+01 2.21e-01 2.22e-01 8.44e-01 8.12e-01 1.0e-03 + 100 6.14e+00 5.40e+00 3.01e-01 2.99e-01 1.93e-01 1.70e-01 1.0e-03 + 200 5.02e+00 4.49e+00 1.53e-01 1.53e-01 1.58e-01 1.42e-01 1.0e-03 + 300 4.36e+00 3.71e+00 7.32e-02 7.27e-02 1.38e-01 1.17e-01 1.0e-03 + 400 4.04e+00 3.29e+00 3.16e-02 3.22e-02 1.28e-01 1.04e-01 1.0e-03 +``` +The first column displays the number of batches. The second and third columns display the loss function evaluated by `numb_test` frames randomly chosen from the test set and that evaluated by the current training batch, respectively. The fourth and fifth columns display the RMS energy error (normalized by number of atoms) evaluated by `numb_test` frames randomly chosen from the test set and that evaluated by the current training batch, respectively. The sixth and seventh columns display the RMS force error (component-wise) evaluated by `numb_test` frames randomly chosen from the test set and that evaluated by the current training batch, respectively. The last column displays the current learning rate. Checkpoints will be written to files with prefix **`save_ckpt`** every **`save_freq`** batches. If **`restart`** is set to `true`, then the training will start from the checkpoint named **`load_ckpt`**, rather than from scratch. 
-Several command line options can be passed to `dp_train`, which can be checked with +Several command line options can be passed to `python -m deepmd train`, which can be checked with ```bash -$ $deepmd_root/bin/dp_train --help +$ python -m deepmd train --help ``` An explanation will be provided ``` @@ -340,7 +413,7 @@ The keys `intra_op_parallelism_threads` and `inter_op_parallelism_threads` are T The smooth version of DeePMD, or the [DeepPot-SE model][3], can also be trained by DeePMD-kit. [An example training parameter file](./examples/train/water_smth.json) is provided. One can try with the training by ```bash $ cd $deepmd_source_dir/examples/train/ -$ $deepmd_root/bin/dp_train water_smth.json +$ python -m deepmd train water_smth.json ``` The difference between the standard and smooth DeePMD models lies in the model parameters: ```json @@ -369,18 +442,19 @@ The **`filter_neuron`** provides the size of the filter network (also called loc ## Freeze and test a model The trained neural network is extracted from a checkpoint and dumped into a database. This process is called "freezing" a model. The idea and part of our code are from [Morgan](https://blog.metaflow.fr/tensorflow-how-to-freeze-a-model-and-serve-it-with-a-python-api-d4f3596b3adc). To freeze a model, typically one does ```bash -$ $deepmd_root/bin/dp_frz -o graph.pb +$ python -m deepmd freeze -o graph.pb ``` in the folder where the model is trained. The output database is called `graph.pb`. -The frozen model can be used in many ways. The most straightforward test can be performed using `dp_test`. Several command line options can be passed to `dp_test`, which can be checked with +The frozen model can be used in many ways. The most straightforward test can be performed using `python -m deepmd test`. 
Several command line options can be passed to `python -m deepmd test`, which can be checked with ```bash -$ $deepmd_root/bin/dp_test --help +$ python -m deepmd test --help ``` An explanation will be provided ``` -usage: dp_test [-h] [-m MODEL] [-s SYSTEM] [-S SET_PREFIX] [-n NUMB_TEST] - [-d DETAIL_FILE] +usage: __main__.py test [-h] [-m MODEL] [-s SYSTEM] [-S SET_PREFIX] + [-n NUMB_TEST] [-r RAND_SEED] [--shuffle-test] + [-d DETAIL_FILE] optional arguments: -h, --help show this help message and exit @@ -392,11 +466,27 @@ optional arguments: The set prefix -n NUMB_TEST, --numb-test NUMB_TEST The number of data for test + -r RAND_SEED, --rand-seed RAND_SEED + The random seed + --shuffle-test Shuffle test data -d DETAIL_FILE, --detail-file DETAIL_FILE The file containing details of energy force and virial accuracy ``` -The files `dp_frz` and `dp_test` may also serve as a python template for further analyses and more user-specific applications. + +## Model inference +One may use the python interface of DeePMD-kit for model inference, an example is given as follows +```python +import deepmd.DeepPot as DP +import numpy as np +dp = DP('graph.pb') +coord = np.array([[1,0,0], [0,0,1.5], [1,0,3]]).reshape([1, -1]) +cell = np.diag(10 * np.ones(3)).reshape([1, -1]) +atype = [1,0,1] +e, f, v = dp.eval(coord, cell, atype) +``` +where `e`, `f` and `v` are predicted energy, force and virial of the system, respectively. + ## Run MD with LAMMPS ### Include deepmd in the pair style @@ -447,60 +537,6 @@ The option **`graph_file`** provides the file name of the frozen model. The `dp_ipi` gets the atom names from an [XYZ file](https://en.wikipedia.org/wiki/XYZ_file_format) provided by **`coord_file`** (meanwhile ignores all coordinates in it), and translates the names to atom types by rules provided by **`atom_type`**. -## Run MD with native code -DeePMD-kit provides a simple MD implementation that runs under either NVE or NVT ensemble. 
One needs to provide the following input files -```bash -$ ls -conf.gro graph.pb water.json -``` -`conf.gro` is the file that provides the initial coordinates and/or velocities of all atoms in the system. It is of Gromacs `gro` format. Details of this format can be find in [this website](http://manual.gromacs.org/current/online/gro.html). It should be notice that the length unit of the `gro` format is **nm** rather than A. - -`graph.pb` is the frozen model. - -`water.json` is the parameter file that specifies how the MD runs. [An example parameter file](./examples/md/water.json) for water NVT simulation is provided. -```json -{ - "conf_file": "conf.gro", - "conf_format": "gro", - "graph_file": "graph.pb", - "nsteps": 500000, - "dt": 5e-4, - "ener_freq": 20, - "ener_file": "energy.out", - "xtc_freq": 20, - "xtc_file": "traj.xtc", - "trr_freq": 20, - "trr_file": "traj.trr", - "print_force": false, - "T": 300, - "tau_T": 0.1, - "rand_seed": 2017, - "atom_type" : { - "OW": 0, - "HW1": 1, - "HW2": 1 - }, - "atom_mass" : { - "OW": 16, - "HW1": 1, - "HW2": 1 - } -} -``` -The options **`conf_file`**, **`conf_format`** and **`graph_file`** are self-explanatory. It should be noticed, again, the length unit is nm in the `gro` format file. - -The option **`nsteps`** specifies the number of time steps of the MD simulation. The option **`dt`** specifies the timestep of the simulation. - -The options **`ener_file`** and **`ener_freq`** specify the energy output file and frequency. - -The options **`xtc_file`**, **`xtc_freq`**, **`trr_file`** and **`trr_freq`** are similar options that specify the output files and frequencies of the xtc and trr trajectory, respectively. When the frequencies are set to 0, the corresponding file will not be output. The instructions of the xtc and trr formats can be found in [xtc manual](http://manual.gromacs.org/online/xtc.html) and [trr manual](http://manual.gromacs.org/online/trr.html). 
It is noticed that the length unit in the xtc and trr files is **nm**. - -If the option **`print_force`** is set to `true`, then the atomic force will be output. - -The option **`T`** specifies the temperature of the simulation, and the option **`tau_T`** specifies the timescale of the thermostat. We implement the Langevin thermostat for the NVT simulation. **`rand_seed`** set the random seed of the random generator in the thermostat. - -The **`atom_type`** set the type for the atoms in the system. The names of the atoms are those provided in the `conf_file` file. The **`atom_mass`** set the mass for the atoms. Again, the name of the atoms are those provided in the `conf_file`. - # Troubleshooting In consequence of various differences of computers or systems, problems may occur. Some common circumstances are listed as follows. If other unexpected problems occur, you're welcome to contact us for help. diff --git a/data/raw/raw_to_set.sh b/data/raw/raw_to_set.sh index 863a59c7c7..58f2ab4ce5 100755 --- a/data/raw/raw_to_set.sh +++ b/data/raw/raw_to_set.sh @@ -16,6 +16,7 @@ test -f energy.raw && split energy.raw -l $nline_per_set -d -a 3 energy.raw test -f force.raw && split force.raw -l $nline_per_set -d -a 3 force.raw test -f virial.raw && split virial.raw -l $nline_per_set -d -a 3 virial.raw test -f atom_ener.raw && split atom_ener.raw -l $nline_per_set -d -a 3 atom_ener.raw +test -f fparam.raw && split fparam.raw -l $nline_per_set -d -a 3 fparam.raw nset=`ls | grep box.raw[0-9] | wc -l` nset_1=$(($nset-1)) @@ -32,6 +33,7 @@ do test -f force.raw$pi && mv force.raw$pi set.$pi/force.raw test -f virial.raw$pi && mv virial.raw$pi set.$pi/virial.raw test -f atom_ener.raw$pi && mv atom_ener.raw$pi set.$pi/atom_ener.raw + test -f fparam.raw$pi && mv fparam.raw$pi set.$pi/fparam.raw cd set.$pi python -c 'import numpy as np; data = np.loadtxt("box.raw" ); data = data.astype (np.float32); np.save ("box", data)' @@ -63,6 +65,13 @@ if os.path.isfile("atom_ener.raw"): data = 
np.loadtxt("atom_ener.raw"); data = data.astype (np.float32); np.save ("atom_ener", data) +' + python -c \ +'import numpy as np; import os.path; +if os.path.isfile("fparam.raw"): + data = np.loadtxt("fparam.raw"); + data = data.astype (np.float32); + np.save ("fparam", data) ' rm *.raw cd ../ diff --git a/doc/install-tf.1.12.md b/doc/install-tf.1.12.md index 42a297cee8..b54582d1a7 100644 --- a/doc/install-tf.1.12.md +++ b/doc/install-tf.1.12.md @@ -1,18 +1,3 @@ -# Install tensorflow's Python interface -We follow the virtual environment approach to install the tensorflow's Python interface. The full instruction can be found on [the tensorflow's official website](https://www.tensorflow.org/install/pip). Now we assume that the Python interface will be installed to virtual environment directory `$tensorflow_venv` -```bash -virtualenv --system-site-packages -p python3 $tensorflow_venv -source $tensorflow_venv/bin/activate -pip install --upgrade pip -pip install --upgrade tensorflow==1.12.0 -``` -To verify the installation, -```bash -python -c "import tensorflow as tf; tf.enable_eager_execution(); print(tf.reduce_sum(tf.random_normal([1000, 1000])))" -``` - -One should remember to activate the virtual environment every time he/she runs deepmd training program `dp_train`. - # Install tensorflow's C++ interface The tensorflow's C++ interface will be compiled from the source code. Firstly one installs bazel. It is highly recommended that the bazel version 0.15.0 is used. A full instruction of bazel installation can be found [here](https://docs.bazel.build/versions/master/install.html). ```bash diff --git a/doc/install-tf.1.8.md b/doc/install-tf.1.8.md index fd4ef0acca..8c7902f044 100644 --- a/doc/install-tf.1.8.md +++ b/doc/install-tf.1.8.md @@ -1,18 +1,3 @@ -# Install tensorflow's Python interface -We follow the virtual environment approach to install the tensorflow's Python interface. 
The full instruction can be found on [the tensorflow's official website](https://www.tensorflow.org/install/pip). Now we assume that the Python interface will be installed to virtual environment directory `$tensorflow_venv` -```bash -virtualenv --system-site-packages -p python3 $tensorflow_venv -source $tensorflow_venv/bin/activate -pip install --upgrade pip -pip install --upgrade tensorflow==1.8.0 -``` -To verify the installation, -```bash -python -c "import tensorflow as tf; sess=tf.Session(); print(sess.run(tf.reduce_sum(tf.random_normal([1000, 1000]))))" -``` - -One should remember to activate the virtual environment every time he/she runs deepmd training program `dp_train`. - # Install tensorflow's C++ interface The tensorflow's C++ interface will be compiled from the source code. Firstly one installs bazel. It is highly recommended that the bazel version 0.10.0 is used. A full instruction of bazel installation can be found [here](https://docs.bazel.build/versions/master/install.html). 
```bash diff --git a/examples/fparam/data/.gitignore b/examples/fparam/data/.gitignore new file mode 100644 index 0000000000..b440c7f944 --- /dev/null +++ b/examples/fparam/data/.gitignore @@ -0,0 +1,2 @@ +*raw + diff --git a/examples/fparam/data/e3000_i2000/set.000/box.npy b/examples/fparam/data/e3000_i2000/set.000/box.npy new file mode 100644 index 0000000000..23b62d305e Binary files /dev/null and b/examples/fparam/data/e3000_i2000/set.000/box.npy differ diff --git a/examples/fparam/data/e3000_i2000/set.000/coord.npy b/examples/fparam/data/e3000_i2000/set.000/coord.npy new file mode 100644 index 0000000000..7d9256768c Binary files /dev/null and b/examples/fparam/data/e3000_i2000/set.000/coord.npy differ diff --git a/examples/fparam/data/e3000_i2000/set.000/energy.npy b/examples/fparam/data/e3000_i2000/set.000/energy.npy new file mode 100644 index 0000000000..c7ec1b4b9a Binary files /dev/null and b/examples/fparam/data/e3000_i2000/set.000/energy.npy differ diff --git a/examples/fparam/data/e3000_i2000/set.000/force.npy b/examples/fparam/data/e3000_i2000/set.000/force.npy new file mode 100644 index 0000000000..6c41386594 Binary files /dev/null and b/examples/fparam/data/e3000_i2000/set.000/force.npy differ diff --git a/examples/fparam/data/e3000_i2000/set.000/fparam.npy b/examples/fparam/data/e3000_i2000/set.000/fparam.npy new file mode 100644 index 0000000000..8a9203a7ed Binary files /dev/null and b/examples/fparam/data/e3000_i2000/set.000/fparam.npy differ diff --git a/examples/fparam/data/e3000_i2000/set.001/box.npy b/examples/fparam/data/e3000_i2000/set.001/box.npy new file mode 100644 index 0000000000..23b62d305e Binary files /dev/null and b/examples/fparam/data/e3000_i2000/set.001/box.npy differ diff --git a/examples/fparam/data/e3000_i2000/set.001/coord.npy b/examples/fparam/data/e3000_i2000/set.001/coord.npy new file mode 100644 index 0000000000..95f2115353 Binary files /dev/null and b/examples/fparam/data/e3000_i2000/set.001/coord.npy differ diff --git 
a/examples/fparam/data/e3000_i2000/set.001/energy.npy b/examples/fparam/data/e3000_i2000/set.001/energy.npy new file mode 100644 index 0000000000..771741ecd6 Binary files /dev/null and b/examples/fparam/data/e3000_i2000/set.001/energy.npy differ diff --git a/examples/fparam/data/e3000_i2000/set.001/force.npy b/examples/fparam/data/e3000_i2000/set.001/force.npy new file mode 100644 index 0000000000..8001ccdbbf Binary files /dev/null and b/examples/fparam/data/e3000_i2000/set.001/force.npy differ diff --git a/examples/fparam/data/e3000_i2000/set.001/fparam.npy b/examples/fparam/data/e3000_i2000/set.001/fparam.npy new file mode 100644 index 0000000000..8a9203a7ed Binary files /dev/null and b/examples/fparam/data/e3000_i2000/set.001/fparam.npy differ diff --git a/examples/fparam/data/e3000_i2000/set.002/box.npy b/examples/fparam/data/e3000_i2000/set.002/box.npy new file mode 100644 index 0000000000..23b62d305e Binary files /dev/null and b/examples/fparam/data/e3000_i2000/set.002/box.npy differ diff --git a/examples/fparam/data/e3000_i2000/set.002/coord.npy b/examples/fparam/data/e3000_i2000/set.002/coord.npy new file mode 100644 index 0000000000..6157888a69 Binary files /dev/null and b/examples/fparam/data/e3000_i2000/set.002/coord.npy differ diff --git a/examples/fparam/data/e3000_i2000/set.002/energy.npy b/examples/fparam/data/e3000_i2000/set.002/energy.npy new file mode 100644 index 0000000000..d4632c5c33 Binary files /dev/null and b/examples/fparam/data/e3000_i2000/set.002/energy.npy differ diff --git a/examples/fparam/data/e3000_i2000/set.002/force.npy b/examples/fparam/data/e3000_i2000/set.002/force.npy new file mode 100644 index 0000000000..0977ef0adc Binary files /dev/null and b/examples/fparam/data/e3000_i2000/set.002/force.npy differ diff --git a/examples/fparam/data/e3000_i2000/set.002/fparam.npy b/examples/fparam/data/e3000_i2000/set.002/fparam.npy new file mode 100644 index 0000000000..8a9203a7ed Binary files /dev/null and 
b/examples/fparam/data/e3000_i2000/set.002/fparam.npy differ diff --git a/examples/fparam/data/e3000_i2000/type.raw b/examples/fparam/data/e3000_i2000/type.raw new file mode 100644 index 0000000000..1ba41c4cdb --- /dev/null +++ b/examples/fparam/data/e3000_i2000/type.raw @@ -0,0 +1,54 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/examples/fparam/data/e8000_i2000/set.000/box.npy b/examples/fparam/data/e8000_i2000/set.000/box.npy new file mode 100644 index 0000000000..23b62d305e Binary files /dev/null and b/examples/fparam/data/e8000_i2000/set.000/box.npy differ diff --git a/examples/fparam/data/e8000_i2000/set.000/coord.npy b/examples/fparam/data/e8000_i2000/set.000/coord.npy new file mode 100644 index 0000000000..ff7ac01e25 Binary files /dev/null and b/examples/fparam/data/e8000_i2000/set.000/coord.npy differ diff --git a/examples/fparam/data/e8000_i2000/set.000/energy.npy b/examples/fparam/data/e8000_i2000/set.000/energy.npy new file mode 100644 index 0000000000..1e4f2ccef4 Binary files /dev/null and b/examples/fparam/data/e8000_i2000/set.000/energy.npy differ diff --git a/examples/fparam/data/e8000_i2000/set.000/force.npy b/examples/fparam/data/e8000_i2000/set.000/force.npy new file mode 100644 index 0000000000..ad75177880 Binary files /dev/null and b/examples/fparam/data/e8000_i2000/set.000/force.npy differ diff --git a/examples/fparam/data/e8000_i2000/set.000/fparam.npy b/examples/fparam/data/e8000_i2000/set.000/fparam.npy new file mode 100644 index 0000000000..88b5d9b8d6 Binary files /dev/null and b/examples/fparam/data/e8000_i2000/set.000/fparam.npy differ diff --git a/examples/fparam/data/e8000_i2000/set.001/box.npy b/examples/fparam/data/e8000_i2000/set.001/box.npy new file mode 100644 index 0000000000..23b62d305e Binary files /dev/null and b/examples/fparam/data/e8000_i2000/set.001/box.npy differ diff --git 
a/examples/fparam/data/e8000_i2000/set.001/coord.npy b/examples/fparam/data/e8000_i2000/set.001/coord.npy new file mode 100644 index 0000000000..19e4cb5b29 Binary files /dev/null and b/examples/fparam/data/e8000_i2000/set.001/coord.npy differ diff --git a/examples/fparam/data/e8000_i2000/set.001/energy.npy b/examples/fparam/data/e8000_i2000/set.001/energy.npy new file mode 100644 index 0000000000..85d4cadee9 Binary files /dev/null and b/examples/fparam/data/e8000_i2000/set.001/energy.npy differ diff --git a/examples/fparam/data/e8000_i2000/set.001/force.npy b/examples/fparam/data/e8000_i2000/set.001/force.npy new file mode 100644 index 0000000000..f61db2e28d Binary files /dev/null and b/examples/fparam/data/e8000_i2000/set.001/force.npy differ diff --git a/examples/fparam/data/e8000_i2000/set.001/fparam.npy b/examples/fparam/data/e8000_i2000/set.001/fparam.npy new file mode 100644 index 0000000000..88b5d9b8d6 Binary files /dev/null and b/examples/fparam/data/e8000_i2000/set.001/fparam.npy differ diff --git a/examples/fparam/data/e8000_i2000/set.002/box.npy b/examples/fparam/data/e8000_i2000/set.002/box.npy new file mode 100644 index 0000000000..23b62d305e Binary files /dev/null and b/examples/fparam/data/e8000_i2000/set.002/box.npy differ diff --git a/examples/fparam/data/e8000_i2000/set.002/coord.npy b/examples/fparam/data/e8000_i2000/set.002/coord.npy new file mode 100644 index 0000000000..f316535bc7 Binary files /dev/null and b/examples/fparam/data/e8000_i2000/set.002/coord.npy differ diff --git a/examples/fparam/data/e8000_i2000/set.002/energy.npy b/examples/fparam/data/e8000_i2000/set.002/energy.npy new file mode 100644 index 0000000000..929966af65 Binary files /dev/null and b/examples/fparam/data/e8000_i2000/set.002/energy.npy differ diff --git a/examples/fparam/data/e8000_i2000/set.002/force.npy b/examples/fparam/data/e8000_i2000/set.002/force.npy new file mode 100644 index 0000000000..401bd81043 Binary files /dev/null and 
b/examples/fparam/data/e8000_i2000/set.002/force.npy differ diff --git a/examples/fparam/data/e8000_i2000/set.002/fparam.npy b/examples/fparam/data/e8000_i2000/set.002/fparam.npy new file mode 100644 index 0000000000..88b5d9b8d6 Binary files /dev/null and b/examples/fparam/data/e8000_i2000/set.002/fparam.npy differ diff --git a/examples/fparam/data/e8000_i2000/type.raw b/examples/fparam/data/e8000_i2000/type.raw new file mode 100644 index 0000000000..1ba41c4cdb --- /dev/null +++ b/examples/fparam/data/e8000_i2000/type.raw @@ -0,0 +1,54 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/examples/fparam/lmp/.gitignore b/examples/fparam/lmp/.gitignore new file mode 100644 index 0000000000..7cbc28b8a1 --- /dev/null +++ b/examples/fparam/lmp/.gitignore @@ -0,0 +1,3 @@ +frozen_model.pb +log.lammps +*dump* diff --git a/examples/fparam/lmp/conf.lmp b/examples/fparam/lmp/conf.lmp new file mode 100644 index 0000000000..f7292ef96d --- /dev/null +++ b/examples/fparam/lmp/conf.lmp @@ -0,0 +1,64 @@ + +54 atoms +1 atom types + 0.0000000000 10.4280799300 xlo xhi + 0.0000000000 10.4280799300 ylo yhi + 0.0000000000 10.4280799300 zlo zhi + 0.0000000000 0.0000000000 0.0000000000 xy xz yz + +Atoms # atomic + + 1 1 0.4643750000 0.3949230000 10.2940500000 + 2 1 3.5348720000 0.2520400000 10.3158220000 + 3 1 7.2324620000 0.1228930000 10.1355050000 + 4 1 9.9507070000 3.2752620000 0.0515780000 + 5 1 3.3706880000 3.3152460000 10.3646920000 + 6 1 6.7449020000 3.3255490000 10.2638940000 + 7 1 0.0488410000 6.7351830000 10.3711260000 + 8 1 3.4651590000 7.0327210000 0.1006810000 + 9 1 6.9307650000 6.6957410000 0.1507120000 + 10 1 10.1233330000 0.1368890000 3.3011780000 + 11 1 3.3081460000 0.0219510000 3.4692380000 + 12 1 7.1162540000 10.0116820000 3.8228820000 + 13 1 10.2833810000 3.3816330000 3.5961620000 + 14 1 3.6750080000 3.1843190000 3.9967240000 + 15 1 6.9892550000 
3.2920210000 3.6984440000 + 16 1 0.4982230000 7.2412900000 3.2778060000 + 17 1 3.5704530000 6.5520740000 3.4567950000 + 18 1 6.8032210000 7.0392460000 3.4280710000 + 19 1 10.3091590000 0.0994260000 6.8924220000 + 20 1 3.4370550000 10.3684960000 6.9731360000 + 21 1 7.2526880000 9.9857130000 7.1602920000 + 22 1 9.9882210000 3.8193700000 6.5101540000 + 23 1 3.3795490000 3.8468420000 6.9077400000 + 24 1 6.6164890000 3.2631720000 6.7142520000 + 25 1 10.1576680000 7.1165020000 7.2578040000 + 26 1 3.3743670000 6.9032890000 6.7801200000 + 27 1 7.1298310000 6.9588770000 7.2617850000 + 28 1 1.5422020000 2.1329710000 1.5503380000 + 29 1 5.6699180000 1.1054690000 1.7107510000 + 30 1 8.5130900000 1.7898790000 1.6984680000 + 31 1 1.9555310000 5.0886780000 1.5922740000 + 32 1 5.4747010000 4.8624430000 1.7339950000 + 33 1 8.9310980000 5.1557250000 1.8983040000 + 34 1 2.0521660000 8.8521970000 1.9591310000 + 35 1 5.0983580000 8.5203820000 1.4825890000 + 36 1 8.8847120000 8.4793550000 1.6471070000 + 37 1 1.8016150000 1.6971370000 5.4598140000 + 38 1 5.5814970000 1.4779090000 4.8725800000 + 39 1 8.7363780000 1.5159260000 5.4921280000 + 40 1 1.3323340000 5.5041190000 5.5846390000 + 41 1 5.4108570000 5.1845870000 5.4133500000 + 42 1 8.3963130000 5.5316890000 5.1707350000 + 43 1 1.6428930000 9.0357440000 5.3812830000 + 44 1 5.1413850000 8.5704560000 5.1667500000 + 45 1 8.9848480000 8.1466110000 5.0493800000 + 46 1 1.3577270000 2.2488970000 8.1097190000 + 47 1 4.9511170000 1.9240540000 8.3422290000 + 48 1 8.5515580000 2.2705350000 8.3764730000 + 49 1 1.7533270000 5.3976920000 8.8873150000 + 50 1 5.0125090000 5.4878990000 8.8070480000 + 51 1 8.6850970000 4.9585190000 8.8666720000 + 52 1 1.7618510000 8.6701780000 8.7299850000 + 53 1 5.1786470000 8.9149870000 8.5980960000 + 54 1 9.0434810000 9.0196510000 9.0660600000 diff --git a/examples/fparam/lmp/in.lammps b/examples/fparam/lmp/in.lammps new file mode 100644 index 0000000000..ba2521bc77 --- /dev/null +++ b/examples/fparam/lmp/in.lammps 
@@ -0,0 +1,25 @@ +# bulk water + +units metal +boundary p p p +atom_style atomic + +neighbor 2.0 bin +neigh_modify every 10 delay 0 check no + +read_data conf.lmp +mass 1 16 + +# pair_style deepmd frozen_model.pb fparam 0.68938740 +pair_style deepmd frozen_model.pb fparam 0.25852028 +pair_coeff + +velocity all create 2000 23456789 + +fix 1 all nvt temp 2000 2000 0.5 +timestep 0.0005 +thermo_style custom step pe ke etotal temp press vol +thermo 100 +dump 1 all custom 100 traj.dump id type x y z fx fy fz + +run 1000 diff --git a/examples/fparam/train/.gitignore b/examples/fparam/train/.gitignore new file mode 100644 index 0000000000..b5cec52a88 --- /dev/null +++ b/examples/fparam/train/.gitignore @@ -0,0 +1,5 @@ +*out +model.ckpt* +frozen_model.pb +checkpoint + diff --git a/examples/fparam/train/input.json b/examples/fparam/train/input.json new file mode 100644 index 0000000000..aa1498b06e --- /dev/null +++ b/examples/fparam/train/input.json @@ -0,0 +1,62 @@ +{ + "_comment": " model parameters", + "model" : { + "descriptor": { + "type": "se_a", + "sel": [60], + "rcut_smth": 1.80, + "rcut": 6.00, + "neuron": [25, 50, 100], + "resnet_dt": false, + "axis_neuron": 8, + "seed": 1 + }, + "fitting_net" : { + "neuron": [120, 120, 120], + "resnet_dt": true, + "numb_fparam": 1, + "seed": 1 + } + }, + + "loss" : { + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0 + }, + + "learning_rate" : { + "start_lr": 0.001, + "decay_steps": 5000, + "decay_rate": 0.95 + }, + + "_comment": " traing controls", + "training" : { + "systems": ["../data/e3000_i2000/", "../data/e8000_i2000/"], + "set_prefix": "set", + "stop_batch": 1000000, + "batch_size": 1, + + "seed": 1, + + "_comment": " display and restart", + "_comment": " frequencies counted in batch", + "disp_file": "lcurve.out", + "disp_freq": 100, + "numb_test": 10, + "save_freq": 1000, + "save_ckpt": "model.ckpt", + "load_ckpt": "model.ckpt", + 
"disp_training":true, + "time_training":true, + "profiling": false, + "profiling_file": "timeline.json" + }, + + "_comment": "that's all" +} + diff --git a/examples/data/water/set.000/box.npy b/examples/water/data/set.000/box.npy similarity index 100% rename from examples/data/water/set.000/box.npy rename to examples/water/data/set.000/box.npy diff --git a/examples/data/water/set.000/coord.npy b/examples/water/data/set.000/coord.npy similarity index 100% rename from examples/data/water/set.000/coord.npy rename to examples/water/data/set.000/coord.npy diff --git a/examples/data/water/set.000/energy.npy b/examples/water/data/set.000/energy.npy similarity index 100% rename from examples/data/water/set.000/energy.npy rename to examples/water/data/set.000/energy.npy diff --git a/examples/data/water/set.000/force.npy b/examples/water/data/set.000/force.npy similarity index 100% rename from examples/data/water/set.000/force.npy rename to examples/water/data/set.000/force.npy diff --git a/examples/data/water/set.001/box.npy b/examples/water/data/set.001/box.npy similarity index 100% rename from examples/data/water/set.001/box.npy rename to examples/water/data/set.001/box.npy diff --git a/examples/data/water/set.001/coord.npy b/examples/water/data/set.001/coord.npy similarity index 100% rename from examples/data/water/set.001/coord.npy rename to examples/water/data/set.001/coord.npy diff --git a/examples/data/water/set.001/energy.npy b/examples/water/data/set.001/energy.npy similarity index 100% rename from examples/data/water/set.001/energy.npy rename to examples/water/data/set.001/energy.npy diff --git a/examples/data/water/set.001/force.npy b/examples/water/data/set.001/force.npy similarity index 100% rename from examples/data/water/set.001/force.npy rename to examples/water/data/set.001/force.npy diff --git a/examples/data/water/set.002/box.npy b/examples/water/data/set.002/box.npy similarity index 100% rename from examples/data/water/set.002/box.npy rename to 
examples/water/data/set.002/box.npy diff --git a/examples/data/water/set.002/coord.npy b/examples/water/data/set.002/coord.npy similarity index 100% rename from examples/data/water/set.002/coord.npy rename to examples/water/data/set.002/coord.npy diff --git a/examples/data/water/set.002/energy.npy b/examples/water/data/set.002/energy.npy similarity index 100% rename from examples/data/water/set.002/energy.npy rename to examples/water/data/set.002/energy.npy diff --git a/examples/data/water/set.002/force.npy b/examples/water/data/set.002/force.npy similarity index 100% rename from examples/data/water/set.002/force.npy rename to examples/water/data/set.002/force.npy diff --git a/examples/data/water/set.003/box.npy b/examples/water/data/set.003/box.npy similarity index 100% rename from examples/data/water/set.003/box.npy rename to examples/water/data/set.003/box.npy diff --git a/examples/data/water/set.003/coord.npy b/examples/water/data/set.003/coord.npy similarity index 100% rename from examples/data/water/set.003/coord.npy rename to examples/water/data/set.003/coord.npy diff --git a/examples/data/water/set.003/energy.npy b/examples/water/data/set.003/energy.npy similarity index 100% rename from examples/data/water/set.003/energy.npy rename to examples/water/data/set.003/energy.npy diff --git a/examples/data/water/set.003/force.npy b/examples/water/data/set.003/force.npy similarity index 100% rename from examples/data/water/set.003/force.npy rename to examples/water/data/set.003/force.npy diff --git a/examples/data/water/type.raw b/examples/water/data/type.raw similarity index 100% rename from examples/data/water/type.raw rename to examples/water/data/type.raw diff --git a/examples/ipi/water.json b/examples/water/ipi/water.json similarity index 100% rename from examples/ipi/water.json rename to examples/water/ipi/water.json diff --git a/examples/water/lmp/.gitignore b/examples/water/lmp/.gitignore new file mode 100644 index 0000000000..0871681801 --- /dev/null +++ 
b/examples/water/lmp/.gitignore @@ -0,0 +1,3 @@ +log.lammps +*.pb +*.dump diff --git a/examples/lmp/lammps.in b/examples/water/lmp/in.lammps similarity index 100% rename from examples/lmp/lammps.in rename to examples/water/lmp/in.lammps diff --git a/examples/lmp/water.lmp b/examples/water/lmp/water.lmp similarity index 100% rename from examples/lmp/water.lmp rename to examples/water/lmp/water.lmp diff --git a/examples/water/train/.gitignore b/examples/water/train/.gitignore new file mode 100644 index 0000000000..a543fab633 --- /dev/null +++ b/examples/water/train/.gitignore @@ -0,0 +1,4 @@ +*.out +*.pb +model.ckpt* +checkpoint diff --git a/examples/water/train/water.json b/examples/water/train/water.json new file mode 100644 index 0000000000..11eab3e429 --- /dev/null +++ b/examples/water/train/water.json @@ -0,0 +1,72 @@ +{ + "with_distrib": false, + "_comment": " model parameters", + "model":{ + "type_map": ["O", "H"], + "descriptor": { + "type": "loc_frame", + "sel_a": [16, 32], + "sel_r": [30, 60], + "rcut": 6.00, + "axis_rule": [0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0], + "_comment": " default rule: []", + "_comment": " user defined rule: for each type provides two axes, ", + "_comment": " for each axis: (a_or_r, type, idx)", + "_comment": " if type < 0, exclude type -(type+1)", + "_comment": " for water (O:0, H:1) it can be", + "_comment": " [0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0]", + "_comment": " that's all" + }, + "fitting_net": { + "neuron": [240, 120, 60, 30, 10], + "resnet_dt": true, + "seed": 1, + "_comment": " that's all" + }, + "_comment": " that's all" + }, + + "learning_rate" :{ + "type": "exp", + "start_lr": 0.001, + "decay_steps": 5000, + "decay_rate": 0.95, + "_comment": "that's all" + }, + + "loss" : { + "type": "std", + "start_pref_e": 0.02, + "limit_pref_e": 8, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + "_comment": "that's all" + }, + + "_comment": " traing controls", + "training": { + "systems": 
["../data/"], + "set_prefix": "set", + "stop_batch": 1000000, + "batch_size": [4], + + "seed": 1, + + "_comment": " display and restart", + "_comment": " frequencies counted in batch", + "disp_file": "lcurve.out", + "disp_freq": 100, + "numb_test": 10, + "save_freq": 1000, + "save_ckpt": "model.ckpt", + "load_ckpt": "model.ckpt", + "disp_training":true, + "time_training":true, + "_comment": "that's all" + }, + + "_comment": "that's all" +} + diff --git a/examples/water/train/water_se_a.json b/examples/water/train/water_se_a.json new file mode 100644 index 0000000000..28d11c5073 --- /dev/null +++ b/examples/water/train/water_se_a.json @@ -0,0 +1,69 @@ +{ + "_comment": " model parameters", + "model": { + "type_map": ["O", "H"], + "descriptor" :{ + "type": "se_a", + "sel": [46, 92], + "rcut_smth": 5.80, + "rcut": 6.00, + "neuron": [25, 50, 100], + "resnet_dt": false, + "axis_neuron": 16, + "seed": 1, + "_comment": " that's all" + }, + "fitting_net" : { + "neuron": [240, 240, 240], + "resnet_dt": true, + "seed": 1, + "_comment": " that's all" + }, + "_comment": " that's all" + }, + + "learning_rate" :{ + "type": "exp", + "start_lr": 0.005, + "decay_steps": 5000, + "decay_rate": 0.95, + "_comment": "that's all" + }, + + "loss" :{ + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + "_comment": " that's all" + }, + + "_comment": " traing controls", + "training" : { + "systems": ["../data/"], + "set_prefix": "set", + "stop_batch": 1000000, + "batch_size": 1, + + "seed": 1, + + "_comment": " display and restart", + "_comment": " frequencies counted in batch", + "disp_file": "lcurve.out", + "disp_freq": 100, + "numb_test": 10, + "save_freq": 1000, + "save_ckpt": "model.ckpt", + "load_ckpt": "model.ckpt", + "disp_training":true, + "time_training":true, + "profiling": false, + "profiling_file":"timeline.json", + "_comment": "that's all" + }, + + "_comment": "that's all" +} + diff --git 
a/examples/water/train/water_se_ar.json b/examples/water/train/water_se_ar.json new file mode 100644 index 0000000000..e2e1a2ed4c --- /dev/null +++ b/examples/water/train/water_se_ar.json @@ -0,0 +1,61 @@ +{ + "_comment": " model parameters", + "model_type": "se_ar", + "model_a": { + "sel_a": [16, 32], + "rcut_smth": 1.00, + "rcut": 3.80, + "filter_neuron": [10, 20, 40], + "filter_resnet_dt": false, + "axis_neuron": 16, + "fitting_neuron": [120, 120, 120], + "fitting_resnet_dt": true, + "seed": 1, + "_comment": "that's all" + }, + "model_r": { + "sel_r": [46, 92], + "rcut_smth": 1.00, + "rcut": 6.00, + "filter_neuron": [5, 10, 20], + "filter_resnet_dt": false, + "fitting_neuron": [120, 120, 120], + "fitting_resnet_dt": true, + "seed": 1, + "_comment": "that's all" + }, + + "_comment": " traing controls", + "systems": ["../data/"], + "set_prefix": "set", + "stop_batch": 1000000, + "batch_size": 1, + "start_lr": 0.005, + "decay_steps": 5000, + "decay_rate": 0.95, + + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + + "seed": 1, + + "_comment": " display and restart", + "_comment": " frequencies counted in batch", + "disp_file": "lcurve.out", + "disp_freq": 100, + "numb_test": 10, + "save_freq": 1000, + "save_ckpt": "model.ckpt", + "load_ckpt": "model.ckpt", + "disp_training": true, + "time_training": true, + "profiling": false, + "profiling_file": "timeline.json", + + "_comment": "that's all" +} + diff --git a/examples/water/train/water_se_r.json b/examples/water/train/water_se_r.json new file mode 100644 index 0000000000..c577047189 --- /dev/null +++ b/examples/water/train/water_se_r.json @@ -0,0 +1,68 @@ +{ + "_comment": " model parameters", + "model_type": "se_r", + "model": { + "type_map": ["O", "H"], + "descriptor": { + "type": "se_r", + "sel": [46, 92], + "rcut_smth": 1.00, + "rcut": 6.00, + "neuron": [5, 10, 20], + "resnet_dt": false, + "seed": 1, + "_comment": " that's all" + 
}, + "fitting_net" :{ + "neuron": [120, 120, 120], + "resnet_dt": true, + "seed": 1, + "_comment": "that's all" + }, + "_comment": " that's all" + }, + + "learning_rate" : { + "start_lr": 0.005, + "decay_steps": 5000, + "decay_rate": 0.95, + "_comment": " that's all" + }, + + "loss" : { + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + "_comment": " that's all" + }, + + "_comment": " traing controls", + "training" : { + "systems": ["../data/"], + "set_prefix": "set", + "stop_batch": 1000000, + "batch_size": 1, + + "seed": 1, + + "_comment": " display and restart", + "_comment": " frequencies counted in batch", + "disp_file": "lcurve.out", + "disp_freq": 100, + "numb_test": 10, + "save_freq": 1000, + "save_ckpt": "model.ckpt", + "load_ckpt": "model.ckpt", + "disp_training":true, + "time_training":true, + "profiling": false, + "profiling_file": "timeline.json", + "_comment": "that's all" + }, + + "_comment": "that's all" +} + diff --git a/examples/water/train/water_srtab_example.json b/examples/water/train/water_srtab_example.json new file mode 100644 index 0000000000..846017a24c --- /dev/null +++ b/examples/water/train/water_srtab_example.json @@ -0,0 +1,74 @@ +{ + "with_distrib": false, + "_comment": " model parameters", + "model":{ + "type_map": ["O", "H"], + "use_srtab": "your_tab", + "smin_alpha": float_alpha, + "sw_rmin": float_rmin, + "sw_rmax": float_rmax, + "descriptor": { + "type": "loc_frame", + "sel_a": [16, 32], + "sel_r": [30, 60], + "rcut": 6.00, + "axis_rule": [0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0], + "_comment": " default rule: []", + "_comment": " user defined rule: for each type provides two axes, ", + "_comment": " for each axis: (a_or_r, type, idx)", + "_comment": " if type < 0, exclude type -(type+1)", + "_comment": " for water (O:0, H:1) it can be", + "_comment": " [0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0]", + "_comment": " that's all" + }, + "fitting_net": { + 
"neuron": [240, 120, 60, 30, 10], + "resnet_dt": true, + "seed": 1, + "_comment": " that's all" + }, + "_comment": " that's all" + }, + + "learning_rate" :{ + "type": "exp", + "start_lr": 0.001, + "decay_steps": 5000, + "decay_rate": 0.95, + "_comment": "that's all" + }, + + "loss" : { + "start_pref_e": 0.02, + "limit_pref_e": 8, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + "_comment": " that's all" + }, + + "_comment": " training controls", + "training" : { + "systems": ["../data/"], + "set_prefix": "set", + "stop_batch": 1000000, + "batch_size": 4, + + "seed": 1, + + "_comment": " display and restart", + "_comment": " frequencies counted in batch", + "disp_file": "lcurve.out", + "disp_freq": 100, + "numb_test": 10, + "save_freq": 1000, + "save_ckpt": "model.ckpt", + "load_ckpt": "model.ckpt", + "disp_training":true, + "time_training":true, + "_comment": "that's all" + }, + "_comment": "that's all" +} + diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index 1bdb10ba62..21e3598da2 100644 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -1,6 +1,14 @@ cmake_minimum_required(VERSION 3.0) project(DeePMD) +# build cpp or python interfaces +if (NOT DEFINED BUILD_CPP_IF) + set(BUILD_CPP_IF TRUE) +endif (NOT DEFINED BUILD_CPP_IF) +if (NOT DEFINED BUILD_PY_IF) + set(BUILD_PY_IF FALSE) +endif (NOT DEFINED BUILD_PY_IF) + find_package(Git) if(GIT_FOUND) execute_process( @@ -29,9 +37,6 @@ if(GIT_FOUND) ) endif(GIT_FOUND) -# set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0") -# set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0") - # global defines list (APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/) list (APPEND CMAKE_CXX_FLAGS "-std=c++11 -Wno-ignored-attributes") @@ -85,19 +90,26 @@ include_directories(${DeePMD_INCLUDE_DIRS}) include_directories(${TensorFlow_INCLUDE_DIRS}) # define names of libs -set (LIB_DEEPMD "deepmd") -set (LIB_DEEPMD_OP "deepmd_op") -set (LIB_DEEPMD_NATIVE "deepmd_native_md") 
-set (LIB_DEEPMD_IPI "deepmd_ipi") +if (BUILD_CPP_IF) + set (LIB_DEEPMD "deepmd") + set (LIB_DEEPMD_OP "deepmd_op") + set (LIB_DEEPMD_NATIVE "deepmd_native_md") + set (LIB_DEEPMD_IPI "deepmd_ipi") +endif (BUILD_CPP_IF) include_directories(${CMAKE_BINARY_DIR}/lib/) -add_subdirectory (lib/) add_subdirectory (op/) -add_subdirectory (train/) +if (BUILD_PY_IF) + add_subdirectory (train/) + add_subdirectory (scripts/) + add_subdirectory (tests/) +endif (BUILD_PY_IF) +if (BUILD_CPP_IF) + add_subdirectory (lib/) + add_subdirectory (lmp/) # add_subdirectory (md/) -add_subdirectory (ipi/) -add_subdirectory (scripts/) -add_subdirectory (lmp/) + add_subdirectory (ipi/) +endif (BUILD_CPP_IF) # uninstall target configure_file( diff --git a/source/cmake/Findtensorflow.cmake b/source/cmake/Findtensorflow.cmake index 82687c0c76..5c26eeb440 100644 --- a/source/cmake/Findtensorflow.cmake +++ b/source/cmake/Findtensorflow.cmake @@ -1,5 +1,6 @@ # Input: # TENSORFLOW_ROOT +# BUILD_CPP_IF # # Output: # TensorFlow_FOUND @@ -32,27 +33,33 @@ if (NOT TensorFlow_INCLUDE_DIRS AND tensorflow_FIND_REQUIRED) "You can manually set the tensorflow install path by -DTENSORFLOW_ROOT ") endif () -# tensorflow_cc and tensorflow_framework -if (NOT TensorFlow_FIND_COMPONENTS) - set(TensorFlow_FIND_COMPONENTS tensorflow_cc tensorflow_framework) -endif () -# the lib -set (TensorFlow_LIBRARY_PATH "") -foreach (module ${TensorFlow_FIND_COMPONENTS}) - find_library(TensorFlow_LIBRARY_${module} - NAMES ${module} - PATHS ${TensorFlow_search_PATHS} PATH_SUFFIXES lib NO_DEFAULT_PATH - ) - if (TensorFlow_LIBRARY_${module}) - list(APPEND TensorFlow_LIBRARY ${TensorFlow_LIBRARY_${module}}) - get_filename_component(TensorFlow_LIBRARY_PATH_${module} ${TensorFlow_LIBRARY_${module}} PATH) - list(APPEND TensorFlow_LIBRARY_PATH ${TensorFlow_LIBRARY_PATH_${module}}) - elseif (tensorflow_FIND_REQUIRED) - message(FATAL_ERROR - "Not found lib/'${module}' in '${TensorFlow_search_PATHS}' " - "You can manually set the tensorflow 
install path by -DTENSORFLOW_ROOT ") +if (BUILD_CPP_IF) + message (STATUS "Enabled cpp interface build, looking for tensorflow_cc and tensorflow_framework") + # tensorflow_cc and tensorflow_framework + if (NOT TensorFlow_FIND_COMPONENTS) + set(TensorFlow_FIND_COMPONENTS tensorflow_cc tensorflow_framework) endif () -endforeach () + # the lib + set (TensorFlow_LIBRARY_PATH "") + foreach (module ${TensorFlow_FIND_COMPONENTS}) + find_library(TensorFlow_LIBRARY_${module} + NAMES ${module} + PATHS ${TensorFlow_search_PATHS} PATH_SUFFIXES lib NO_DEFAULT_PATH + ) + if (TensorFlow_LIBRARY_${module}) + list(APPEND TensorFlow_LIBRARY ${TensorFlow_LIBRARY_${module}}) + get_filename_component(TensorFlow_LIBRARY_PATH_${module} ${TensorFlow_LIBRARY_${module}} PATH) + list(APPEND TensorFlow_LIBRARY_PATH ${TensorFlow_LIBRARY_PATH_${module}}) + elseif (tensorflow_FIND_REQUIRED) + message(FATAL_ERROR + "Not found lib/'${module}' in '${TensorFlow_search_PATHS}' " + "You can manually set the tensorflow install path by -DTENSORFLOW_ROOT ") + endif () + endforeach () +else (BUILD_CPP_IF) + message (STATUS "Disabled cpp interface build, looking for tensorflow_framework") +endif (BUILD_CPP_IF) + # tensorflow_framework if (NOT TensorFlowFramework_FIND_COMPONENTS) @@ -76,12 +83,20 @@ foreach (module ${TensorFlowFramework_FIND_COMPONENTS}) endif () endforeach () -# define the output variable -if (TensorFlow_INCLUDE_DIRS AND TensorFlow_LIBRARY AND TensorFlowFramework_LIBRARY) - set(TensorFlow_FOUND TRUE) -else () - set(TensorFlow_FOUND FALSE) -endif () +if (BUILD_CPP_IF) + # define the output variable + if (TensorFlow_INCLUDE_DIRS AND TensorFlow_LIBRARY AND TensorFlowFramework_LIBRARY) + set(TensorFlow_FOUND TRUE) + else () + set(TensorFlow_FOUND FALSE) + endif () +else (BUILD_CPP_IF) + if (TensorFlow_INCLUDE_DIRS AND TensorFlowFramework_LIBRARY) + set(TensorFlow_FOUND TRUE) + else () + set(TensorFlow_FOUND FALSE) + endif () +endif (BUILD_CPP_IF) # print message if (NOT 
TensorFlow_FIND_QUIETLY) diff --git a/source/lib/include/ComputeDescriptor.h b/source/lib/include/ComputeDescriptor.h index b76c62bc48..394be03f83 100644 --- a/source/lib/include/ComputeDescriptor.h +++ b/source/lib/include/ComputeDescriptor.h @@ -68,7 +68,7 @@ void compute_descriptor (vector & descrpt_a, const int axis1_idx); inline -void compute_descriptor_norot (vector & descrpt_a, +void compute_descriptor_se_a (vector & descrpt_a, vector & descrpt_a_deriv, vector & rij_a, const vector & posi, @@ -82,6 +82,21 @@ void compute_descriptor_norot (vector & descrpt_a, const double & rmin, const double & rmax); +inline +void compute_descriptor_se_r (vector & descrpt_r, + vector & descrpt_r_deriv, + vector & rij_r, + const vector & posi, + const int & ntypes, + const vector & type, + const SimulationRegion & region, + const bool & b_pbc, + const int & i_idx, + const vector & fmt_nlist_r, + const vector & sec_r, + const double & rmin, + const double & rmax); + struct NeighborInfo { @@ -915,7 +930,7 @@ spline5_switch (double & vv, // output deriv size: n_sel_a_nei x 4 x 12 // (1./rr, cos_theta, cos_phi, sin_phi) x 4 x (x, y, z) -void compute_descriptor_norot (vector & descrpt_a, +void compute_descriptor_se_a (vector & descrpt_a, vector & descrpt_a_deriv, vector & rij_a, const vector & posi, @@ -1002,4 +1017,73 @@ void compute_descriptor_norot (vector & descrpt_a, } +void compute_descriptor_se_r (vector & descrpt, + vector & descrpt_deriv, + vector & rij, + const vector & posi, + const int & ntypes, + const vector & type, + const SimulationRegion & region, + const bool & b_pbc, + const int & i_idx, + const vector & fmt_nlist, + const vector & sec, + const double & rmin, + const double & rmax) +{ + // compute the diff of the neighbors + vector > sel_diff (sec.back()); + rij.resize (sec.back() * 3); + fill (rij.begin(), rij.end(), 0.0); + for (int ii = 0; ii < int(sec.size()) - 1; ++ii){ + for (int jj = sec[ii]; jj < sec[ii+1]; ++jj){ + if (fmt_nlist[jj] < 0) break; + 
sel_diff[jj].resize(3); + const int & j_idx = fmt_nlist[jj]; + if (b_pbc){ + region.diffNearestNeighbor (posi[j_idx*3+0], posi[j_idx*3+1], posi[j_idx*3+2], + posi[i_idx*3+0], posi[i_idx*3+1], posi[i_idx*3+2], + sel_diff[jj][0], sel_diff[jj][1], sel_diff[jj][2]); + } + else { + for (int dd = 0; dd < 3; ++dd) sel_diff[jj][dd] = posi[j_idx*3+dd] - posi[i_idx*3+dd]; + } + for (int dd = 0; dd < 3; ++dd) rij[jj*3+dd] = sel_diff[jj][dd]; + } + } + + // 1./rr + descrpt.resize (sec.back()); + fill (descrpt.begin(), descrpt.end(), 0.0); + // deriv wrt center: 3 + descrpt_deriv.resize (sec.back() * 3); + fill (descrpt_deriv.begin(), descrpt_deriv.end(), 0.0); + + for (int sec_iter = 0; sec_iter < int(sec.size()) - 1; ++sec_iter){ + for (int nei_iter = sec[sec_iter]; nei_iter < sec[sec_iter+1]; ++nei_iter) { + if (fmt_nlist[nei_iter] < 0) break; + const double * rr = &sel_diff[nei_iter][0]; + double nr2 = MathUtilities::dot(rr, rr); + double inr = 1./sqrt(nr2); + double nr = nr2 * inr; + double inr2 = inr * inr; + double inr4 = inr2 * inr2; + double inr3 = inr4 * nr; + double sw, dsw; + spline5_switch(sw, dsw, nr, rmin, rmax); + int idx_deriv = nei_iter * 3; // 1 components time 3 directions + int idx_value = nei_iter; // 1 components + // value components + descrpt[idx_value + 0] = 1./nr; + // deriv of component 1/r + descrpt_deriv[idx_deriv + 0] = rr[0] * inr3 * sw - descrpt[idx_value + 0] * dsw * rr[0] * inr; + descrpt_deriv[idx_deriv + 1] = rr[1] * inr3 * sw - descrpt[idx_value + 0] * dsw * rr[1] * inr; + descrpt_deriv[idx_deriv + 2] = rr[2] * inr3 * sw - descrpt[idx_value + 0] * dsw * rr[2] * inr; + // value components + descrpt[idx_value + 0] *= sw; + } + } +} + + diff --git a/source/lib/include/NNPInter.h b/source/lib/include/NNPInter.h index 162325dd85..ce8145e3ab 100644 --- a/source/lib/include/NNPInter.h +++ b/source/lib/include/NNPInter.h @@ -63,7 +63,8 @@ class NNPInter const vector & coord, const vector & atype, const vector & box, - const int nghost = 0); + const 
int nghost = 0, + const vector fparam = vector()); void compute (ENERGYTYPE & ener, vector & force, vector & virial, @@ -71,7 +72,8 @@ class NNPInter const vector & atype, const vector & box, const int nghost, - const LammpsNeighborList & lmp_list); + const LammpsNeighborList & lmp_list, + const vector fparam = vector()); void compute (ENERGYTYPE & ener, vector & force, vector & virial, @@ -79,7 +81,8 @@ class NNPInter vector & atom_virial, const vector & coord, const vector & atype, - const vector & box); + const vector & box, + const vector fparam = vector()); void compute (ENERGYTYPE & ener, vector & force, vector & virial, @@ -89,19 +92,23 @@ class NNPInter const vector & atype, const vector & box, const int nghost, - const LammpsNeighborList & lmp_list); + const LammpsNeighborList & lmp_list, + const vector fparam = vector()); VALUETYPE cutoff () const {assert(inited); return rcut;}; int numb_types () const {assert(inited); return ntypes;}; + int dim_fparam () const {assert(inited); return dfparam;}; private: Session* session; int num_intra_nthreads, num_inter_nthreads; GraphDef graph_def; bool inited; + template VT get_scalar(const string & name) const; VALUETYPE get_rcut () const; int get_ntypes () const; VALUETYPE rcut; VALUETYPE cell_size; int ntypes; + int dfparam; }; class NNPInterModelDevi @@ -117,7 +124,8 @@ class NNPInterModelDevi vector & model_devi, const vector & coord, const vector & atype, - const vector & box); + const vector & box, + const vector fparam = vector()); void compute (vector & all_ener, vector > & all_force, vector > & all_virial, @@ -125,7 +133,8 @@ class NNPInterModelDevi const vector & atype, const vector & box, const int nghost, - const LammpsNeighborList & lmp_list); + const LammpsNeighborList & lmp_list, + const vector fparam = vector()); void compute (vector & all_ener, vector > & all_force, vector > & all_virial, @@ -135,9 +144,11 @@ class NNPInterModelDevi const vector & atype, const vector & box, const int nghost, - const 
LammpsNeighborList & lmp_list); + const LammpsNeighborList & lmp_list, + const vector fparam = vector()); VALUETYPE cutoff () const {assert(inited); return rcut;}; int numb_types () const {assert(inited); return ntypes;}; + int dim_fparam () const {assert(inited); return dfparam;}; #ifndef HIGH_PREC void compute_avg (ENERGYTYPE & dener, const vector & all_energy); @@ -158,11 +169,13 @@ class NNPInterModelDevi int num_intra_nthreads, num_inter_nthreads; vector graph_defs; bool inited; + template VT get_scalar(const string name) const; VALUETYPE get_rcut () const; int get_ntypes () const; VALUETYPE rcut; VALUETYPE cell_size; int ntypes; + int dfparam; }; diff --git a/source/lib/src/NNPInter.cc b/source/lib/src/NNPInter.cc index 51d5fb1cd4..0b6c6ea445 100644 --- a/source/lib/src/NNPInter.cc +++ b/source/lib/src/NNPInter.cc @@ -1,6 +1,7 @@ #include "NNPInter.h" #include "NNPAtomMap.h" #include "SimulationRegion.h" +#include static void @@ -58,7 +59,8 @@ make_input_tensors (std::vector> & input_tensors, const int & ntypes, const vector & datype_, const vector & dbox, - const VALUETYPE & cell_size, + const VALUETYPE & cell_size, + const vector fparam_, const NNPAtomMap&nnpmap, const int nghost = 0) { @@ -116,27 +118,29 @@ make_input_tensors (std::vector> & input_tensors, } TensorShape natoms_shape ; natoms_shape.AddDim (2 + ntypes); + TensorShape fparam_shape ; + fparam_shape.AddDim (nframes); + fparam_shape.AddDim (fparam_.size()); #ifdef HIGH_PREC Tensor coord_tensor (DT_DOUBLE, coord_shape); - Tensor type_tensor (DT_INT32, type_shape); Tensor box_tensor (DT_DOUBLE, box_shape); - Tensor mesh_tensor (DT_INT32, mesh_shape); - Tensor natoms_tensor (DT_INT32, natoms_shape); + Tensor fparam_tensor (DT_DOUBLE, fparam_shape); #else Tensor coord_tensor (DT_FLOAT, coord_shape); - Tensor type_tensor (DT_INT32, type_shape); Tensor box_tensor (DT_FLOAT, box_shape); + Tensor fparam_tensor (DT_FLOAT, fparam_shape); +#endif + Tensor type_tensor (DT_INT32, type_shape); Tensor 
mesh_tensor (DT_INT32, mesh_shape); Tensor natoms_tensor (DT_INT32, natoms_shape); -#endif auto coord = coord_tensor.matrix (); auto type = type_tensor.matrix (); auto box = box_tensor.matrix (); auto mesh = mesh_tensor.flat (); - auto natoms = natoms_tensor.flat (); - + auto natoms = natoms_tensor.flat (); + auto fparam = fparam_tensor.matrix (); vector dcoord (dcoord_); nnpmap.forward (dcoord.begin(), dcoord_.begin(), 3); @@ -151,6 +155,9 @@ make_input_tensors (std::vector> & input_tensors, for (int jj = 0; jj < nall; ++jj){ type(ii, jj) = datype[jj]; } + for (int jj = 0; jj < fparam_.size(); ++jj){ + fparam(ii, jj) = fparam_[jj]; + } } mesh (1-1) = 0; mesh (2-1) = 0; @@ -170,13 +177,25 @@ make_input_tensors (std::vector> & input_tensors, natoms (1) = nall; for (int ii = 0; ii < ntypes; ++ii) natoms(ii+2) = type_count[ii]; - input_tensors = { - {"t_coord", coord_tensor}, - {"t_type", type_tensor}, - {"t_box", box_tensor}, - {"t_mesh", mesh_tensor}, - {"t_natoms", natoms_tensor}, - }; + if (fparam_.size() == 0) { + input_tensors = { + {"i_coord", coord_tensor}, + {"i_type", type_tensor}, + {"i_box", box_tensor}, + {"i_mesh", mesh_tensor}, + {"i_natoms", natoms_tensor}, + }; + } + else { + input_tensors = { + {"i_coord", coord_tensor}, + {"i_type", type_tensor}, + {"i_box", box_tensor}, + {"i_mesh", mesh_tensor}, + {"i_natoms", natoms_tensor}, + {"i_fparam", fparam_tensor}, + }; + } return nloc; } @@ -188,6 +207,7 @@ make_input_tensors (std::vector> & input_tensors, const vector & datype_, const vector & dbox, InternalNeighborList & dlist, + const vector fparam_, const NNPAtomMap&nnpmap, const int nghost) { @@ -218,26 +238,29 @@ make_input_tensors (std::vector> & input_tensors, mesh_shape.AddDim (16); TensorShape natoms_shape ; natoms_shape.AddDim (2 + ntypes); + TensorShape fparam_shape ; + fparam_shape.AddDim (nframes); + fparam_shape.AddDim (fparam_.size()); #ifdef HIGH_PREC Tensor coord_tensor (DT_DOUBLE, coord_shape); - Tensor type_tensor (DT_INT32, 
type_shape); Tensor box_tensor (DT_DOUBLE, box_shape); - Tensor mesh_tensor (DT_INT32, mesh_shape); - Tensor natoms_tensor (DT_INT32, natoms_shape); + Tensor fparam_tensor (DT_DOUBLE, fparam_shape); #else Tensor coord_tensor (DT_FLOAT, coord_shape); - Tensor type_tensor (DT_INT32, type_shape); Tensor box_tensor (DT_FLOAT, box_shape); + Tensor fparam_tensor (DT_FLOAT, fparam_shape); +#endif + Tensor type_tensor (DT_INT32, type_shape); Tensor mesh_tensor (DT_INT32, mesh_shape); Tensor natoms_tensor (DT_INT32, natoms_shape); -#endif auto coord = coord_tensor.matrix (); auto type = type_tensor.matrix (); auto box = box_tensor.matrix (); auto mesh = mesh_tensor.flat (); auto natoms = natoms_tensor.flat (); + auto fparam = fparam_tensor.matrix (); vector dcoord (dcoord_); nnpmap.forward (dcoord.begin(), dcoord_.begin(), 3); @@ -252,6 +275,9 @@ make_input_tensors (std::vector> & input_tensors, for (int jj = 0; jj < nall; ++jj){ type(ii, jj) = datype[jj]; } + for (int jj = 0; jj < fparam_.size(); ++jj){ + fparam(ii, jj) = fparam_[jj]; + } } for (int ii = 0; ii < 16; ++ii) mesh(ii) = 0; @@ -271,13 +297,25 @@ make_input_tensors (std::vector> & input_tensors, natoms (1) = nall; for (int ii = 0; ii < ntypes; ++ii) natoms(ii+2) = type_count[ii]; - input_tensors = { - {"t_coord", coord_tensor}, - {"t_type", type_tensor}, - {"t_box", box_tensor}, - {"t_mesh", mesh_tensor}, - {"t_natoms", natoms_tensor}, - }; + if (fparam_.size() == 0) { + input_tensors = { + {"i_coord", coord_tensor}, + {"i_type", type_tensor}, + {"i_box", box_tensor}, + {"i_mesh", mesh_tensor}, + {"i_natoms", natoms_tensor}, + }; + } + else { + input_tensors = { + {"i_coord", coord_tensor}, + {"i_type", type_tensor}, + {"i_box", box_tensor}, + {"i_mesh", mesh_tensor}, + {"i_natoms", natoms_tensor}, + {"i_fparam", fparam_tensor}, + }; + } return nloc; } @@ -308,7 +346,7 @@ run_model (ENERGYTYPE & dener, std::vector output_tensors; checkStatus (session->Run(input_tensors, - {"energy_test", "force_test", 
"virial_test"}, + {"o_energy", "o_force", "o_virial"}, {}, &output_tensors)); @@ -367,7 +405,7 @@ run_model (ENERGYTYPE & dener, std::vector output_tensors; checkStatus (session->Run(input_tensors, - {"energy_test", "force_test", "virial_test", "atom_energy_test", "atom_virial_test"}, + {"o_energy", "o_force", "o_virial", "o_atom_energy", "o_atom_virial"}, {}, &output_tensors)); @@ -448,9 +486,16 @@ NNPInter (const string & model) checkStatus (NewSession(options, &session)); checkStatus (ReadBinaryProto(Env::Default(), model, &graph_def)); checkStatus (session->Create(graph_def)); - rcut = get_rcut(); + rcut = get_scalar("model_attr/t_rcut"); cell_size = rcut; - ntypes = get_ntypes(); + ntypes = get_scalar("model_attr/t_ntypes"); + dfparam = get_scalar("model_attr/t_dfparam"); + assert(rcut == get_rcut()); + assert(ntypes == get_ntypes()); + if (dfparam < 0) dfparam = 0; + // rcut = get_rcut(); + // ntypes = get_ntypes(); + // dfparam = get_dfparam(); inited = true; } @@ -465,9 +510,17 @@ init (const string & model) checkStatus (NewSession(options, &session)); checkStatus (ReadBinaryProto(Env::Default(), model, &graph_def)); checkStatus (session->Create(graph_def)); - rcut = get_rcut(); + rcut = get_scalar("model_attr/t_rcut"); cell_size = rcut; - ntypes = get_ntypes(); + ntypes = get_scalar("model_attr/t_ntypes"); + dfparam = get_scalar("model_attr/t_dfparam"); + assert(rcut == get_rcut()); + assert(ntypes == get_ntypes()); + if (dfparam < 0) dfparam = 0; + // rcut = get_rcut(); + // cell_size = rcut; + // ntypes = get_ntypes(); + // dfparam = get_dfparam(); inited = true; } @@ -487,32 +540,18 @@ print_summary(const string &pre) const cout << pre << "set tf inter_op_parallelism_threads: " << num_inter_nthreads << endl; } - -VALUETYPE -NNPInter:: -get_rcut () const -{ - std::vector output_tensors; - checkStatus (session->Run(std::vector> ({}), - {"t_rcut"}, - {}, - &output_tensors)); - Tensor output_rc = output_tensors[0]; - auto orc = output_rc.flat (); - return 
orc(0); -} - -int +template +VT NNPInter:: -get_ntypes () const +get_scalar (const string & name) const { std::vector output_tensors; checkStatus (session->Run(std::vector> ({}), - {"t_ntypes"}, + {name.c_str()}, {}, &output_tensors)); Tensor output_rc = output_tensors[0]; - auto orc = output_rc.flat (); + auto orc = output_rc.flat (); return orc(0); } @@ -524,15 +563,19 @@ compute (ENERGYTYPE & dener, const vector & dcoord_, const vector & datype_, const vector & dbox, - const int nghost) + const int nghost, + const vector fparam) { int nall = dcoord_.size() / 3; int nloc = nall - nghost; NNPAtomMap nnpmap (datype_.begin(), datype_.begin() + nloc); assert (nloc == nnpmap.get_type().size()); + if (fparam.size() != dfparam) { + throw std::runtime_error("the dim of frame parameter provided is not consistent with what the model uses"); + } std::vector> input_tensors; - int ret = make_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, nnpmap, nghost); + int ret = make_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, fparam, nnpmap, nghost); assert (ret == nloc); run_model (dener, dforce_, dvirial, session, input_tensors, nnpmap, nghost); @@ -547,19 +590,23 @@ compute (ENERGYTYPE & dener, const vector & datype_, const vector & dbox, const int nghost, - const LammpsNeighborList & lmp_list) + const LammpsNeighborList & lmp_list, + const vector fparam) { int nall = dcoord_.size() / 3; int nloc = nall - nghost; NNPAtomMap nnpmap (datype_.begin(), datype_.begin() + nloc); assert (nloc == nnpmap.get_type().size()); + if (fparam.size() != dfparam) { + throw std::runtime_error("the dim of frame parameter provided is not consistent with what the model uses"); + } InternalNeighborList nlist; convert_nlist_lmp_internal (nlist, lmp_list); shuffle_nlist (nlist, nnpmap); std::vector> input_tensors; - int ret = make_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, nnpmap, nghost); + int ret = make_input_tensors 
(input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, nnpmap, nghost); assert (nloc == ret); run_model (dener, dforce_, dvirial, session, input_tensors, nnpmap, nghost); @@ -575,12 +622,16 @@ compute (ENERGYTYPE & dener, vector & datom_virial_, const vector & dcoord_, const vector & datype_, - const vector & dbox) + const vector & dbox, + const vector fparam) { NNPAtomMap nnpmap (datype_.begin(), datype_.end()); + if (fparam.size() != dfparam) { + throw std::runtime_error("the dim of frame parameter provided is not consistent with what the model uses"); + } std::vector> input_tensors; - int nloc = make_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, nnpmap); + int nloc = make_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, fparam, nnpmap); run_model (dener, dforce_, dvirial, datom_energy_, datom_virial_, session, input_tensors, nnpmap); } @@ -598,19 +649,23 @@ compute (ENERGYTYPE & dener, const vector & datype_, const vector & dbox, const int nghost, - const LammpsNeighborList & lmp_list) + const LammpsNeighborList & lmp_list, + const vector fparam) { int nall = dcoord_.size() / 3; int nloc = nall - nghost; NNPAtomMap nnpmap (datype_.begin(), datype_.begin() + nloc); assert (nloc == nnpmap.get_type().size()); + if (fparam.size() != dfparam) { + throw std::runtime_error("the dim of frame parameter provided is not consistent with what the model uses"); + } InternalNeighborList nlist; convert_nlist_lmp_internal (nlist, lmp_list); shuffle_nlist (nlist, nnpmap); std::vector> input_tensors; - int ret = make_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, nnpmap, nghost); + int ret = make_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, nnpmap, nghost); assert (nloc == ret); run_model (dener, dforce_, dvirial, datom_energy_, datom_virial_, session, input_tensors, nnpmap, nghost); @@ -642,9 +697,14 @@ NNPInterModelDevi (const vector & models) checkStatus 
(ReadBinaryProto(Env::Default(), models[ii], &graph_defs[ii])); checkStatus (sessions[ii]->Create(graph_defs[ii])); } - rcut = get_rcut(); + rcut = get_scalar("model_attr/t_rcut"); cell_size = rcut; - ntypes = get_ntypes(); + ntypes = get_scalar("model_attr/t_ntypes"); + dfparam = get_scalar("model_attr/t_dfparam"); + if (dfparam < 0) dfparam = 0; + // rcut = get_rcut(); + // cell_size = rcut; + // ntypes = get_ntypes(); inited = true; } @@ -664,25 +724,31 @@ init (const vector & models) checkStatus (ReadBinaryProto(Env::Default(), models[ii], &graph_defs[ii])); checkStatus (sessions[ii]->Create(graph_defs[ii])); } - rcut = get_rcut(); + rcut = get_scalar("model_attr/t_rcut"); cell_size = rcut; - ntypes = get_ntypes(); + ntypes = get_scalar("model_attr/t_ntypes"); + dfparam = get_scalar("model_attr/t_dfparam"); + if (dfparam < 0) dfparam = 0; + // rcut = get_rcut(); + // cell_size = rcut; + // ntypes = get_ntypes(); inited = true; } -VALUETYPE +template +VT NNPInterModelDevi:: -get_rcut () const +get_scalar(const string name) const { - VALUETYPE myrcut = 0; + VT myrcut = 0; for (unsigned ii = 0; ii < numb_models; ++ii){ std::vector output_tensors; checkStatus (sessions[ii]->Run(std::vector> ({}), - {"t_rcut"}, + {name.c_str()}, {}, &output_tensors)); Tensor output_rc = output_tensors[0]; - auto orc = output_rc.flat (); + auto orc = output_rc.flat (); if (ii == 0){ myrcut = orc(0); } @@ -693,29 +759,6 @@ get_rcut () const return myrcut; } -int -NNPInterModelDevi:: -get_ntypes () const -{ - int myntypes = 0; - for (unsigned ii = 0; ii < numb_models; ++ii){ - std::vector output_tensors; - checkStatus (sessions[ii]->Run(std::vector> ({}), - {"t_ntypes"}, - {}, - &output_tensors)); - Tensor output_rc = output_tensors[0]; - auto orc = output_rc.flat (); - if (ii == 0){ - myntypes = orc(0); - } - else { - assert (myntypes == orc(0)); - } - } - return myntypes; -} - void NNPInterModelDevi:: compute (ENERGYTYPE & dener, @@ -724,14 +767,18 @@ compute (ENERGYTYPE & dener, 
vector & model_devi, const vector & dcoord_, const vector & datype_, - const vector & dbox) + const vector & dbox, + const vector fparam) { if (numb_models == 0) return; + if (fparam.size() != dfparam) { + throw std::runtime_error("the dim of frame parameter provided is not consistent with what the model uses"); + } NNPAtomMap nnpmap (datype_.begin(), datype_.end()); std::vector> input_tensors; - int nloc = make_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, nnpmap); + int nloc = make_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, fparam, nnpmap); vector all_energy (numb_models); vector > all_force (numb_models); @@ -769,9 +816,13 @@ compute (vector & all_energy, const vector & datype_, const vector & dbox, const int nghost, - const LammpsNeighborList & lmp_list) + const LammpsNeighborList & lmp_list, + const vector fparam) { if (numb_models == 0) return; + if (fparam.size() != dfparam) { + throw std::runtime_error("the dim of frame parameter provided is not consistent with what the model uses"); + } int nall = dcoord_.size() / 3; int nloc = nall - nghost; @@ -783,7 +834,7 @@ compute (vector & all_energy, shuffle_nlist (nlist, nnpmap); std::vector> input_tensors; - int ret = make_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, nnpmap, nghost); + int ret = make_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, nnpmap, nghost); assert (nloc == ret); all_energy.resize (numb_models); @@ -806,9 +857,13 @@ compute (vector & all_energy, const vector & datype_, const vector & dbox, const int nghost, - const LammpsNeighborList & lmp_list) + const LammpsNeighborList & lmp_list, + const vector fparam) { if (numb_models == 0) return; + if (fparam.size() != dfparam) { + throw std::runtime_error("the dim of frame parameter provided is not consistent with what the model uses"); + } int nall = dcoord_.size() / 3; int nloc = nall - nghost; @@ -820,7 +875,7 @@ compute (vector & 
all_energy, shuffle_nlist (nlist, nnpmap); std::vector> input_tensors; - int ret = make_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, nnpmap, nghost); + int ret = make_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, nnpmap, nghost); assert (nloc == ret); all_energy.resize (numb_models); diff --git a/source/lib/src/NeighborList.cpp b/source/lib/src/NeighborList.cpp index 9d296335f1..fda1eddbf0 100644 --- a/source/lib/src/NeighborList.cpp +++ b/source/lib/src/NeighborList.cpp @@ -3,6 +3,9 @@ // #include using namespace std; +enum { + MAX_WARN_IDX_OUT_OF_BOUND = 10, +}; bool is_loc (const vector & idx, @@ -44,6 +47,10 @@ build_clist (vector > & clist, const SimulationRegion & region, const vector & global_grid) { + static int count_warning_loc_idx_lower = 0; + static int count_warning_loc_idx_upper = 0; + static int count_warning_ghost_idx_lower = 0; + static int count_warning_ghost_idx_upper = 0; // compute region info, in terms of internal coord int nall = coord.size() / 3; vector ext_ncell(3); @@ -72,12 +79,16 @@ build_clist (vector > & clist, for (int dd = 0; dd < 3; ++dd){ idx[dd] = (inter[dd] - nat_orig[dd]) / cell_size[dd]; if (inter[dd] - nat_orig[dd] < 0.) 
idx[dd] --; - if (idx[dd] < nat_stt[dd]) { - cerr << "# warning: loc idx out of lower bound " << endl; + if (idx[dd] < nat_stt[dd] && + count_warning_loc_idx_lower < MAX_WARN_IDX_OUT_OF_BOUND) { + cerr << "# warning: loc idx out of lower bound (ignored if warned for more than " << MAX_WARN_IDX_OUT_OF_BOUND << " times) " << endl; + count_warning_loc_idx_lower ++; idx[dd] = nat_stt[dd]; } - else if (idx[dd] >= nat_end[dd]) { - cerr << "# warning: loc idx out of upper bound " << endl; + else if (idx[dd] >= nat_end[dd] && + count_warning_loc_idx_upper < MAX_WARN_IDX_OUT_OF_BOUND) { + cerr << "# warning: loc idx out of upper bound (ignored if warned for more than " << MAX_WARN_IDX_OUT_OF_BOUND << " times) " << endl; + count_warning_loc_idx_upper ++; idx[dd] = nat_end[dd] - 1; } idx[dd] += idx_orig_shift[dd]; @@ -91,16 +102,20 @@ build_clist (vector > & clist, for (int dd = 0; dd < 3; ++dd){ idx[dd] = (inter[dd] - nat_orig[dd]) / cell_size[dd]; if (inter[dd] - nat_orig[dd] < 0.) idx[dd] --; - if (idx[dd] < ext_stt[dd]) { + if (idx[dd] < ext_stt[dd] && + count_warning_ghost_idx_lower < MAX_WARN_IDX_OUT_OF_BOUND) { if (fabs((inter[dd] - nat_orig[dd]) - (ext_stt[dd] * cell_size[dd])) > fabs(ext_stt[dd] * cell_size[dd]) * numeric_limits::epsilon() * 5. 
) { - cerr << "# warning: ghost idx out of lower bound " << endl; + cerr << "# warning: ghost idx out of lower bound (ignored if warned for more than " << MAX_WARN_IDX_OUT_OF_BOUND << " times) " << endl; + count_warning_ghost_idx_lower ++; } idx[dd] = ext_stt[dd]; } - else if (idx[dd] >= ext_end[dd]) { - cerr << "# warning: ghost idx out of upper bound " << endl; + else if (idx[dd] >= ext_end[dd] && + count_warning_ghost_idx_upper < MAX_WARN_IDX_OUT_OF_BOUND) { + cerr << "# warning: ghost idx out of upper bound (ignored if warned for more than " << MAX_WARN_IDX_OUT_OF_BOUND << " times) " << endl; + count_warning_ghost_idx_upper ++; idx[dd] = ext_end[dd] - 1; } idx[dd] += idx_orig_shift[dd]; @@ -117,6 +132,8 @@ build_clist (vector > & clist, const vector & nat_end, const SimulationRegion & region) { + static int count_warning_loc_idx_lower = 0; + static int count_warning_loc_idx_upper = 0; // compute region info, in terms of internal coord int nall = coord.size() / 3; vector nat_ncell(3); @@ -144,12 +161,16 @@ build_clist (vector > & clist, for (int dd = 0; dd < 3; ++dd){ idx[dd] = (inter[dd] - nat_orig[dd]) / cell_size[dd]; if (inter[dd] - nat_orig[dd] < 0.) 
idx[dd] --; - if (idx[dd] < nat_stt[dd]) { - cerr << "# warning: loc idx out of lower bound " << endl; + if (idx[dd] < nat_stt[dd] && + count_warning_loc_idx_lower < MAX_WARN_IDX_OUT_OF_BOUND) { + cerr << "# warning: loc idx out of lower bound (ignored if warned for more than " << MAX_WARN_IDX_OUT_OF_BOUND << " times) " << endl; + count_warning_loc_idx_lower ++; idx[dd] = nat_stt[dd]; } - else if (idx[dd] >= nat_end[dd]) { - cerr << "# warning: loc idx out of upper bound " << endl; + else if (idx[dd] >= nat_end[dd] && + count_warning_loc_idx_upper < MAX_WARN_IDX_OUT_OF_BOUND) { + cerr << "# warning: loc idx out of upper bound (ignored if warned for more than " << MAX_WARN_IDX_OUT_OF_BOUND << " times) " << endl; + count_warning_loc_idx_upper ++; idx[dd] = nat_end[dd] - 1; } } diff --git a/source/lmp/pair_nnp.cpp b/source/lmp/pair_nnp.cpp index b97a7b69c3..ee46fe2d02 100644 --- a/source/lmp/pair_nnp.cpp +++ b/source/lmp/pair_nnp.cpp @@ -39,6 +39,9 @@ PairNNP::PairNNP(LAMMPS *lmp) : Pair(lmp) { + if (strcmp(update->unit_style,"metal") != 0) { + error->all(FLERR,"Pair deepmd requires metal unit, please set it by \"units metal\""); + } pppmflag = 1; respa_enable = 0; writedata = 0; @@ -127,12 +130,15 @@ void PairNNP::compute(int eflag, int vflag) } // compute + bool single_model = (numb_models == 1); + bool multi_models_no_mod_devi = (numb_models > 1 && (out_freq == 0 || update->ntimestep % out_freq != 0)); + bool multi_models_mod_devi = (numb_models > 1 && (out_freq > 0 && update->ntimestep % out_freq == 0)); if (do_ghost) { LammpsNeighborList lmp_list (list->inum, list->ilist, list->numneigh, list->firstneigh); - if (numb_models == 1) { + if (single_model || multi_models_no_mod_devi) { if ( ! 
(eflag_atom || vflag_atom) ) { #ifdef HIGH_PREC - nnp_inter.compute (dener, dforce, dvirial, dcoord, dtype, dbox, nghost, lmp_list); + nnp_inter.compute (dener, dforce, dvirial, dcoord, dtype, dbox, nghost, lmp_list, fparam); #else vector dcoord_(dcoord.size()); vector dbox_(dbox.size()); @@ -141,7 +147,7 @@ void PairNNP::compute(int eflag, int vflag) vector dforce_(dforce.size(), 0); vector dvirial_(dvirial.size(), 0); double dener_ = 0; - nnp_inter.compute (dener_, dforce_, dvirial_, dcoord_, dtype, dbox_, nghost, lmp_list); + nnp_inter.compute (dener_, dforce_, dvirial_, dcoord_, dtype, dbox_, nghost, lmp_list, fparam); for (unsigned dd = 0; dd < dforce.size(); ++dd) dforce[dd] = dforce_[dd]; for (unsigned dd = 0; dd < dvirial.size(); ++dd) dvirial[dd] = dvirial_[dd]; dener = dener_; @@ -152,7 +158,7 @@ void PairNNP::compute(int eflag, int vflag) vector deatom (nall * 1, 0); vector dvatom (nall * 9, 0); #ifdef HIGH_PREC - nnp_inter.compute (dener, dforce, dvirial, deatom, dvatom, dcoord, dtype, dbox, nghost, lmp_list); + nnp_inter.compute (dener, dforce, dvirial, deatom, dvatom, dcoord, dtype, dbox, nghost, lmp_list, fparam); #else vector dcoord_(dcoord.size()); vector dbox_(dbox.size()); @@ -163,7 +169,7 @@ void PairNNP::compute(int eflag, int vflag) vector deatom_(dforce.size(), 0); vector dvatom_(dforce.size(), 0); double dener_ = 0; - nnp_inter.compute (dener_, dforce_, dvirial_, deatom_, dvatom_, dcoord_, dtype, dbox_, nghost, lmp_list); + nnp_inter.compute (dener_, dforce_, dvirial_, deatom_, dvatom_, dcoord_, dtype, dbox_, nghost, lmp_list, fparam); for (unsigned dd = 0; dd < dforce.size(); ++dd) dforce[dd] = dforce_[dd]; for (unsigned dd = 0; dd < dvirial.size(); ++dd) dvirial[dd] = dvirial_[dd]; for (unsigned dd = 0; dd < deatom.size(); ++dd) deatom[dd] = deatom_[dd]; @@ -185,7 +191,7 @@ void PairNNP::compute(int eflag, int vflag) } } } - else { + else if (multi_models_mod_devi) { vector deatom (nall * 1, 0); vector dvatom (nall * 9, 0); #ifdef 
HIGH_PREC @@ -193,12 +199,17 @@ void PairNNP::compute(int eflag, int vflag) vector> all_virial; vector> all_atom_energy; vector> all_atom_virial; - nnp_inter_model_devi.compute(all_energy, all_force, all_virial, all_atom_energy, all_atom_virial, dcoord, dtype, dbox, nghost, lmp_list); - nnp_inter_model_devi.compute_avg (dener, all_energy); - nnp_inter_model_devi.compute_avg (dforce, all_force); - nnp_inter_model_devi.compute_avg (dvirial, all_virial); - nnp_inter_model_devi.compute_avg (deatom, all_atom_energy); - nnp_inter_model_devi.compute_avg (dvatom, all_atom_virial); + nnp_inter_model_devi.compute(all_energy, all_force, all_virial, all_atom_energy, all_atom_virial, dcoord, dtype, dbox, nghost, lmp_list, fparam); + // nnp_inter_model_devi.compute_avg (dener, all_energy); + // nnp_inter_model_devi.compute_avg (dforce, all_force); + // nnp_inter_model_devi.compute_avg (dvirial, all_virial); + // nnp_inter_model_devi.compute_avg (deatom, all_atom_energy); + // nnp_inter_model_devi.compute_avg (dvatom, all_atom_virial); + dener = all_energy[0]; + dforce = all_force[0]; + dvirial = all_virial[0]; + deatom = all_atom_energy[0]; + dvatom = all_atom_virial[0]; #else vector dcoord_(dcoord.size()); vector dbox_(dbox.size()); @@ -214,12 +225,17 @@ void PairNNP::compute(int eflag, int vflag) vector> all_virial_; vector> all_atom_energy_; vector> all_atom_virial_; - nnp_inter_model_devi.compute(all_energy_, all_force_, all_virial_, all_atom_energy_, all_atom_virial_, dcoord_, dtype, dbox_, nghost, lmp_list); - nnp_inter_model_devi.compute_avg (dener_, all_energy_); - nnp_inter_model_devi.compute_avg (dforce_, all_force_); - nnp_inter_model_devi.compute_avg (dvirial_, all_virial_); - nnp_inter_model_devi.compute_avg (deatom_, all_atom_energy_); - nnp_inter_model_devi.compute_avg (dvatom_, all_atom_virial_); + nnp_inter_model_devi.compute(all_energy_, all_force_, all_virial_, all_atom_energy_, all_atom_virial_, dcoord_, dtype, dbox_, nghost, lmp_list, fparam); + // 
nnp_inter_model_devi.compute_avg (dener_, all_energy_); + // nnp_inter_model_devi.compute_avg (dforce_, all_force_); + // nnp_inter_model_devi.compute_avg (dvirial_, all_virial_); + // nnp_inter_model_devi.compute_avg (deatom_, all_atom_energy_); + // nnp_inter_model_devi.compute_avg (dvatom_, all_atom_virial_); + dener_ = all_energy_[0]; + dforce_ = all_force_[0]; + dvirial_ = all_virial_[0]; + deatom_ = all_atom_energy_[0]; + dvatom_ = all_atom_virial_[0]; dener = dener_; for (unsigned dd = 0; dd < dforce.size(); ++dd) dforce[dd] = dforce_[dd]; for (unsigned dd = 0; dd < dvirial.size(); ++dd) dvirial[dd] = dvirial_[dd]; @@ -319,6 +335,9 @@ void PairNNP::compute(int eflag, int vflag) } } } + else { + error->all(FLERR,"unknown computational branch"); + } } else { if (numb_models == 1) { @@ -388,33 +407,90 @@ void PairNNP::allocate() } } + +static bool +is_key (const string& input) +{ + vector keys ; + keys.push_back("out_freq"); + keys.push_back("out_file"); + keys.push_back("fparam"); + + for (int ii = 0; ii < keys.size(); ++ii){ + if (input == keys[ii]) { + return true; + } + } + return false; +} + + void PairNNP::settings(int narg, char **arg) { if (narg <= 0) error->all(FLERR,"Illegal pair_style command"); - if (narg == 1) { + vector models; + int iarg = 0; + while (iarg < narg){ + if (is_key(arg[iarg])) { + break; + } + iarg ++; + } + for (int ii = 0; ii < iarg; ++ii){ + models.push_back(arg[ii]); + } + numb_models = models.size(); + if (numb_models == 1) { nnp_inter.init (arg[0]); cutoff = nnp_inter.cutoff (); numb_types = nnp_inter.numb_types(); - numb_models = 1; + dim_fparam = nnp_inter.dim_fparam(); } else { - if (narg < 4) { - error->all(FLERR,"Illegal pair_style command\nusage:\npair_style deepmd model1 model2 [models...] 
out_freq out_file\n"); - } - vector models; - for (int ii = 0; ii < narg-2; ++ii){ - models.push_back(arg[ii]); - } - out_freq = atoi(arg[narg-2]); - if (out_freq < 0) error->all(FLERR,"Illegal out_freq, should be >= 0"); - out_file = string(arg[narg-1]); - + nnp_inter.init (arg[0]); nnp_inter_model_devi.init(models); cutoff = nnp_inter_model_devi.cutoff(); numb_types = nnp_inter_model_devi.numb_types(); - numb_models = models.size(); - if (comm->me == 0){ + dim_fparam = nnp_inter_model_devi.dim_fparam(); + assert(cutoff == nnp_inter.cutoff()); + assert(numb_types == nnp_inter.numb_types()); + assert(dim_fparam == nnp_inter.dim_fparam()); + } + + out_freq = 100; + out_file = "model_devi.out"; + fparam.clear(); + while (iarg < narg) { + if (! is_key(arg[iarg])) { + error->all(FLERR,"Illegal pair_style command\nwrong number of parameters\n"); + } + if (string(arg[iarg]) == string("out_freq")) { + if (iarg+1 >= narg) error->all(FLERR,"Illegal out_freq, not provided"); + out_freq = atoi(arg[iarg+1]); + iarg += 2; + } + else if (string(arg[iarg]) == string("out_file")) { + if (iarg+1 >= narg) error->all(FLERR,"Illegal out_file, not provided"); + out_file = string(arg[iarg+1]); + iarg += 2; + } + else if (string(arg[iarg]) == string("fparam")) { + for (int ii = 0; ii < dim_fparam; ++ii){ + if (iarg+1+ii >= narg || is_key(arg[iarg+1+ii])) { + char tmp[1024]; + sprintf(tmp, "Illegal fparam, the dimension should be %d", dim_fparam); + error->all(FLERR, tmp); + } + fparam.push_back(atof(arg[iarg+1+ii])); + } + iarg += 1 + dim_fparam ; + } + } + if (out_freq < 0) error->all(FLERR,"Illegal out_freq, should be >= 0"); + + if (comm->me == 0){ + if (numb_models > 1 && out_freq > 0){ fp.open (out_file); fp << scientific; fp << "#" @@ -427,9 +503,6 @@ void PairNNP::settings(int narg, char **arg) << setw(18+1) << "avg_devi_f" << endl; } - } - - if (comm->me == 0){ string pre = " "; cout << pre << ">>> Info of model(s):" << endl << pre << "using " << setw(3) << numb_models << " 
model(s): "; @@ -437,13 +510,20 @@ void PairNNP::settings(int narg, char **arg) cout << arg[0] << " "; } else { - for (int ii = 0; ii < narg-2; ++ii){ - cout << arg[ii] << " "; + for (int ii = 0; ii < models.size(); ++ii){ + cout << models[ii] << " "; } } cout << endl << pre << "rcut in model: " << cutoff << endl << pre << "ntypes in model: " << numb_types << endl; + if (dim_fparam > 0) { + cout << pre << "using fparam(s): " ; + for (int ii = 0; ii < dim_fparam; ++ii){ + cout << fparam[ii] << " " ; + } + cout << endl; + } } comm_reverse = numb_models * 3; diff --git a/source/lmp/pair_nnp.h.in b/source/lmp/pair_nnp.h.in index 5e9b04e4a3..edb4aee9aa 100644 --- a/source/lmp/pair_nnp.h.in +++ b/source/lmp/pair_nnp.h.in @@ -78,6 +78,12 @@ private: ofstream fp; int out_freq; string out_file; + int dim_fparam; +#ifdef HIGH_PREC + vector fparam; +#else + vector fparam; +#endif }; } diff --git a/source/op/CMakeLists.txt b/source/op/CMakeLists.txt index 7f58c1ecae..be8a191aba 100644 --- a/source/op/CMakeLists.txt +++ b/source/op/CMakeLists.txt @@ -3,60 +3,34 @@ set(OP_LIB ${PROJECT_SOURCE_DIR}/lib/src/SimulationRegion.cpp ${PROJECT_SOURCE_DIR}/lib/src/NeighborList.cpp) set (OP_CXX_FLAG -D_GLIBCXX_USE_CXX11_ABI=${OP_ABI} ) -file(GLOB OP_SRC prod_force.cc prod_virial.cc descrpt.cc descrpt_norot.cc prod_force_norot.cc prod_virial_norot.cc ) -file(GLOB OP_PY *.py) +file(GLOB OP_SRC prod_force.cc prod_virial.cc descrpt.cc descrpt_se_a.cc descrpt_se_r.cc tab_inter.cc prod_force_se_a.cc prod_virial_se_a.cc prod_force_se_r.cc prod_virial_se_r.cc soft_min.cc soft_min_force.cc soft_min_virial.cc ) +file(GLOB OP_GRADS_SRC prod_force_grad.cc prod_force_se_a_grad.cc prod_force_se_r_grad.cc prod_virial_grad.cc prod_virial_se_a_grad.cc prod_virial_se_r_grad.cc soft_min_force_grad.cc soft_min_virial_grad.cc ) +file(GLOB OP_PY *.py) -add_library(${LIB_DEEPMD_OP} SHARED ${OP_SRC}) -add_library(op_abi SHARED ${OP_SRC} ${OP_LIB}) -add_library(prod_force_grad SHARED prod_force_grad.cc) 
-add_library(prod_force_norot_grad SHARED prod_force_norot_grad.cc) -add_library(prod_virial_grad SHARED prod_virial_grad.cc) -add_library(prod_virial_norot_grad SHARED prod_virial_norot_grad.cc) +if (BUILD_CPP_IF) + add_library(${LIB_DEEPMD_OP} SHARED ${OP_SRC}) +endif (BUILD_CPP_IF) +if (BUILD_PY_IF) + add_library(op_abi SHARED ${OP_SRC} ${OP_LIB}) + add_library(op_grads SHARED ${OP_GRADS_SRC}) + target_link_libraries( + op_abi ${TensorFlowFramework_LIBRARY} + ) + target_link_libraries( + op_grads ${TensorFlowFramework_LIBRARY} + ) + set_target_properties( + op_abi + PROPERTIES + COMPILE_FLAGS ${OP_CXX_FLAG} + ) +endif (BUILD_PY_IF) -target_link_libraries( - op_abi ${TensorFlowFramework_LIBRARY} -) -target_link_libraries( - prod_force_grad ${TensorFlowFramework_LIBRARY} -) -target_link_libraries( - prod_force_norot_grad ${TensorFlowFramework_LIBRARY} -) -target_link_libraries( - prod_virial_grad ${TensorFlowFramework_LIBRARY} -) -target_link_libraries( - prod_virial_norot_grad ${TensorFlowFramework_LIBRARY} -) -set_target_properties( - op_abi - PROPERTIES - COMPILE_FLAGS ${OP_CXX_FLAG} -) -set_target_properties( - prod_force_grad - PROPERTIES - COMPILE_FLAGS ${OP_CXX_FLAG} -) -set_target_properties( - prod_force_norot_grad - PROPERTIES - COMPILE_FLAGS ${OP_CXX_FLAG} -) -set_target_properties( - prod_virial_grad - PROPERTIES - COMPILE_FLAGS ${OP_CXX_FLAG} -) -set_target_properties( - prod_virial_norot_grad - PROPERTIES - COMPILE_FLAGS ${OP_CXX_FLAG} -) -install(TARGETS ${LIB_DEEPMD_OP} DESTINATION lib/) -install(TARGETS op_abi DESTINATION lib/deepmd) -install(TARGETS prod_force_grad DESTINATION lib/deepmd) -install(TARGETS prod_force_norot_grad DESTINATION lib/deepmd) -install(TARGETS prod_virial_grad DESTINATION lib/deepmd) -install(TARGETS prod_virial_norot_grad DESTINATION lib/deepmd) -install(FILES ${OP_PY} DESTINATION lib/deepmd) +if (BUILD_CPP_IF) + install(TARGETS ${LIB_DEEPMD_OP} DESTINATION lib/) +endif (BUILD_CPP_IF) +if (BUILD_PY_IF) + install(TARGETS 
op_abi DESTINATION deepmd) + install(TARGETS op_grads DESTINATION deepmd) + install(FILES ${OP_PY} DESTINATION deepmd) +endif (BUILD_PY_IF) diff --git a/source/op/_prod_force_grad.py b/source/op/_prod_force_grad.py index f68076ab79..d07fc2db1e 100644 --- a/source/op/_prod_force_grad.py +++ b/source/op/_prod_force_grad.py @@ -9,18 +9,19 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import sparse_ops -force_module_path = os.path.dirname(os.path.realpath(__file__)) + "/" -assert (os.path.isfile (force_module_path + "libprod_force_grad.so" )), "force module grad does not exist" -prod_force_grad_module = tf.load_op_library(force_module_path + 'libprod_force_grad.so') +module_path = os.path.dirname(os.path.realpath(__file__)) +module_file = os.path.join(module_path, 'libop_grads.so') +assert (os.path.isfile(module_file)), 'module op_grads does not exist' +op_grads_module = tf.load_op_library(module_file) @ops.RegisterGradient("ProdForce") def _prod_force_grad_cc (op, grad): - net_grad = prod_force_grad_module.prod_force_grad (grad, - op.inputs[0], - op.inputs[1], - op.inputs[2], - op.inputs[3], - op.inputs[4], - n_a_sel = op.get_attr("n_a_sel"), - n_r_sel = op.get_attr("n_r_sel")) + net_grad = op_grads_module.prod_force_grad (grad, + op.inputs[0], + op.inputs[1], + op.inputs[2], + op.inputs[3], + op.inputs[4], + n_a_sel = op.get_attr("n_a_sel"), + n_r_sel = op.get_attr("n_r_sel")) return [net_grad, None, None, None, None] diff --git a/source/op/_prod_force_norot_grad.py b/source/op/_prod_force_norot_grad.py deleted file mode 100644 index 8cf42f92a8..0000000000 --- a/source/op/_prod_force_norot_grad.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python3 -""" -Gradients for prod force. 
-""" - -import os -import tensorflow as tf -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import sparse_ops - -force_module_path = os.path.dirname(os.path.realpath(__file__)) + "/" -assert (os.path.isfile (force_module_path + "libprod_force_norot_grad.so" )), "force module grad does not exist" -prod_force_grad_module = tf.load_op_library(force_module_path + 'libprod_force_norot_grad.so') - -@ops.RegisterGradient("ProdForceNorot") -def _prod_force_norot_grad_cc (op, grad): - net_grad = prod_force_grad_module.prod_force_norot_grad (grad, - op.inputs[0], - op.inputs[1], - op.inputs[2], - op.inputs[3], - n_a_sel = op.get_attr("n_a_sel"), - n_r_sel = op.get_attr("n_r_sel")) - return [net_grad, None, None, None] diff --git a/source/op/_prod_force_se_a_grad.py b/source/op/_prod_force_se_a_grad.py new file mode 100644 index 0000000000..14e0b5556a --- /dev/null +++ b/source/op/_prod_force_se_a_grad.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 +""" +Gradients for prod force. 
+""" + +import os +import tensorflow as tf +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import sparse_ops + +module_path = os.path.dirname(os.path.realpath(__file__)) +module_file = os.path.join(module_path, 'libop_grads.so') +assert (os.path.isfile(module_file)), 'module op_grads does not exist' +op_grads_module = tf.load_op_library(module_file) + +@ops.RegisterGradient("ProdForceSeA") +def _prod_force_se_a_grad_cc (op, grad): + net_grad = op_grads_module.prod_force_se_a_grad (grad, + op.inputs[0], + op.inputs[1], + op.inputs[2], + op.inputs[3], + n_a_sel = op.get_attr("n_a_sel"), + n_r_sel = op.get_attr("n_r_sel")) + return [net_grad, None, None, None] diff --git a/source/op/_prod_force_se_r_grad.py b/source/op/_prod_force_se_r_grad.py new file mode 100644 index 0000000000..be96ee7c4f --- /dev/null +++ b/source/op/_prod_force_se_r_grad.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3 +""" +Gradients for prod force. +""" + +import os +import tensorflow as tf +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import sparse_ops + +module_path = os.path.dirname(os.path.realpath(__file__)) +module_file = os.path.join(module_path, 'libop_grads.so') +assert (os.path.isfile(module_file)), 'module op_grads does not exist' +op_grads_module = tf.load_op_library(module_file) + +@ops.RegisterGradient("ProdForceSeR") +def _prod_force_se_a_grad_cc (op, grad): + net_grad = op_grads_module.prod_force_se_r_grad (grad, + op.inputs[0], + op.inputs[1], + op.inputs[2], + op.inputs[3]) + return [net_grad, None, None, None] diff --git a/source/op/_prod_virial_grad.py b/source/op/_prod_virial_grad.py index 895f3f2d7d..ab1a92cd24 100644 --- a/source/op/_prod_virial_grad.py +++ b/source/op/_prod_virial_grad.py @@ -9,19 +9,20 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import sparse_ops -virial_module_path = 
os.path.dirname(os.path.realpath(__file__)) + "/" -assert (os.path.isfile (virial_module_path + "libprod_virial_grad.so" )), "virial module grad does not exist" -prod_virial_grad_module = tf.load_op_library(virial_module_path + 'libprod_virial_grad.so') +module_path = os.path.dirname(os.path.realpath(__file__)) +module_file = os.path.join(module_path, 'libop_grads.so') +assert (os.path.isfile(module_file)), 'module op_grads does not exist' +op_grads_module = tf.load_op_library(module_file) @ops.RegisterGradient("ProdVirial") def _prod_virial_grad_cc (op, grad, grad_atom): - net_grad = prod_virial_grad_module.prod_virial_grad (grad, - op.inputs[0], - op.inputs[1], - op.inputs[2], - op.inputs[3], - op.inputs[4], - op.inputs[5], - n_a_sel = op.get_attr("n_a_sel"), - n_r_sel = op.get_attr("n_r_sel")) + net_grad = op_grads_module.prod_virial_grad (grad, + op.inputs[0], + op.inputs[1], + op.inputs[2], + op.inputs[3], + op.inputs[4], + op.inputs[5], + n_a_sel = op.get_attr("n_a_sel"), + n_r_sel = op.get_attr("n_r_sel")) return [net_grad, None, None, None, None, None] diff --git a/source/op/_prod_virial_norot_grad.py b/source/op/_prod_virial_norot_grad.py deleted file mode 100644 index 894bd85452..0000000000 --- a/source/op/_prod_virial_norot_grad.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -""" -Gradients for prod virial. 
-""" - -import os -import tensorflow as tf -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import sparse_ops - -virial_module_path = os.path.dirname(os.path.realpath(__file__)) + "/" -assert (os.path.isfile (virial_module_path + "libprod_virial_norot_grad.so" )), "virial module grad does not exist" -prod_virial_grad_module = tf.load_op_library(virial_module_path + 'libprod_virial_norot_grad.so') - -@ops.RegisterGradient("ProdVirialNorot") -def _prod_virial_norot_grad_cc (op, grad, grad_atom): - net_grad = prod_virial_grad_module.prod_virial_norot_grad (grad, - op.inputs[0], - op.inputs[1], - op.inputs[2], - op.inputs[3], - op.inputs[4], - n_a_sel = op.get_attr("n_a_sel"), - n_r_sel = op.get_attr("n_r_sel")) - return [net_grad, None, None, None, None] diff --git a/source/op/_prod_virial_se_a_grad.py b/source/op/_prod_virial_se_a_grad.py new file mode 100644 index 0000000000..fb4d7688de --- /dev/null +++ b/source/op/_prod_virial_se_a_grad.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 +""" +Gradients for prod virial. 
+""" + +import os +import tensorflow as tf +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import sparse_ops + +module_path = os.path.dirname(os.path.realpath(__file__)) +module_file = os.path.join(module_path, 'libop_grads.so') +assert (os.path.isfile(module_file)), 'module op_grads does not exist' +op_grads_module = tf.load_op_library(module_file) + +@ops.RegisterGradient("ProdVirialSeA") +def _prod_virial_se_a_grad_cc (op, grad, grad_atom): + net_grad = op_grads_module.prod_virial_se_a_grad (grad, + op.inputs[0], + op.inputs[1], + op.inputs[2], + op.inputs[3], + op.inputs[4], + n_a_sel = op.get_attr("n_a_sel"), + n_r_sel = op.get_attr("n_r_sel")) + return [net_grad, None, None, None, None] diff --git a/source/op/_prod_virial_se_r_grad.py b/source/op/_prod_virial_se_r_grad.py new file mode 100644 index 0000000000..15b3f4556c --- /dev/null +++ b/source/op/_prod_virial_se_r_grad.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 +""" +Gradients for prod virial. 
+""" + +import os +import tensorflow as tf +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import sparse_ops + +module_path = os.path.dirname(os.path.realpath(__file__)) +module_file = os.path.join(module_path, 'libop_grads.so') +assert (os.path.isfile(module_file)), 'module op_grads does not exist' +op_grads_module = tf.load_op_library(module_file) + +@ops.RegisterGradient("ProdVirialSeR") +def _prod_virial_se_a_grad_cc (op, grad, grad_atom): + net_grad = op_grads_module.prod_virial_se_r_grad (grad, + op.inputs[0], + op.inputs[1], + op.inputs[2], + op.inputs[3], + op.inputs[4]) + return [net_grad, None, None, None, None] diff --git a/source/op/_soft_min_force_grad.py b/source/op/_soft_min_force_grad.py new file mode 100644 index 0000000000..8d133e706c --- /dev/null +++ b/source/op/_soft_min_force_grad.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 +""" +Gradients for soft min force +""" + +import os +import tensorflow as tf +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import sparse_ops + +module_path = os.path.dirname(os.path.realpath(__file__)) +module_file = os.path.join(module_path, 'libop_grads.so') +assert (os.path.isfile(module_file)), 'module op_grads does not exist' +op_grads_module = tf.load_op_library(module_file) + +@ops.RegisterGradient("SoftMinForce") +def _soft_min_force_grad_cc (op, grad): + net_grad = op_grads_module.soft_min_force_grad (grad, + op.inputs[0], + op.inputs[1], + op.inputs[2], + op.inputs[3], + n_a_sel = op.get_attr("n_a_sel"), + n_r_sel = op.get_attr("n_r_sel")) + return [net_grad, None, None, None] diff --git a/source/op/_soft_min_virial_grad.py b/source/op/_soft_min_virial_grad.py new file mode 100644 index 0000000000..e7e12c3d1c --- /dev/null +++ b/source/op/_soft_min_virial_grad.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 +""" +Gradients for soft min virial. 
+""" + +import os +import tensorflow as tf +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import sparse_ops + +module_path = os.path.dirname(os.path.realpath(__file__)) +module_file = os.path.join(module_path, 'libop_grads.so') +assert (os.path.isfile(module_file)), 'module op_grads does not exist' +op_grads_module = tf.load_op_library(module_file) + +@ops.RegisterGradient("SoftMinVirial") +def _soft_min_virial_grad_cc (op, grad, grad_atom): + net_grad = op_grads_module.soft_min_virial_grad (grad, + op.inputs[0], + op.inputs[1], + op.inputs[2], + op.inputs[3], + op.inputs[4], + n_a_sel = op.get_attr("n_a_sel"), + n_r_sel = op.get_attr("n_r_sel")) + return [net_grad, None, None, None, None] diff --git a/source/op/descrpt_norot.cc b/source/op/descrpt_se_a.cc similarity index 96% rename from source/op/descrpt_norot.cc rename to source/op/descrpt_se_a.cc index 42031c2557..1764191b5a 100644 --- a/source/op/descrpt_norot.cc +++ b/source/op/descrpt_se_a.cc @@ -19,7 +19,7 @@ typedef float VALUETYPE ; #endif #ifdef HIGH_PREC -REGISTER_OP("DescrptNorot") +REGISTER_OP("DescrptSeA") .Input("coord: double") .Input("type: int32") .Input("natoms: int32") @@ -37,7 +37,7 @@ REGISTER_OP("DescrptNorot") .Output("rij: double") .Output("nlist: int32"); #else -REGISTER_OP("DescrptNorot") +REGISTER_OP("DescrptSeA") .Input("coord: float") .Input("type: int32") .Input("natoms: int32") @@ -56,9 +56,9 @@ REGISTER_OP("DescrptNorot") .Output("nlist: int32"); #endif -class DescrptNorotOp : public OpKernel { +class DescrptSeAOp : public OpKernel { public: - explicit DescrptNorotOp(OpKernelConstruction* context) : OpKernel(context) { + explicit DescrptSeAOp(OpKernelConstruction* context) : OpKernel(context) { OP_REQUIRES_OK(context, context->GetAttr("rcut_a", &rcut_a)); OP_REQUIRES_OK(context, context->GetAttr("rcut_r", &rcut_r)); OP_REQUIRES_OK(context, context->GetAttr("rcut_r_smth", &rcut_r_smth)); @@ -291,19 +291,19 @@ 
class DescrptNorotOp : public OpKernel { vector d_descrpt_r_deriv; vector d_rij_a; vector d_rij_r; - compute_descriptor_norot (d_descrpt_a, - d_descrpt_a_deriv, - d_rij_a, - d_coord3, - ntypes, - d_type, - region, - b_pbc, - ii, - fmt_nlist_a, - sec_a, - rcut_r_smth, - rcut_r); + compute_descriptor_se_a (d_descrpt_a, + d_descrpt_a_deriv, + d_rij_a, + d_coord3, + ntypes, + d_type, + region, + b_pbc, + ii, + fmt_nlist_a, + sec_a, + rcut_r_smth, + rcut_r); // check sizes assert (d_descrpt_a.size() == ndescrpt_a); @@ -395,5 +395,5 @@ class DescrptNorotOp : public OpKernel { } }; -REGISTER_KERNEL_BUILDER(Name("DescrptNorot").Device(DEVICE_CPU), DescrptNorotOp); +REGISTER_KERNEL_BUILDER(Name("DescrptSeA").Device(DEVICE_CPU), DescrptSeAOp); diff --git a/source/op/descrpt_se_r.cc b/source/op/descrpt_se_r.cc new file mode 100644 index 0000000000..04624d1e83 --- /dev/null +++ b/source/op/descrpt_se_r.cc @@ -0,0 +1,381 @@ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include + +#include "ComputeDescriptor.h" +#include "NeighborList.h" + +typedef double boxtensor_t ; +typedef double compute_t; + +using namespace tensorflow; +using namespace std; + +#ifdef HIGH_PREC +typedef double VALUETYPE ; +#else +typedef float VALUETYPE ; +#endif + +REGISTER_OP("DescrptSeR") +#ifdef HIGH_PREC +.Input("coord: double") +.Input("type: int32") +.Input("natoms: int32") +.Input("box: double") +.Input("mesh: int32") +.Input("davg: double") +.Input("dstd: double") +.Attr("rcut: float") +.Attr("rcut_smth: float") +.Attr("sel: list(int)") +.Output("descrpt: double") +.Output("descrpt_deriv: double") +.Output("rij: double") +.Output("nlist: int32"); +#else +.Input("coord: float") +.Input("type: int32") +.Input("natoms: int32") +.Input("box: float") +.Input("mesh: int32") +.Input("davg: float") +.Input("dstd: float") +.Attr("rcut: float") +.Attr("rcut_smth: float") +.Attr("sel: list(int)") 
+.Output("descrpt: float") +.Output("descrpt_deriv: float") +.Output("rij: float") +.Output("nlist: int32"); +#endif + +class DescrptSeROp : public OpKernel { +public: + explicit DescrptSeROp(OpKernelConstruction* context) : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("rcut", &rcut)); + OP_REQUIRES_OK(context, context->GetAttr("rcut_smth", &rcut_smth)); + OP_REQUIRES_OK(context, context->GetAttr("sel", &sel)); + cum_sum (sec, sel); + sel_null.resize(3, 0); + cum_sum (sec_null, sel_null); + ndescrpt = sec.back() * 1; + nnei = sec.back(); + fill_nei_a = true; + count_nei_idx_overflow = 0; + } + + void Compute(OpKernelContext* context) override { + // Grab the input tensor + int context_input_index = 0; + const Tensor& coord_tensor = context->input(context_input_index++); + const Tensor& type_tensor = context->input(context_input_index++); + const Tensor& natoms_tensor = context->input(context_input_index++); + const Tensor& box_tensor = context->input(context_input_index++); + const Tensor& mesh_tensor = context->input(context_input_index++); + const Tensor& avg_tensor = context->input(context_input_index++); + const Tensor& std_tensor = context->input(context_input_index++); + + // set size of the sample + OP_REQUIRES (context, (coord_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of coord should be 2")); + OP_REQUIRES (context, (type_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of type should be 2")); + OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1), errors::InvalidArgument ("Dim of natoms should be 1")); + OP_REQUIRES (context, (box_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of box should be 2")); + OP_REQUIRES (context, (mesh_tensor.shape().dims() == 1), errors::InvalidArgument ("Dim of mesh should be 1")); + OP_REQUIRES (context, (avg_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of avg should be 2")); + OP_REQUIRES (context, (std_tensor.shape().dims() == 2), 
errors::InvalidArgument ("Dim of std should be 2")); + OP_REQUIRES (context, (fill_nei_a), errors::InvalidArgument ("Rotational free descriptor only support the case rcut_a < 0")); + + OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3), errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3")); + auto natoms = natoms_tensor .flat(); + int nloc = natoms(0); + int nall = natoms(1); + int ntypes = natoms_tensor.shape().dim_size(0) - 2; + int nsamples = coord_tensor.shape().dim_size(0); + + // check the sizes + OP_REQUIRES (context, (nsamples == type_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match")); + OP_REQUIRES (context, (nsamples == box_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match")); + OP_REQUIRES (context, (ntypes == avg_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of avg should be ntype")); + OP_REQUIRES (context, (ntypes == std_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of std should be ntype")); + + OP_REQUIRES (context, (nall * 3 == coord_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of atoms should match")); + OP_REQUIRES (context, (nall == type_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of atoms should match")); + OP_REQUIRES (context, (9 == box_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of box should be 9")); + OP_REQUIRES (context, (ndescrpt == avg_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of avg should be ndescrpt")); + OP_REQUIRES (context, (ndescrpt == std_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of std should be ndescrpt")); + + int nei_mode = 0; + if (mesh_tensor.shape().dim_size(0) == 16) { + nei_mode = 3; + } + else if (mesh_tensor.shape().dim_size(0) == 12) { + nei_mode = 2; + } + else if (mesh_tensor.shape().dim_size(0) == 6) { + assert (nloc == nall); + nei_mode = 1; + } + + // Create an output tensor 
+ TensorShape descrpt_shape ; + descrpt_shape.AddDim (nsamples); + descrpt_shape.AddDim (nloc * ndescrpt); + TensorShape descrpt_deriv_shape ; + descrpt_deriv_shape.AddDim (nsamples); + descrpt_deriv_shape.AddDim (nloc * ndescrpt * 3); + TensorShape rij_shape ; + rij_shape.AddDim (nsamples); + rij_shape.AddDim (nloc * nnei * 3); + TensorShape nlist_shape ; + nlist_shape.AddDim (nsamples); + nlist_shape.AddDim (nloc * nnei); + + int context_output_index = 0; + Tensor* descrpt_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, + descrpt_shape, + &descrpt_tensor)); + Tensor* descrpt_deriv_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, + descrpt_deriv_shape, + &descrpt_deriv_tensor)); + Tensor* rij_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, + rij_shape, + &rij_tensor)); + Tensor* nlist_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, + nlist_shape, + &nlist_tensor)); + + auto coord = coord_tensor .matrix(); + auto type = type_tensor .matrix(); + auto box = box_tensor .matrix(); + auto mesh = mesh_tensor .flat(); + auto avg = avg_tensor .matrix(); + auto std = std_tensor .matrix(); + auto descrpt = descrpt_tensor ->matrix(); + auto descrpt_deriv = descrpt_deriv_tensor ->matrix(); + auto rij = rij_tensor ->matrix(); + auto nlist = nlist_tensor ->matrix(); + + OP_REQUIRES (context, (ntypes == int(sel.size())), errors::InvalidArgument ("number of types should match the length of sel array")); + + for (int kk = 0; kk < nsamples; ++kk){ + // set region + boxtensor_t boxt [9] = {0}; + for (int dd = 0; dd < 9; ++dd) { + boxt[dd] = box(kk, dd); + } + SimulationRegion region; + region.reinitBox (boxt); + + // set & normalize coord + vector d_coord3 (nall*3); + for (int ii = 0; ii < nall; ++ii){ + for (int dd = 0; dd < 3; ++dd){ + d_coord3[ii*3+dd] = coord(kk, ii*3+dd); + } + if (nei_mode <= 1){ + compute_t 
inter[3]; + region.phys2Inter (inter, &d_coord3[3*ii]); + for (int dd = 0; dd < 3; ++dd){ + if (inter[dd] < 0 ) inter[dd] += 1.; + else if (inter[dd] >= 1) inter[dd] -= 1.; + } + region.inter2Phys (&d_coord3[3*ii], inter); + } + } + + // set type + vector d_type (nall); + for (int ii = 0; ii < nall; ++ii) d_type[ii] = type(kk, ii); + + // build nlist + vector > d_nlist; + vector > d_nlist_null; + vector nlist_map; + bool b_nlist_map = false; + if (nei_mode == 3) { + int * pilist, *pjrange, *pjlist; + memcpy (&pilist, &mesh(4), sizeof(int *)); + memcpy (&pjrange, &mesh(8), sizeof(int *)); + memcpy (&pjlist, &mesh(12), sizeof(int *)); + int inum = mesh(1); + assert (inum == nloc); + d_nlist_null.resize (inum); + d_nlist.resize (inum); + for (unsigned ii = 0; ii < inum; ++ii){ + d_nlist.reserve (pjrange[inum] / inum + 10); + } + for (unsigned ii = 0; ii < inum; ++ii){ + int i_idx = pilist[ii]; + for (unsigned jj = pjrange[ii]; jj < pjrange[ii+1]; ++jj){ + int j_idx = pjlist[jj]; + d_nlist[i_idx].push_back (j_idx); + } + } + } + else if (nei_mode == 2) { + vector nat_stt = {mesh(1-1), mesh(2-1), mesh(3-1)}; + vector nat_end = {mesh(4-1), mesh(5-1), mesh(6-1)}; + vector ext_stt = {mesh(7-1), mesh(8-1), mesh(9-1)}; + vector ext_end = {mesh(10-1), mesh(11-1), mesh(12-1)}; + vector global_grid (3); + for (int dd = 0; dd < 3; ++dd) global_grid[dd] = nat_end[dd] - nat_stt[dd]; + ::build_nlist (d_nlist_null, d_nlist, d_coord3, nloc, -1, rcut, nat_stt, nat_end, ext_stt, ext_end, region, global_grid); + } + else if (nei_mode == 1) { + vector bk_d_coord3 = d_coord3; + vector bk_d_type = d_type; + vector ncell, ngcell; + copy_coord(d_coord3, d_type, nlist_map, ncell, ngcell, bk_d_coord3, bk_d_type, rcut, region); + b_nlist_map = true; + vector nat_stt(3, 0); + vector ext_stt(3), ext_end(3); + for (int dd = 0; dd < 3; ++dd){ + ext_stt[dd] = -ngcell[dd]; + ext_end[dd] = ncell[dd] + ngcell[dd]; + } + ::build_nlist (d_nlist_null, d_nlist, d_coord3, nloc, -1, rcut, nat_stt, ncell, 
ext_stt, ext_end, region, ncell); + } + else { + build_nlist (d_nlist_null, d_nlist, -1, rcut, d_coord3, region); + } + + bool b_pbc = true; + // if region is given extended, do not use pbc + if (nei_mode >= 1) { + b_pbc = false; + } + + // loop over atoms, compute descriptors for each atom +#pragma omp parallel for + for (int ii = 0; ii < nloc; ++ii){ + vector fmt_nlist_null; + vector fmt_nlist; + int ret = -1; + if (fill_nei_a){ + if ((ret = format_nlist_fill_a (fmt_nlist, fmt_nlist_null, d_coord3, ntypes, d_type, region, b_pbc, ii, d_nlist_null[ii], d_nlist[ii], rcut, sec, sec_null)) != -1){ + if (count_nei_idx_overflow == 0) { + cout << "WARNING: Radial neighbor list length of type " << ret << " is not enough" << endl; + flush(cout); + count_nei_idx_overflow ++; + } + } + } + // cout << ii << " " ; + // for (int jj = 0 ; jj < fmt_nlist.size(); ++jj){ + // cout << fmt_nlist[jj] << " " ; + // } + // cout << endl; + + vector d_descrpt; + vector d_descrpt_deriv; + vector d_rij; + compute_descriptor_se_r (d_descrpt, + d_descrpt_deriv, + d_rij, + d_coord3, + ntypes, + d_type, + region, + b_pbc, + ii, + fmt_nlist, + sec, + rcut_smth, + rcut); + + // check sizes + assert (d_descrpt_deriv.size() == ndescrpt * 3); + assert (d_rij.size() == nnei * 3); + assert (int(fmt_nlist.size()) == nnei); + // record outputs + for (int jj = 0; jj < ndescrpt; ++jj) { + descrpt(kk, ii * ndescrpt + jj) = (d_descrpt[jj] - avg(d_type[ii], jj)) / std(d_type[ii], jj); + } + for (int jj = 0; jj < ndescrpt * 3; ++jj) { + descrpt_deriv(kk, ii * ndescrpt * 3 + jj) = d_descrpt_deriv[jj] / std(d_type[ii], jj/3); + } + for (int jj = 0; jj < nnei * 3; ++jj){ + rij (kk, ii * nnei * 3 + jj) = d_rij[jj]; + } + for (int jj = 0; jj < nnei; ++jj){ + int record = fmt_nlist[jj]; + if (b_nlist_map && record >= 0) { + record = nlist_map[record]; + } + nlist (kk, ii * nnei + jj) = record; + } + } + } + } +private: + float rcut; + float rcut_smth; + vector sel; + vector sel_null; + vector sec; + vector 
sec_null; + int ndescrpt; + int nnei; + bool fill_nei_a; + int count_nei_idx_overflow; + void + cum_sum (vector & sec, + const vector & n_sel) const { + sec.resize (n_sel.size() + 1); + sec[0] = 0; + for (int ii = 1; ii < sec.size(); ++ii){ + sec[ii] = sec[ii-1] + n_sel[ii-1]; + } + } + void + build_nlist (vector > & nlist0, + vector > & nlist1, + const compute_t & rc0_, + const compute_t & rc1_, + const vector & posi3, + const SimulationRegion & region) const { + compute_t rc0 (rc0_); + compute_t rc1 (rc1_); + assert (rc0 <= rc1); + compute_t rc02 = rc0 * rc0; + // negative rc0 means not applying rc0 + if (rc0 < 0) rc02 = 0; + compute_t rc12 = rc1 * rc1; + + unsigned natoms = posi3.size()/3; + nlist0.clear(); + nlist1.clear(); + nlist0.resize(natoms); + nlist1.resize(natoms); + for (unsigned ii = 0; ii < natoms; ++ii){ + nlist0[ii].reserve (60); + nlist1[ii].reserve (60); + } + for (unsigned ii = 0; ii < natoms; ++ii){ + for (unsigned jj = ii+1; jj < natoms; ++jj){ + compute_t diff[3]; + region.diffNearestNeighbor (posi3[jj*3+0], posi3[jj*3+1], posi3[jj*3+2], + posi3[ii*3+0], posi3[ii*3+1], posi3[ii*3+2], + diff[0], diff[1], diff[2]); + compute_t r2 = MathUtilities::dot (diff, diff); + if (r2 < rc02) { + nlist0[ii].push_back (jj); + nlist0[jj].push_back (ii); + } + else if (r2 < rc12) { + nlist1[ii].push_back (jj); + nlist1[jj].push_back (ii); + } + } + } + } +}; + +REGISTER_KERNEL_BUILDER(Name("DescrptSeR").Device(DEVICE_CPU), DescrptSeROp); + diff --git a/source/op/prod_force_norot.cc b/source/op/prod_force_se_a.cc similarity index 95% rename from source/op/prod_force_norot.cc rename to source/op/prod_force_se_a.cc index 6f4d45e6e3..af0e712492 100644 --- a/source/op/prod_force_norot.cc +++ b/source/op/prod_force_se_a.cc @@ -13,7 +13,7 @@ typedef float VALUETYPE; #endif #ifdef HIGH_PREC -REGISTER_OP("ProdForceNorot") +REGISTER_OP("ProdForceSeA") .Input("net_deriv: double") .Input("in_deriv: double") .Input("nlist: int32") @@ -22,7 +22,7 @@ 
REGISTER_OP("ProdForceNorot") .Attr("n_r_sel: int") .Output("force: double"); #else -REGISTER_OP("ProdForceNorot") +REGISTER_OP("ProdForceSeA") .Input("net_deriv: float") .Input("in_deriv: float") .Input("nlist: int32") @@ -34,9 +34,9 @@ REGISTER_OP("ProdForceNorot") using namespace tensorflow; -class ProdForceNorotOp : public OpKernel { +class ProdForceSeAOp : public OpKernel { public: - explicit ProdForceNorotOp(OpKernelConstruction* context) : OpKernel(context) { + explicit ProdForceSeAOp(OpKernelConstruction* context) : OpKernel(context) { OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel)); OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel)); n_a_shift = n_a_sel * 4; @@ -155,7 +155,7 @@ class ProdForceNorotOp : public OpKernel { } }; -REGISTER_KERNEL_BUILDER(Name("ProdForceNorot").Device(DEVICE_CPU), ProdForceNorotOp); +REGISTER_KERNEL_BUILDER(Name("ProdForceSeA").Device(DEVICE_CPU), ProdForceSeAOp); diff --git a/source/op/prod_force_norot_grad.cc b/source/op/prod_force_se_a_grad.cc similarity index 95% rename from source/op/prod_force_norot_grad.cc rename to source/op/prod_force_se_a_grad.cc index b640359908..eda965974a 100644 --- a/source/op/prod_force_norot_grad.cc +++ b/source/op/prod_force_se_a_grad.cc @@ -13,7 +13,7 @@ typedef float VALUETYPE; #endif #ifdef HIGH_PREC -REGISTER_OP("ProdForceNorotGrad") +REGISTER_OP("ProdForceSeAGrad") .Input("grad: double") .Input("net_deriv: double") .Input("in_deriv: double") @@ -23,7 +23,7 @@ REGISTER_OP("ProdForceNorotGrad") .Attr("n_r_sel: int") .Output("grad_net: double"); #else -REGISTER_OP("ProdForceNorotGrad") +REGISTER_OP("ProdForceSeAGrad") .Input("grad: float") .Input("net_deriv: float") .Input("in_deriv: float") @@ -34,10 +34,10 @@ REGISTER_OP("ProdForceNorotGrad") .Output("grad_net: float"); #endif -class ProdForceNorotGradOp : public OpKernel +class ProdForceSeAGradOp : public OpKernel { public: - explicit ProdForceNorotGradOp(OpKernelConstruction* context) : OpKernel(context) { + 
explicit ProdForceSeAGradOp(OpKernelConstruction* context) : OpKernel(context) { OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel)); OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel)); n_a_shift = n_a_sel * 4; @@ -158,4 +158,4 @@ class ProdForceNorotGradOp : public OpKernel } }; -REGISTER_KERNEL_BUILDER(Name("ProdForceNorotGrad").Device(DEVICE_CPU), ProdForceNorotGradOp); +REGISTER_KERNEL_BUILDER(Name("ProdForceSeAGrad").Device(DEVICE_CPU), ProdForceSeAGradOp); diff --git a/source/op/prod_force_se_r.cc b/source/op/prod_force_se_r.cc new file mode 100644 index 0000000000..b4933c5b4a --- /dev/null +++ b/source/op/prod_force_se_r.cc @@ -0,0 +1,132 @@ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include + +using namespace tensorflow; +using namespace std; + +#ifdef HIGH_PREC +typedef double VALUETYPE; +#else +typedef float VALUETYPE; +#endif + +REGISTER_OP("ProdForceSeR") +#ifdef HIGH_PREC +.Input("net_deriv: double") +.Input("in_deriv: double") +.Input("nlist: int32") +.Input("natoms: int32") +.Output("force: double"); +#else +.Input("net_deriv: float") +.Input("in_deriv: float") +.Input("nlist: int32") +.Input("natoms: int32") +.Output("force: float"); +#endif + +using namespace tensorflow; + +class ProdForceSeROp : public OpKernel { + public: + explicit ProdForceSeROp(OpKernelConstruction* context) : OpKernel(context) { + } + + void Compute(OpKernelContext* context) override { + // Grab the input tensor + int context_input_index = 0; + const Tensor& net_deriv_tensor = context->input(context_input_index++); + const Tensor& in_deriv_tensor = context->input(context_input_index++); + const Tensor& nlist_tensor = context->input(context_input_index++); + const Tensor& natoms_tensor = context->input(context_input_index++); + + // set size of the sample + OP_REQUIRES (context, (net_deriv_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim 
of net deriv should be 2")); + OP_REQUIRES (context, (in_deriv_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of input deriv should be 2")); + OP_REQUIRES (context, (nlist_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of nlist should be 2")); + OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1), errors::InvalidArgument ("Dim of natoms should be 1")); + + OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3), errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3")); + auto natoms = natoms_tensor .flat(); + + int nframes = net_deriv_tensor.shape().dim_size(0); + int nloc = natoms(0); + int nall = natoms(1); + int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc; + int nnei = nlist_tensor.shape().dim_size(1) / nloc; + + // check the sizes + OP_REQUIRES (context, (nframes == in_deriv_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match")); + OP_REQUIRES (context, (nframes == nlist_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match")); + + OP_REQUIRES (context, (nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of descriptors should match")); + + // Create an output tensor + TensorShape force_shape ; + force_shape.AddDim (nframes); + force_shape.AddDim (3 * nall); + Tensor* force_tensor = NULL; + int context_output_index = 0; + OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, + force_shape, &force_tensor)); + + // flat the tensors + auto net_deriv = net_deriv_tensor.flat(); + auto in_deriv = in_deriv_tensor.flat(); + auto nlist = nlist_tensor.flat(); + auto force = force_tensor->flat(); + + assert (nframes == force_shape.dim_size(0)); + assert (nframes == net_deriv_tensor.shape().dim_size(0)); + assert (nframes == in_deriv_tensor.shape().dim_size(0)); + assert (nframes == nlist_tensor.shape().dim_size(0)); + assert (nall * 3 == force_shape.dim_size(1)); + assert 
(nloc * ndescrpt == net_deriv_tensor.shape().dim_size(1)); + assert (nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1)); + assert (nloc * nnei == nlist_tensor.shape().dim_size(1)); + assert (nnei * 1 == ndescrpt); + + // loop over samples +#pragma omp parallel for + for (int kk = 0; kk < nframes; ++kk){ + int force_iter = kk * nall * 3; + int net_iter = kk * nloc * ndescrpt; + int in_iter = kk * nloc * ndescrpt * 3; + int nlist_iter = kk * nloc * nnei; + + for (int ii = 0; ii < nall; ++ii){ + int i_idx = ii; + force (force_iter + i_idx * 3 + 0) = 0; + force (force_iter + i_idx * 3 + 1) = 0; + force (force_iter + i_idx * 3 + 2) = 0; + } + + // compute force of a frame + for (int ii = 0; ii < nloc; ++ii){ + int i_idx = ii; + // deriv wrt center atom + for (int aa = 0; aa < ndescrpt; ++aa){ + force (force_iter + i_idx * 3 + 0) -= net_deriv (net_iter + i_idx * ndescrpt + aa) * in_deriv (in_iter + i_idx * ndescrpt * 3 + aa * 3 + 0); + force (force_iter + i_idx * 3 + 1) -= net_deriv (net_iter + i_idx * ndescrpt + aa) * in_deriv (in_iter + i_idx * ndescrpt * 3 + aa * 3 + 1); + force (force_iter + i_idx * 3 + 2) -= net_deriv (net_iter + i_idx * ndescrpt + aa) * in_deriv (in_iter + i_idx * ndescrpt * 3 + aa * 3 + 2); + } + // deriv wrt neighbors + for (int jj = 0; jj < nnei; ++jj){ + int j_idx = nlist (nlist_iter + i_idx * nnei + jj); + // if (j_idx > nloc) j_idx = j_idx % nloc; + if (j_idx < 0) continue; + force (force_iter + j_idx * 3 + 0) += net_deriv (net_iter + i_idx * ndescrpt + jj) * in_deriv (in_iter + i_idx * ndescrpt * 3 + jj * 3 + 0); + force (force_iter + j_idx * 3 + 1) += net_deriv (net_iter + i_idx * ndescrpt + jj) * in_deriv (in_iter + i_idx * ndescrpt * 3 + jj * 3 + 1); + force (force_iter + j_idx * 3 + 2) += net_deriv (net_iter + i_idx * ndescrpt + jj) * in_deriv (in_iter + i_idx * ndescrpt * 3 + jj * 3 + 2); + } + } + } + } +}; + +REGISTER_KERNEL_BUILDER(Name("ProdForceSeR").Device(DEVICE_CPU), ProdForceSeROp); + + + diff --git 
a/source/op/prod_force_se_r_grad.cc b/source/op/prod_force_se_r_grad.cc new file mode 100644 index 0000000000..3866ef9b86 --- /dev/null +++ b/source/op/prod_force_se_r_grad.cc @@ -0,0 +1,134 @@ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include + +using namespace tensorflow; +using namespace std; + +#ifdef HIGH_PREC +typedef double VALUETYPE; +#else +typedef float VALUETYPE; +#endif + +#ifdef HIGH_PREC +REGISTER_OP("ProdForceSeRGrad") +.Input("grad: double") +.Input("net_deriv: double") +.Input("in_deriv: double") +.Input("nlist: int32") +.Input("natoms: int32") +.Output("grad_net: double"); +#else +REGISTER_OP("ProdForceSeRGrad") +.Input("grad: float") +.Input("net_deriv: float") +.Input("in_deriv: float") +.Input("nlist: int32") +.Input("natoms: int32") +.Output("grad_net: float"); +#endif + +class ProdForceSeRGradOp : public OpKernel +{ +public: + explicit ProdForceSeRGradOp(OpKernelConstruction* context) : OpKernel(context) { + } + + void Compute(OpKernelContext* context) override { + // Grab the input tensor + int context_input_index = 0; + const Tensor& grad_tensor = context->input(context_input_index++); + const Tensor& net_deriv_tensor = context->input(context_input_index++); + const Tensor& in_deriv_tensor = context->input(context_input_index++); + const Tensor& nlist_tensor = context->input(context_input_index++); + const Tensor& natoms_tensor = context->input(context_input_index++); + + // set size of the sample + TensorShape grad_shape = grad_tensor.shape(); + TensorShape net_deriv_shape = net_deriv_tensor.shape(); + TensorShape in_deriv_shape = in_deriv_tensor.shape(); + TensorShape nlist_shape = nlist_tensor.shape(); + + OP_REQUIRES (context, (grad_shape.dims() == 2), errors::InvalidArgument ("Dim of grad should be 2")); + OP_REQUIRES (context, (net_deriv_shape.dims() == 2),errors::InvalidArgument ("Dim of net deriv should be 2")); + OP_REQUIRES 
(context, (in_deriv_shape.dims() == 2), errors::InvalidArgument ("Dim of input deriv should be 2")); + OP_REQUIRES (context, (nlist_shape.dims() == 2), errors::InvalidArgument ("Dim of nlist should be 2")); + OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1), errors::InvalidArgument ("Dim of natoms should be 1")); + + OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3), errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3")); + auto natoms = natoms_tensor .flat(); + + int nframes = net_deriv_tensor.shape().dim_size(0); + int nloc = natoms(0); + int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc; + int nnei = nlist_tensor.shape().dim_size(1) / nloc; + + // check the sizes + OP_REQUIRES (context, (nframes == grad_shape.dim_size(0)), errors::InvalidArgument ("number of frames should match")); + OP_REQUIRES (context, (nframes == in_deriv_shape.dim_size(0)), errors::InvalidArgument ("number of frames should match")); + OP_REQUIRES (context, (nframes == nlist_shape.dim_size(0)), errors::InvalidArgument ("number of frames should match")); + + OP_REQUIRES (context, (nloc * 3 == grad_shape.dim_size(1)), errors::InvalidArgument ("input grad shape should be 3 x natoms")); + OP_REQUIRES (context, (nloc * ndescrpt * 3 == in_deriv_shape.dim_size(1)),errors::InvalidArgument ("number of descriptors should match")); + + // Create an output tensor + TensorShape grad_net_shape ; + grad_net_shape.AddDim (nframes); + grad_net_shape.AddDim (nloc * ndescrpt); + + // allocate the output tensor + Tensor* grad_net_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(0, grad_net_shape, &grad_net_tensor)); + + // flat the tensors + auto grad = grad_tensor .flat(); + auto net_deriv = net_deriv_tensor .flat(); + auto in_deriv = in_deriv_tensor .flat(); + auto nlist = nlist_tensor .flat(); + auto grad_net = grad_net_tensor ->flat(); + + // loop over frames +#pragma omp parallel for + for (int kk = 0; kk < nframes; ++kk){ + + int 
grad_iter = kk * nloc * 3; + int net_iter = kk * nloc * ndescrpt; + int in_iter = kk * nloc * ndescrpt * 3; + int nlist_iter = kk * nloc * nnei; + int grad_net_iter = kk * nloc * ndescrpt; + + // reset the frame to 0 + for (int ii = 0; ii < nloc; ++ii){ + for (int aa = 0; aa < ndescrpt; ++aa){ + grad_net (grad_net_iter + ii * ndescrpt + aa) = 0; + } + } + + // compute grad of one frame + for (int ii = 0; ii < nloc; ++ii){ + int i_idx = ii; + + // deriv wrt center atom + for (int aa = 0; aa < ndescrpt; ++aa){ + for (int dd = 0; dd < 3; ++dd){ + grad_net (grad_net_iter + i_idx * ndescrpt + aa) -= grad (grad_iter + i_idx * 3 + dd) * in_deriv (in_iter + i_idx * ndescrpt * 3 + aa * 3 + dd); + } + } + + // loop over neighbors + for (int jj = 0; jj < nnei; ++jj){ + int j_idx = nlist (nlist_iter + i_idx * nnei + jj); + if (j_idx > nloc) j_idx = j_idx % nloc; + if (j_idx < 0) continue; + for (int dd = 0; dd < 3; ++dd){ + grad_net (grad_net_iter + i_idx * ndescrpt + jj) += grad (grad_iter + j_idx * 3 + dd) * in_deriv (in_iter + i_idx * ndescrpt * 3 + jj * 3 + dd); + } + } + } + } + } +}; + +REGISTER_KERNEL_BUILDER(Name("ProdForceSeRGrad").Device(DEVICE_CPU), ProdForceSeRGradOp); diff --git a/source/op/prod_virial_norot.cc b/source/op/prod_virial_se_a.cc similarity index 95% rename from source/op/prod_virial_norot.cc rename to source/op/prod_virial_se_a.cc index fb07127ae6..89077750af 100644 --- a/source/op/prod_virial_norot.cc +++ b/source/op/prod_virial_se_a.cc @@ -13,7 +13,7 @@ typedef float VALUETYPE; #endif #ifdef HIGH_PREC -REGISTER_OP("ProdVirialNorot") +REGISTER_OP("ProdVirialSeA") .Input("net_deriv: double") .Input("in_deriv: double") .Input("rij: double") @@ -25,7 +25,7 @@ REGISTER_OP("ProdVirialNorot") .Output("atom_virial: double") ; #else -REGISTER_OP("ProdVirialNorot") +REGISTER_OP("ProdVirialSeA") .Input("net_deriv: float") .Input("in_deriv: float") .Input("rij: float") @@ -40,9 +40,9 @@ REGISTER_OP("ProdVirialNorot") using namespace tensorflow; -class 
ProdVirialNorotOp : public OpKernel { +class ProdVirialSeAOp : public OpKernel { public: - explicit ProdVirialNorotOp(OpKernelConstruction* context) : OpKernel(context) { + explicit ProdVirialSeAOp(OpKernelConstruction* context) : OpKernel(context) { OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel)); OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel)); n_a_shift = n_a_sel * 4; @@ -161,7 +161,7 @@ class ProdVirialNorotOp : public OpKernel { } }; -REGISTER_KERNEL_BUILDER(Name("ProdVirialNorot").Device(DEVICE_CPU), ProdVirialNorotOp); +REGISTER_KERNEL_BUILDER(Name("ProdVirialSeA").Device(DEVICE_CPU), ProdVirialSeAOp); diff --git a/source/op/prod_virial_norot_grad.cc b/source/op/prod_virial_se_a_grad.cc similarity index 95% rename from source/op/prod_virial_norot_grad.cc rename to source/op/prod_virial_se_a_grad.cc index 3263e3c0de..0d19a1c19a 100644 --- a/source/op/prod_virial_norot_grad.cc +++ b/source/op/prod_virial_se_a_grad.cc @@ -13,7 +13,7 @@ typedef float VALUETYPE; #endif #ifdef HIGH_PREC -REGISTER_OP("ProdVirialNorotGrad") +REGISTER_OP("ProdVirialSeAGrad") .Input("grad: double") .Input("net_deriv: double") .Input("in_deriv: double") @@ -24,7 +24,7 @@ REGISTER_OP("ProdVirialNorotGrad") .Attr("n_r_sel: int") .Output("grad_net: double"); #else -REGISTER_OP("ProdVirialNorotGrad") +REGISTER_OP("ProdVirialSeAGrad") .Input("grad: float") .Input("net_deriv: float") .Input("in_deriv: float") @@ -36,10 +36,10 @@ REGISTER_OP("ProdVirialNorotGrad") .Output("grad_net: float"); #endif -class ProdVirialNorotGradOp : public OpKernel +class ProdVirialSeAGradOp : public OpKernel { public: - explicit ProdVirialNorotGradOp(OpKernelConstruction* context) : OpKernel(context) { + explicit ProdVirialSeAGradOp(OpKernelConstruction* context) : OpKernel(context) { OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel)); OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel)); n_a_shift = n_a_sel * 4; @@ -162,4 +162,4 @@ class ProdVirialNorotGradOp 
: public OpKernel } }; -REGISTER_KERNEL_BUILDER(Name("ProdVirialNorotGrad").Device(DEVICE_CPU), ProdVirialNorotGradOp); +REGISTER_KERNEL_BUILDER(Name("ProdVirialSeAGrad").Device(DEVICE_CPU), ProdVirialSeAGradOp); diff --git a/source/op/prod_virial_se_r.cc b/source/op/prod_virial_se_r.cc new file mode 100644 index 0000000000..f9b5a71d84 --- /dev/null +++ b/source/op/prod_virial_se_r.cc @@ -0,0 +1,139 @@ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include + +using namespace tensorflow; +using namespace std; + +#ifdef HIGH_PREC +typedef double VALUETYPE; +#else +typedef float VALUETYPE; +#endif + +#ifdef HIGH_PREC +REGISTER_OP("ProdVirialSeR") +.Input("net_deriv: double") +.Input("in_deriv: double") +.Input("rij: double") +.Input("nlist: int32") +.Input("natoms: int32") +.Output("virial: double") +.Output("atom_virial: double") +; +#else +REGISTER_OP("ProdVirialSeR") +.Input("net_deriv: float") +.Input("in_deriv: float") +.Input("rij: float") +.Input("nlist: int32") +.Input("natoms: int32") +.Output("virial: float") +.Output("atom_virial: float") +; +#endif + +using namespace tensorflow; + +class ProdVirialSeROp : public OpKernel { + public: + explicit ProdVirialSeROp(OpKernelConstruction* context) : OpKernel(context) { + } + + void Compute(OpKernelContext* context) override { + // Grab the input tensor + int context_input_index = 0; + const Tensor& net_deriv_tensor = context->input(context_input_index++); + const Tensor& in_deriv_tensor = context->input(context_input_index++); + const Tensor& rij_tensor = context->input(context_input_index++); + const Tensor& nlist_tensor = context->input(context_input_index++); + const Tensor& natoms_tensor = context->input(context_input_index++); + + // set size of the sample + OP_REQUIRES (context, (net_deriv_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of net deriv should be 2")); + OP_REQUIRES (context, 
(in_deriv_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of input deriv should be 2")); + OP_REQUIRES (context, (rij_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of rij should be 2")); + OP_REQUIRES (context, (nlist_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of nlist should be 2")); + OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1), errors::InvalidArgument ("Dim of natoms should be 1")); + + OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3), errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3")); + auto natoms = natoms_tensor .flat(); + + int nframes = net_deriv_tensor.shape().dim_size(0); + int nloc = natoms(0); + int nall = natoms(1); + int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc; + int nnei = nlist_tensor.shape().dim_size(1) / nloc; + + // check the sizes + OP_REQUIRES (context, (nframes == in_deriv_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match")); + OP_REQUIRES (context, (nframes == rij_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match")); + OP_REQUIRES (context, (nframes == nlist_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match")); + + OP_REQUIRES (context, (nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of descriptors should match")); + OP_REQUIRES (context, (nloc * nnei * 3 == rij_tensor.shape().dim_size(1)), errors::InvalidArgument ("dim of rij should be nnei * 3")); + + // Create an output tensor + TensorShape virial_shape ; + virial_shape.AddDim (nframes); + virial_shape.AddDim (9); + Tensor* virial_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(0, virial_shape, &virial_tensor)); + TensorShape atom_virial_shape ; + atom_virial_shape.AddDim (nframes); + atom_virial_shape.AddDim (9 * nall); + Tensor* atom_virial_tensor = NULL; + OP_REQUIRES_OK(context, 
context->allocate_output(1, atom_virial_shape, &atom_virial_tensor)); + + // flat the tensors + auto net_deriv = net_deriv_tensor.flat(); + auto in_deriv = in_deriv_tensor.flat(); + auto rij = rij_tensor.flat(); + auto nlist = nlist_tensor.flat(); + auto virial = virial_tensor->flat(); + auto atom_virial = atom_virial_tensor->flat(); + + // loop over samples +#pragma omp parallel for + for (int kk = 0; kk < nframes; ++kk){ + int net_iter = kk * nloc * ndescrpt; + int in_iter = kk * nloc * ndescrpt * 3; + int rij_iter = kk * nloc * nnei * 3; + int nlist_iter = kk * nloc * nnei; + int virial_iter = kk * 9; + int atom_virial_iter = kk * nall * 9; + + for (int ii = 0; ii < 9; ++ ii){ + virial (virial_iter + ii) = 0.; + } + for (int ii = 0; ii < 9 * nall; ++ ii){ + atom_virial (atom_virial_iter + ii) = 0.; + } + + // compute virial of a frame + for (int ii = 0; ii < nloc; ++ii){ + int i_idx = ii; + + // deriv wrt neighbors + for (int jj = 0; jj < nnei; ++jj){ + int j_idx = nlist (nlist_iter + i_idx * nnei + jj); + if (j_idx < 0) continue; + VALUETYPE pref = -1.0 * net_deriv (net_iter + i_idx * ndescrpt + jj); + for (int dd0 = 0; dd0 < 3; ++dd0){ + for (int dd1 = 0; dd1 < 3; ++dd1){ + VALUETYPE tmp_v = pref * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd0) * in_deriv (in_iter + i_idx * ndescrpt * 3 + jj * 3 + dd1); + virial (virial_iter + dd0 * 3 + dd1) -= tmp_v; + atom_virial (atom_virial_iter + j_idx * 9 + dd0 * 3 + dd1) -= tmp_v; + } + } + } + } + } + } +}; + +REGISTER_KERNEL_BUILDER(Name("ProdVirialSeR").Device(DEVICE_CPU), ProdVirialSeROp); + + + diff --git a/source/op/prod_virial_se_r_grad.cc b/source/op/prod_virial_se_r_grad.cc new file mode 100644 index 0000000000..002aa1b907 --- /dev/null +++ b/source/op/prod_virial_se_r_grad.cc @@ -0,0 +1,138 @@ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include + +using namespace tensorflow; +using namespace std; + 
+#ifdef HIGH_PREC +typedef double VALUETYPE; +#else +typedef float VALUETYPE; +#endif + +#ifdef HIGH_PREC +REGISTER_OP("ProdVirialSeRGrad") +.Input("grad: double") +.Input("net_deriv: double") +.Input("in_deriv: double") +.Input("rij: double") +.Input("nlist: int32") +.Input("natoms: int32") +.Output("grad_net: double"); +#else +REGISTER_OP("ProdVirialSeRGrad") +.Input("grad: float") +.Input("net_deriv: float") +.Input("in_deriv: float") +.Input("rij: float") +.Input("nlist: int32") +.Input("natoms: int32") +.Output("grad_net: float"); +#endif + +class ProdVirialSeRGradOp : public OpKernel +{ +public: + explicit ProdVirialSeRGradOp(OpKernelConstruction* context) : OpKernel(context) { + } + + void Compute(OpKernelContext* context) override { + // Grab the input tensor + int context_input_index = 0; + const Tensor& grad_tensor = context->input(context_input_index++); + const Tensor& net_deriv_tensor = context->input(context_input_index++); + const Tensor& in_deriv_tensor = context->input(context_input_index++); + const Tensor& rij_tensor = context->input(context_input_index++); + const Tensor& nlist_tensor = context->input(context_input_index++); + const Tensor& natoms_tensor = context->input(context_input_index++); + + // set size of the sample + TensorShape grad_shape = grad_tensor.shape(); + TensorShape net_deriv_shape = net_deriv_tensor.shape(); + TensorShape in_deriv_shape = in_deriv_tensor.shape(); + TensorShape rij_shape = rij_tensor.shape(); + TensorShape nlist_shape = nlist_tensor.shape(); + + OP_REQUIRES (context, (grad_shape.dims() == 2), errors::InvalidArgument ("Dim of grad should be 2")); + OP_REQUIRES (context, (net_deriv_shape.dims() == 2),errors::InvalidArgument ("Dim of net deriv should be 2")); + OP_REQUIRES (context, (in_deriv_shape.dims() == 2), errors::InvalidArgument ("Dim of input deriv should be 2")); + OP_REQUIRES (context, (rij_shape.dims() == 2), errors::InvalidArgument ("Dim of rij should be 2")); + OP_REQUIRES (context, 
(nlist_shape.dims() == 2), errors::InvalidArgument ("Dim of nlist should be 2")); + OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1), errors::InvalidArgument ("Dim of natoms should be 1")); + + OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3), errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3")); + auto natoms = natoms_tensor .flat(); + + int nframes = net_deriv_tensor.shape().dim_size(0); + int nloc = natoms(0); + int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc; + int nnei = nlist_tensor.shape().dim_size(1) / nloc; + + // check the sizes + OP_REQUIRES (context, (nframes == grad_shape.dim_size(0)), errors::InvalidArgument ("number of frames should match")); + OP_REQUIRES (context, (nframes == in_deriv_shape.dim_size(0)), errors::InvalidArgument ("number of frames should match")); + OP_REQUIRES (context, (nframes == rij_shape.dim_size(0)), errors::InvalidArgument ("number of frames should match")); + OP_REQUIRES (context, (nframes == nlist_shape.dim_size(0)), errors::InvalidArgument ("number of frames should match")); + + OP_REQUIRES (context, (9 == grad_shape.dim_size(1)), errors::InvalidArgument ("input grad shape should be 9")); + OP_REQUIRES (context, (nloc * ndescrpt * 3 == in_deriv_shape.dim_size(1)),errors::InvalidArgument ("number of descriptors should match")); + OP_REQUIRES (context, (nloc * nnei * 3 == rij_shape.dim_size(1)), errors::InvalidArgument ("dim of rij should be nnei * 3")); + + // Create an output tensor + TensorShape grad_net_shape ; + grad_net_shape.AddDim (nframes); + grad_net_shape.AddDim (nloc * ndescrpt); + + // allocate the output tensor + Tensor* grad_net_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(0, grad_net_shape, &grad_net_tensor)); + + // flat the tensors + auto grad = grad_tensor .flat(); + auto net_deriv = net_deriv_tensor .flat(); + auto in_deriv = in_deriv_tensor .flat(); + auto rij = rij_tensor .flat(); + auto nlist = nlist_tensor
.flat(); + auto grad_net = grad_net_tensor ->flat(); + + // loop over frames +#pragma omp parallel for + for (int kk = 0; kk < nframes; ++kk){ + + int grad_iter = kk * 9; + int net_iter = kk * nloc * ndescrpt; + int in_iter = kk * nloc * ndescrpt * 3; + int rij_iter = kk * nloc * nnei * 3; + int nlist_iter = kk * nloc * nnei; + int grad_net_iter = kk * nloc * ndescrpt; + + // reset the frame to 0 + for (int ii = 0; ii < nloc; ++ii){ + for (int aa = 0; aa < ndescrpt; ++aa){ + grad_net (grad_net_iter + ii * ndescrpt + aa) = 0; + } + } + + // compute grad of one frame + for (int ii = 0; ii < nloc; ++ii){ + int i_idx = ii; + + // loop over neighbors + for (int jj = 0; jj < nnei; ++jj){ + int j_idx = nlist (nlist_iter + i_idx * nnei + jj); + if (j_idx < 0) continue; + for (int dd0 = 0; dd0 < 3; ++dd0){ + for (int dd1 = 0; dd1 < 3; ++dd1){ + grad_net (grad_net_iter + i_idx * ndescrpt + jj) -= + -1.0 * grad (grad_iter + dd0 * 3 + dd1) * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd0) * in_deriv (in_iter + i_idx * ndescrpt * 3 + jj * 3 + dd1); + } + } + } + } + } + } +}; + +REGISTER_KERNEL_BUILDER(Name("ProdVirialSeRGrad").Device(DEVICE_CPU), ProdVirialSeRGradOp); diff --git a/source/op/soft_min.cc b/source/op/soft_min.cc new file mode 100644 index 0000000000..2a70f1c5d6 --- /dev/null +++ b/source/op/soft_min.cc @@ -0,0 +1,202 @@ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include + +#include "ComputeDescriptor.h" + +using namespace tensorflow; +using namespace std; + +#ifdef HIGH_PREC +typedef double VALUETYPE; +#else +typedef float VALUETYPE; +#endif + +#ifdef HIGH_PREC +REGISTER_OP("SoftMinSwitch") +.Input("type: int32") +.Input("rij: double") +.Input("nlist: int32") +.Input("natoms: int32") +.Attr("sel_a: list(int)") +.Attr("sel_r: list(int)") +.Attr("alpha: float") +.Attr("rmin: float") +.Attr("rmax: float") +.Output("sw_value: double") +.Output("sw_deriv: 
double"); +#else +REGISTER_OP("SoftMinSwitch") +.Input("type: int32") +.Input("rij: float") +.Input("nlist: int32") +.Input("natoms: int32") +.Attr("sel_a: list(int)") +.Attr("sel_r: list(int)") +.Attr("alpha: float") +.Attr("rmin: float") +.Attr("rmax: float") +.Output("sw_value: float") +.Output("sw_deriv: float"); +#endif + +using namespace tensorflow; + +class SoftMinSwitchOp : public OpKernel { + public: + explicit SoftMinSwitchOp(OpKernelConstruction* context) : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("sel_a", &sel_a)); + OP_REQUIRES_OK(context, context->GetAttr("sel_r", &sel_r)); + OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha)); + OP_REQUIRES_OK(context, context->GetAttr("rmin", &rmin)); + OP_REQUIRES_OK(context, context->GetAttr("rmax", &rmax)); + cum_sum (sec_a, sel_a); + cum_sum (sec_r, sel_r); + nnei_a = sec_a.back(); + nnei_r = sec_r.back(); + nnei = nnei_a + nnei_r; + } + + void Compute(OpKernelContext* context) override { + // Grab the input tensor + int tmp_idx = 0; + const Tensor& type_tensor = context->input(tmp_idx++); + const Tensor& rij_tensor = context->input(tmp_idx++); + const Tensor& nlist_tensor = context->input(tmp_idx++); + const Tensor& natoms_tensor = context->input(tmp_idx++); + + // set size of the sample + OP_REQUIRES (context, (type_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of type should be 2")); + OP_REQUIRES (context, (rij_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of rij should be 2")); + OP_REQUIRES (context, (nlist_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of nlist should be 2")); + OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1), errors::InvalidArgument ("Dim of natoms should be 1")); + + OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3), errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3")); + auto natoms = natoms_tensor .flat(); + + int nframes = type_tensor.shape().dim_size(0); + int nloc 
= natoms(0); + int nall = natoms(1); + int ntypes = natoms_tensor.shape().dim_size(0) - 2; + assert(sel_a.size() == ntypes); + assert(sel_r.size() == ntypes); + + // check the sizes + OP_REQUIRES (context, (nframes == type_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match")); + OP_REQUIRES (context, (nframes == rij_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match")); + OP_REQUIRES (context, (nframes == nlist_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match")); + OP_REQUIRES (context, (nall == type_tensor.shape().dim_size(1)), errors::InvalidArgument ("shape of type should be nall")); + OP_REQUIRES (context, (3 * nnei * nloc == rij_tensor.shape().dim_size(1)), errors::InvalidArgument ("shape of rij should be 3 * nloc * nnei")); + OP_REQUIRES (context, (nnei * nloc == nlist_tensor.shape().dim_size(1)), errors::InvalidArgument ("shape of nlist should be nloc * nnei")); + + // Create an output tensor + TensorShape sw_value_shape ; + sw_value_shape.AddDim (nframes); + sw_value_shape.AddDim (nloc); + TensorShape sw_deriv_shape ; + sw_deriv_shape.AddDim (nframes); + sw_deriv_shape.AddDim (3 * nnei * nloc); + Tensor* sw_value_tensor = NULL; + Tensor* sw_deriv_tensor = NULL; + tmp_idx = 0; + OP_REQUIRES_OK(context, context->allocate_output(tmp_idx++, sw_value_shape, &sw_value_tensor)); + OP_REQUIRES_OK(context, context->allocate_output(tmp_idx++, sw_deriv_shape, &sw_deriv_tensor )); + + // flat the tensors + auto type = type_tensor .matrix(); + auto rij = rij_tensor .matrix(); + auto nlist = nlist_tensor .matrix(); + auto sw_value = sw_value_tensor ->matrix(); + auto sw_deriv = sw_deriv_tensor ->matrix(); + + // loop over samples +#pragma omp parallel for + for (int kk = 0; kk < nframes; ++kk){ + // fill results with 0 + for (int ii = 0; ii < nloc; ++ii){ + sw_value(kk, ii) = 0; + } + for (int ii = 0; ii < nloc * nnei; ++ii){ + sw_deriv(kk, ii * 3 + 0) = 0; + 
sw_deriv(kk, ii * 3 + 1) = 0; + sw_deriv(kk, ii * 3 + 2) = 0; + } + // compute force of a frame + for (int ii = 0; ii < nloc; ++ii){ + int i_idx = ii; + VALUETYPE aa = 0; + VALUETYPE bb = 0; + for (int jj = 0; jj < nnei; ++jj){ + int j_idx = nlist (kk, i_idx * nnei + jj); + if (j_idx < 0) continue; + int rij_idx_shift = (i_idx * nnei + jj) * 3; + VALUETYPE dr[3] = { + rij(kk, rij_idx_shift + 0), + rij(kk, rij_idx_shift + 1), + rij(kk, rij_idx_shift + 2) + }; + VALUETYPE rr2 = dr[0] * dr[0] + dr[1] * dr[1] + dr[2] * dr[2]; + VALUETYPE rr = sqrt(rr2); + VALUETYPE ee = exp(-rr / alpha); + aa += ee; + bb += rr * ee; + } + VALUETYPE smin = bb / aa; + VALUETYPE vv, dd; + spline5_switch(vv, dd, smin, rmin, rmax); + // value of switch + sw_value(kk, i_idx) = vv; + // deriv of switch distributed as force + for (int jj = 0; jj < nnei; ++jj){ + int j_idx = nlist (kk, i_idx * nnei + jj); + if (j_idx < 0) continue; + int rij_idx_shift = (ii * nnei + jj) * 3; + VALUETYPE dr[3] = { + rij(kk, rij_idx_shift + 0), + rij(kk, rij_idx_shift + 1), + rij(kk, rij_idx_shift + 2) + }; + VALUETYPE rr2 = dr[0] * dr[0] + dr[1] * dr[1] + dr[2] * dr[2]; + VALUETYPE rr = sqrt(rr2); + VALUETYPE ee = exp(-rr / alpha); + VALUETYPE pref_c = (1./rr - 1./alpha) * ee ; + VALUETYPE pref_d = 1./(rr * alpha) * ee; + VALUETYPE ts; + ts = dd / (aa * aa) * (aa * pref_c + bb * pref_d); + sw_deriv(kk, rij_idx_shift + 0) += ts * dr[0]; + sw_deriv(kk, rij_idx_shift + 1) += ts * dr[1]; + sw_deriv(kk, rij_idx_shift + 2) += ts * dr[2]; + // cout << ii << " " << jj << " " << j_idx << " " + // << vv << " " + // << sw_deriv(kk, rij_idx_shift+0) << " " + // << sw_deriv(kk, rij_idx_shift+1) << " " + // << sw_deriv(kk, rij_idx_shift+2) << " " + // << endl; + } + } + } + } +private: + vector sel_r; + vector sel_a; + vector sec_a; + vector sec_r; + float alpha, rmin, rmax; + int nnei, nnei_a, nnei_r; + void + cum_sum (vector & sec, + const vector & n_sel) const { + sec.resize (n_sel.size() + 1); + sec[0] = 0; + for (int ii 
= 1; ii < sec.size(); ++ii){ + sec[ii] = sec[ii-1] + n_sel[ii-1]; + } + } +}; + +REGISTER_KERNEL_BUILDER(Name("SoftMinSwitch").Device(DEVICE_CPU), SoftMinSwitchOp); + + + diff --git a/source/op/soft_min_force.cc b/source/op/soft_min_force.cc new file mode 100644 index 0000000000..e51aadbc79 --- /dev/null +++ b/source/op/soft_min_force.cc @@ -0,0 +1,121 @@ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include + +using namespace tensorflow; +using namespace std; + +#ifdef HIGH_PREC +typedef double VALUETYPE; +#else +typedef float VALUETYPE; +#endif + +#ifdef HIGH_PREC +REGISTER_OP("SoftMinForce") +.Input("du: double") +.Input("sw_deriv: double") +.Input("nlist: int32") +.Input("natoms: int32") +.Attr("n_a_sel: int") +.Attr("n_r_sel: int") +.Output("force: double"); +#else +REGISTER_OP("SoftMinForce") +.Input("du: float") +.Input("sw_deriv: float") +.Input("nlist: int32") +.Input("natoms: int32") +.Attr("n_a_sel: int") +.Attr("n_r_sel: int") +.Output("force: float"); +#endif + +using namespace tensorflow; + +class SoftMinForceOp : public OpKernel { + public: + explicit SoftMinForceOp(OpKernelConstruction* context) : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel)); + OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel)); + } + + void Compute(OpKernelContext* context) override { + // Grab the input tensor + const Tensor& du_tensor = context->input(0); + const Tensor& sw_deriv_tensor = context->input(1); + const Tensor& nlist_tensor = context->input(2); + const Tensor& natoms_tensor = context->input(3); + + // set size of the sample + OP_REQUIRES (context, (du_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of du should be 2")); + OP_REQUIRES (context, (sw_deriv_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of switch deriv should be 2")); + OP_REQUIRES (context, (nlist_tensor.shape().dims() == 2), 
errors::InvalidArgument ("Dim of nlist should be 2")); + OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1), errors::InvalidArgument ("Dim of natoms should be 1")); + + OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3), errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3")); + auto natoms = natoms_tensor .flat(); + + int nframes = du_tensor.shape().dim_size(0); + int nloc = natoms(0); + int nall = natoms(1); + int nnei = nlist_tensor.shape().dim_size(1) / nloc; + + // check the sizes + OP_REQUIRES (context, (nframes == sw_deriv_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match")); + OP_REQUIRES (context, (nframes == nlist_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match")); + + OP_REQUIRES (context, (nloc == du_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of du should match")); + OP_REQUIRES (context, (nloc * nnei * 3 == sw_deriv_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of switch deriv should match")); + OP_REQUIRES (context, (nnei == n_a_sel + n_r_sel), errors::InvalidArgument ("number of neighbors should match")); + + // Create an output tensor + TensorShape force_shape ; + force_shape.AddDim (nframes); + force_shape.AddDim (3 * nall); + Tensor* force_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(0, force_shape, &force_tensor)); + + // flat the tensors + auto du = du_tensor.matrix(); + auto sw_deriv = sw_deriv_tensor.matrix(); + auto nlist = nlist_tensor.matrix(); + auto force = force_tensor->matrix(); + + // loop over samples +#pragma omp parallel for + for (int kk = 0; kk < nframes; ++kk){ + // set zeros + for (int ii = 0; ii < nall; ++ii){ + int i_idx = ii; + force (kk, i_idx * 3 + 0) = 0; + force (kk, i_idx * 3 + 1) = 0; + force (kk, i_idx * 3 + 2) = 0; + } + // compute force of a frame + for (int ii = 0; ii < nloc; ++ii){ + int i_idx = ii; + for (int jj = 0; jj < nnei; ++jj){ + int 
j_idx = nlist (kk, i_idx * nnei + jj); + if (j_idx < 0) continue; + int rij_idx_shift = (ii * nnei + jj) * 3; + force(kk, i_idx * 3 + 0) += du(kk, i_idx) * sw_deriv(kk, rij_idx_shift + 0); + force(kk, i_idx * 3 + 1) += du(kk, i_idx) * sw_deriv(kk, rij_idx_shift + 1); + force(kk, i_idx * 3 + 2) += du(kk, i_idx) * sw_deriv(kk, rij_idx_shift + 2); + force(kk, j_idx * 3 + 0) -= du(kk, i_idx) * sw_deriv(kk, rij_idx_shift + 0); + force(kk, j_idx * 3 + 1) -= du(kk, i_idx) * sw_deriv(kk, rij_idx_shift + 1); + force(kk, j_idx * 3 + 2) -= du(kk, i_idx) * sw_deriv(kk, rij_idx_shift + 2); + // cout << "soft_min_force " << i_idx << " " << j_idx << " " + // << du(kk, i_idx) << " " + // << du(kk, i_idx) * sw_deriv(kk, rij_idx_shift + 0) + // << endl; + } + } + } + } +private: + int n_r_sel, n_a_sel; +}; + +REGISTER_KERNEL_BUILDER(Name("SoftMinForce").Device(DEVICE_CPU), SoftMinForceOp); diff --git a/source/op/soft_min_force_grad.cc b/source/op/soft_min_force_grad.cc new file mode 100644 index 0000000000..4c8a4b21ff --- /dev/null +++ b/source/op/soft_min_force_grad.cc @@ -0,0 +1,128 @@ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include + +using namespace tensorflow; +using namespace std; + +#ifdef HIGH_PREC +typedef double VALUETYPE; +#else +typedef float VALUETYPE; +#endif + +#ifdef HIGH_PREC +REGISTER_OP("SoftMinForceGrad") +.Input("grad: double") +.Input("du: double") +.Input("sw_deriv: double") +.Input("nlist: int32") +.Input("natoms: int32") +.Attr("n_a_sel: int") +.Attr("n_r_sel: int") +.Output("grad_net: double"); +#else +REGISTER_OP("SoftMinForceGrad") +.Input("grad: float") +.Input("du: float") +.Input("sw_deriv: float") +.Input("nlist: int32") +.Input("natoms: int32") +.Attr("n_a_sel: int") +.Attr("n_r_sel: int") +.Output("grad_net: float"); +#endif + +class SoftMinForceGradOp : public OpKernel +{ +public: + explicit SoftMinForceGradOp(OpKernelConstruction* 
context) : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel)); + OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel)); + } + + void Compute(OpKernelContext* context) override { + // Grab the input tensor + int context_input_index = 0; + const Tensor& grad_tensor = context->input(context_input_index++); + const Tensor& du_tensor = context->input(context_input_index++); + const Tensor& sw_deriv_tensor = context->input(context_input_index++); + const Tensor& nlist_tensor = context->input(context_input_index++); + const Tensor& natoms_tensor = context->input(context_input_index++); + + // set size of the sample + TensorShape grad_shape = grad_tensor.shape(); + TensorShape du_shape = du_tensor.shape(); + TensorShape sw_deriv_shape = sw_deriv_tensor.shape(); + TensorShape nlist_shape = nlist_tensor.shape(); + + OP_REQUIRES (context, (grad_shape.dims() == 2), errors::InvalidArgument ("Dim of grad should be 2")); + OP_REQUIRES (context, (du_shape.dims() == 2), errors::InvalidArgument ("Dim of du should be 2")); + OP_REQUIRES (context, (sw_deriv_shape.dims() == 2), errors::InvalidArgument ("Dim of sw deriv should be 2")); + OP_REQUIRES (context, (nlist_shape.dims() == 2), errors::InvalidArgument ("Dim of nlist should be 2")); + OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1), errors::InvalidArgument ("Dim of natoms should be 1")); + + OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3), errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3")); + auto natoms = natoms_tensor .flat(); + + int nframes = du_tensor.shape().dim_size(0); + int nloc = natoms(0); + int nnei = nlist_tensor.shape().dim_size(1) / nloc; + + // check the sizes + OP_REQUIRES (context, (nframes == grad_shape.dim_size(0)), errors::InvalidArgument ("number of frames should match")); + OP_REQUIRES (context, (nframes == sw_deriv_shape.dim_size(0)), errors::InvalidArgument ("number of frames should match")); + OP_REQUIRES 
(context, (nframes == nlist_shape.dim_size(0)), errors::InvalidArgument ("number of frames should match")); + + OP_REQUIRES (context, (nloc * 3 == grad_shape.dim_size(1)), errors::InvalidArgument ("input grad shape should be 3 x natoms")); + OP_REQUIRES (context, (nloc * nnei * 3 == sw_deriv_shape.dim_size(1)),errors::InvalidArgument ("number of sw deriv should match")); + OP_REQUIRES (context, (nnei == n_a_sel + n_r_sel), errors::InvalidArgument ("number of neighbors should match")); + + // Create an output tensor + TensorShape grad_net_shape ; + grad_net_shape.AddDim (nframes); + grad_net_shape.AddDim (nloc); + + // allocate the output tensor + Tensor* grad_net_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(0, grad_net_shape, &grad_net_tensor)); + + // flat the tensors + auto grad = grad_tensor .matrix(); + auto du = du_tensor .matrix(); + auto sw_deriv = sw_deriv_tensor .matrix(); + auto nlist = nlist_tensor .matrix(); + auto grad_net = grad_net_tensor ->matrix(); + + // loop over frames +#pragma omp parallel for + for (int kk = 0; kk < nframes; ++kk){ + // reset the frame to 0 + for (int ii = 0; ii < nloc; ++ii){ + grad_net (kk, ii) = 0; + } + + // compute grad of one frame + for (int ii = 0; ii < nloc; ++ii){ + int i_idx = ii; + // deriv wrt center atom + for (int jj = 0; jj < nnei; ++jj){ + int j_idx = nlist (kk, i_idx * nnei + jj); + if (j_idx < 0) continue; + int rij_idx_shift = (ii * nnei + jj) * 3; + grad_net(kk, i_idx) += grad(kk, i_idx * 3 + 0) * sw_deriv(kk, rij_idx_shift + 0); + grad_net(kk, i_idx) += grad(kk, i_idx * 3 + 1) * sw_deriv(kk, rij_idx_shift + 1); + grad_net(kk, i_idx) += grad(kk, i_idx * 3 + 2) * sw_deriv(kk, rij_idx_shift + 2); + grad_net(kk, i_idx) -= grad(kk, j_idx * 3 + 0) * sw_deriv(kk, rij_idx_shift + 0); + grad_net(kk, i_idx) -= grad(kk, j_idx * 3 + 1) * sw_deriv(kk, rij_idx_shift + 1); + grad_net(kk, i_idx) -= grad(kk, j_idx * 3 + 2) * sw_deriv(kk, rij_idx_shift + 2); + } + } + } + } +private: + int n_r_sel, 
n_a_sel; +}; + +REGISTER_KERNEL_BUILDER(Name("SoftMinForceGrad").Device(DEVICE_CPU), SoftMinForceGradOp); diff --git a/source/op/soft_min_virial.cc b/source/op/soft_min_virial.cc new file mode 100644 index 0000000000..193c34f981 --- /dev/null +++ b/source/op/soft_min_virial.cc @@ -0,0 +1,141 @@ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include + +using namespace tensorflow; +using namespace std; + +#ifdef HIGH_PREC +typedef double VALUETYPE; +#else +typedef float VALUETYPE; +#endif + +#ifdef HIGH_PREC +REGISTER_OP("SoftMinVirial") +.Input("du: double") +.Input("sw_deriv: double") +.Input("rij: double") +.Input("nlist: int32") +.Input("natoms: int32") +.Attr("n_a_sel: int") +.Attr("n_r_sel: int") +.Output("virial: double") +.Output("atom_virial: double") +; +#else +REGISTER_OP("SoftMinVirial") +.Input("du: float") +.Input("sw_deriv: float") +.Input("rij: float") +.Input("nlist: int32") +.Input("natoms: int32") +.Attr("n_a_sel: int") +.Attr("n_r_sel: int") +.Output("virial: float") +.Output("atom_virial: float") +; +#endif + +using namespace tensorflow; + +class SoftMinVirialOp : public OpKernel { + public: + explicit SoftMinVirialOp(OpKernelConstruction* context) : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel)); + OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel)); + } + + void Compute(OpKernelContext* context) override { + // Grab the input tensor + int context_input_index = 0; + const Tensor& du_tensor = context->input(context_input_index++); + const Tensor& sw_deriv_tensor = context->input(context_input_index++); + const Tensor& rij_tensor = context->input(context_input_index++); + const Tensor& nlist_tensor = context->input(context_input_index++); + const Tensor& natoms_tensor = context->input(context_input_index++); + + // set size of the sample + OP_REQUIRES (context, (du_tensor.shape().dims() == 2), 
errors::InvalidArgument ("Dim of du should be 2")); + OP_REQUIRES (context, (sw_deriv_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of sw deriv should be 2")); + OP_REQUIRES (context, (rij_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of rij should be 2")); + OP_REQUIRES (context, (nlist_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of nlist should be 2")); + OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1), errors::InvalidArgument ("Dim of natoms should be 1")); + + OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3), errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3")); + auto natoms = natoms_tensor .flat(); + + int nframes = du_tensor.shape().dim_size(0); + int nloc = natoms(0); + int nall = natoms(1); + int nnei = nlist_tensor.shape().dim_size(1) / nloc; + + // check the sizes + OP_REQUIRES (context, (nframes == sw_deriv_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match")); + OP_REQUIRES (context, (nframes == rij_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match")); + OP_REQUIRES (context, (nframes == nlist_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match")); + + OP_REQUIRES (context, (nloc == du_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of du should match")); + OP_REQUIRES (context, (nloc * nnei * 3 == sw_deriv_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of sw_deriv should match")); + OP_REQUIRES (context, (nloc * nnei * 3 == rij_tensor.shape().dim_size(1)), errors::InvalidArgument ("dim of rij should be nnei * 3")); + OP_REQUIRES (context, (nnei == n_a_sel + n_r_sel), errors::InvalidArgument ("number of neighbors should match")); + + // Create an output tensor + TensorShape virial_shape ; + virial_shape.AddDim (nframes); + virial_shape.AddDim (9); + Tensor* virial_tensor = NULL; + OP_REQUIRES_OK(context,
context->allocate_output(0, virial_shape, &virial_tensor)); + TensorShape atom_virial_shape ; + atom_virial_shape.AddDim (nframes); + atom_virial_shape.AddDim (9 * nall); + Tensor* atom_virial_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(1, atom_virial_shape, &atom_virial_tensor)); + + // flat the tensors + auto du = du_tensor.matrix(); + auto sw_deriv = sw_deriv_tensor.matrix(); + auto rij = rij_tensor.matrix(); + auto nlist = nlist_tensor.matrix(); + auto virial = virial_tensor->matrix(); + auto atom_virial = atom_virial_tensor->matrix(); + + // loop over samples +#pragma omp parallel for + for (int kk = 0; kk < nframes; ++kk){ + + for (int ii = 0; ii < 9; ++ ii){ + virial (kk, ii) = 0.; + } + for (int ii = 0; ii < 9 * nall; ++ ii){ + atom_virial (kk, ii) = 0.; + } + + // compute virial of a frame + for (int ii = 0; ii < nloc; ++ii){ + int i_idx = ii; + // loop over neighbors + for (int jj = 0; jj < nnei; ++jj){ + int j_idx = nlist (kk, i_idx * nnei + jj); + if (j_idx < 0) continue; + int rij_idx_shift = (ii * nnei + jj) * 3; + for (int dd0 = 0; dd0 < 3; ++dd0){ + for (int dd1 = 0; dd1 < 3; ++dd1){ + VALUETYPE tmp_v = du(kk, i_idx) * sw_deriv(kk, rij_idx_shift + dd0) * rij(kk, rij_idx_shift + dd1); + virial(kk, dd0 * 3 + dd1) -= tmp_v; + atom_virial(kk, j_idx * 9 + dd0 * 3 + dd1) -= tmp_v; + } + } + } + } + } + } +private: + int n_r_sel, n_a_sel; +}; + +REGISTER_KERNEL_BUILDER(Name("SoftMinVirial").Device(DEVICE_CPU), SoftMinVirialOp); + + + diff --git a/source/op/soft_min_virial_grad.cc b/source/op/soft_min_virial_grad.cc new file mode 100644 index 0000000000..6f8703bdee --- /dev/null +++ b/source/op/soft_min_virial_grad.cc @@ -0,0 +1,151 @@ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include + +using namespace tensorflow; +using namespace std; + +#ifdef HIGH_PREC +typedef double VALUETYPE; +#else +typedef float VALUETYPE; +#endif + 
+#ifdef HIGH_PREC +REGISTER_OP("SoftMinVirialGrad") +.Input("grad: double") +.Input("du: double") +.Input("sw_deriv: double") +.Input("rij: double") +.Input("nlist: int32") +.Input("natoms: int32") +.Attr("n_a_sel: int") +.Attr("n_r_sel: int") +.Output("grad_net: double"); +#else +REGISTER_OP("SoftMinVirialGrad") +.Input("grad: float") +.Input("du: float") +.Input("sw_deriv: float") +.Input("rij: float") +.Input("nlist: int32") +.Input("natoms: int32") +.Attr("n_a_sel: int") +.Attr("n_r_sel: int") +.Output("grad_net: float"); +#endif + +class SoftMinVirialGradOp : public OpKernel +{ +public: + explicit SoftMinVirialGradOp(OpKernelConstruction* context) : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel)); + OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel)); + } + + void Compute(OpKernelContext* context) override { + // Grab the input tensor + int context_input_index = 0; + const Tensor& grad_tensor = context->input(context_input_index++); + const Tensor& du_tensor = context->input(context_input_index++); + const Tensor& sw_deriv_tensor = context->input(context_input_index++); + const Tensor& rij_tensor = context->input(context_input_index++); + const Tensor& nlist_tensor = context->input(context_input_index++); + const Tensor& natoms_tensor = context->input(context_input_index++); + + // set size of the sample + TensorShape grad_shape = grad_tensor.shape(); + TensorShape du_shape = du_tensor.shape(); + TensorShape sw_deriv_shape = sw_deriv_tensor.shape(); + TensorShape rij_shape = rij_tensor.shape(); + TensorShape nlist_shape = nlist_tensor.shape(); + + OP_REQUIRES (context, (grad_shape.dims() == 2), errors::InvalidArgument ("Dim of grad should be 2")); + OP_REQUIRES (context, (du_shape.dims() == 2),errors::InvalidArgument ("Dim of net deriv should be 2")); + OP_REQUIRES (context, (sw_deriv_shape.dims() == 2), errors::InvalidArgument ("Dim of input deriv should be 2")); + OP_REQUIRES (context, (rij_shape.dims() == 2), 
errors::InvalidArgument ("Dim of rij should be 2")); + OP_REQUIRES (context, (nlist_shape.dims() == 2), errors::InvalidArgument ("Dim of nlist should be 2")); + OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1), errors::InvalidArgument ("Dim of natoms should be 1")); + + OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3), errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3")); + auto natoms = natoms_tensor .flat(); + + int nframes = du_tensor.shape().dim_size(0); + int nloc = natoms(0); + int nnei = nlist_tensor.shape().dim_size(1) / nloc; + + // check the sizes + OP_REQUIRES (context, (nframes == grad_shape.dim_size(0)), errors::InvalidArgument ("number of frames should match")); + OP_REQUIRES (context, (nframes == sw_deriv_shape.dim_size(0)), errors::InvalidArgument ("number of frames should match")); + OP_REQUIRES (context, (nframes == rij_shape.dim_size(0)), errors::InvalidArgument ("number of frames should match")); + OP_REQUIRES (context, (nframes == nlist_shape.dim_size(0)), errors::InvalidArgument ("number of frames should match")); + + OP_REQUIRES (context, (9 == grad_shape.dim_size(1)), errors::InvalidArgument ("input grad shape should be 3 x natoms")); + OP_REQUIRES (context, (nloc == du_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of du should match")); + OP_REQUIRES (context, (nloc * nnei * 3 == sw_deriv_shape.dim_size(1)),errors::InvalidArgument ("number of descriptors should match")); + OP_REQUIRES (context, (nloc * nnei * 3 == rij_shape.dim_size(1)), errors::InvalidArgument ("dim of rij should be nnei * 3")); + OP_REQUIRES (context, (nnei == n_a_sel + n_r_sel), errors::InvalidArgument ("number of neighbors should match")); + + // Create an output tensor + TensorShape grad_net_shape ; + grad_net_shape.AddDim (nframes); + grad_net_shape.AddDim (nloc); + + // allocate the output tensor + Tensor* grad_net_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(0, grad_net_shape, 
&grad_net_tensor)); + + // flat the tensors + auto grad = grad_tensor .matrix(); + auto du = du_tensor .matrix(); + auto sw_deriv = sw_deriv_tensor .matrix(); + auto rij = rij_tensor .matrix(); + auto nlist = nlist_tensor .matrix(); + auto grad_net = grad_net_tensor ->matrix(); + + // loop over frames +#pragma omp parallel for + for (int kk = 0; kk < nframes; ++kk){ + + // reset the frame to 0 + for (int ii = 0; ii < nloc; ++ii){ + grad_net (kk, ii) = 0; + } + + // compute grad of one frame + for (int ii = 0; ii < nloc; ++ii){ + int i_idx = ii; + // loop over neighbors + for (int jj = 0; jj < nnei; ++jj){ + int j_idx = nlist (kk, i_idx * nnei + jj); + if (j_idx < 0) continue; + int rij_idx_shift = (ii * nnei + jj) * 3; + for (int dd0 = 0; dd0 < 3; ++dd0){ + for (int dd1 = 0; dd1 < 3; ++dd1){ + grad_net (kk, i_idx) -= + grad (kk, dd0 * 3 + dd1) * sw_deriv(kk, rij_idx_shift + dd0) * rij(kk, rij_idx_shift + dd1); + } + } + } + } + } + } +private: + int n_r_sel, n_a_sel, n_a_shift; + inline void + make_descript_range (int & idx_start, + int & idx_end, + const int & nei_idx) { + if (nei_idx < n_a_sel) { + idx_start = nei_idx * 4; + idx_end = nei_idx * 4 + 4; + } + else { + idx_start = n_a_shift + (nei_idx - n_a_sel); + idx_end = n_a_shift + (nei_idx - n_a_sel) + 1; + } + } +}; + +REGISTER_KERNEL_BUILDER(Name("SoftMinVirialGrad").Device(DEVICE_CPU), SoftMinVirialGradOp); diff --git a/source/op/tab_inter.cc b/source/op/tab_inter.cc new file mode 100644 index 0000000000..242e52a6e7 --- /dev/null +++ b/source/op/tab_inter.cc @@ -0,0 +1,324 @@ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include + +using namespace tensorflow; +using namespace std; + +#ifdef HIGH_PREC +typedef double VALUETYPE; +#else +typedef float VALUETYPE; +#endif + +#ifdef HIGH_PREC +REGISTER_OP("TabInter") +.Input("table_info: double") +.Input("table_data: double") +.Input("type: int32") 
+.Input("rij: double") +.Input("nlist: int32") +.Input("natoms: int32") +.Input("scale: double") +.Attr("sel_a: list(int)") +.Attr("sel_r: list(int)") +.Output("atom_energy: double") +.Output("force: double") +.Output("atom_virial: double"); +#else +REGISTER_OP("TabInter") +.Input("table_info: double") +.Input("table_data: double") +.Input("type: int32") +.Input("rij: float") +.Input("nlist: int32") +.Input("natoms: int32") +.Input("scale: float") +.Attr("sel_a: list(int)") +.Attr("sel_r: list(int)") +.Output("atom_energy: float") +.Output("force: float") +.Output("atom_virial: float"); +#endif + +using namespace tensorflow; + +inline +void tabulated_inter (double & ener, + double & fscale, + const double * table_info, + const double * table_data, + const double * dr) +{ + // info size: 3 + const double & rmin = table_info[0]; + const double & hh = table_info[1]; + const double hi = 1./hh; + const unsigned nspline = unsigned(table_info[2] + 0.1); + const unsigned ndata = nspline * 4; + + double r2 = dr[0] * dr[0] + dr[1] * dr[1] + dr[2] * dr[2]; + double rr = sqrt(r2); + double uu = (rr - rmin) * hi; + // cout << rr << " " << rmin << " " << hh << " " << uu << endl; + if (uu < 0) { + cerr << "coord go beyond table lower boundary" << endl; + exit(1); + } + int idx = uu; + if (idx >= nspline) { + fscale = ener = 0; + return; + } + uu -= idx; + assert(idx >= 0); + assert(uu >= 0 && uu < 1); + + const double & a3 = table_data[4 * idx + 0]; + const double & a2 = table_data[4 * idx + 1]; + const double & a1 = table_data[4 * idx + 2]; + const double & a0 = table_data[4 * idx + 3]; + + double etmp = (a3 * uu + a2) * uu + a1; + ener = etmp * uu + a0; + fscale = (2. 
* a3 * uu + a2) * uu + etmp; + fscale *= -hi; +} + +class TabInterOp : public OpKernel { + public: + explicit TabInterOp(OpKernelConstruction* context) : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("sel_a", &sel_a)); + OP_REQUIRES_OK(context, context->GetAttr("sel_r", &sel_r)); + cum_sum (sec_a, sel_a); + cum_sum (sec_r, sel_r); + nnei_a = sec_a.back(); + nnei_r = sec_r.back(); + nnei = nnei_a + nnei_r; + } + + void Compute(OpKernelContext* context) override { + // Grab the input tensor + int tmp_idx = 0; + const Tensor& table_info_tensor = context->input(tmp_idx++); + const Tensor& table_data_tensor = context->input(tmp_idx++); + const Tensor& type_tensor = context->input(tmp_idx++); + const Tensor& rij_tensor = context->input(tmp_idx++); + const Tensor& nlist_tensor = context->input(tmp_idx++); + const Tensor& natoms_tensor = context->input(tmp_idx++); + const Tensor& scale_tensor = context->input(tmp_idx++); + + // set size of the sample + OP_REQUIRES (context, (table_info_tensor.shape().dims() == 1), errors::InvalidArgument ("Dim of table_info should be 1")); + OP_REQUIRES (context, (table_data_tensor.shape().dims() == 1), errors::InvalidArgument ("Dim of table_data should be 1")); + OP_REQUIRES (context, (type_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of type should be 2")); + OP_REQUIRES (context, (rij_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of rij should be 2")); + OP_REQUIRES (context, (nlist_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of nlist should be 2")); + OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1), errors::InvalidArgument ("Dim of natoms should be 1")); + OP_REQUIRES (context, (scale_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of scale should be 2")); + + OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3), errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3")); + auto natoms = natoms_tensor .flat(); + + int 
nframes = type_tensor.shape().dim_size(0); + int nloc = natoms(0); + int nall = natoms(1); + int ntypes = natoms_tensor.shape().dim_size(0) - 2; + assert(sel_a.size() == ntypes); + assert(sel_r.size() == ntypes); + + // check the sizes + OP_REQUIRES (context, (nframes == type_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match")); + OP_REQUIRES (context, (nframes == rij_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match")); + OP_REQUIRES (context, (nframes == nlist_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match")); + OP_REQUIRES (context, (nall == type_tensor.shape().dim_size(1)), errors::InvalidArgument ("shape of type should be nall")); + OP_REQUIRES (context, (3 * nnei * nloc == rij_tensor.shape().dim_size(1)), errors::InvalidArgument ("shape of rij should be 3 * nloc * nnei")); + OP_REQUIRES (context, (nnei * nloc == nlist_tensor.shape().dim_size(1)), errors::InvalidArgument ("shape of nlist should be nloc * nnei")); + OP_REQUIRES (context, (nloc == scale_tensor.shape().dim_size(1)), errors::InvalidArgument ("shape of scale should be nloc")); + + // Create an output tensor + TensorShape energy_shape ; + energy_shape.AddDim (nframes); + energy_shape.AddDim (nloc); + TensorShape force_shape ; + force_shape.AddDim (nframes); + force_shape.AddDim (3 * nall); + TensorShape virial_shape ; + virial_shape.AddDim (nframes); + virial_shape.AddDim (9 * nall); + Tensor* energy_tensor = NULL; + Tensor* force_tensor = NULL; + Tensor* virial_tensor = NULL; + tmp_idx = 0; + OP_REQUIRES_OK(context, context->allocate_output(tmp_idx++, energy_shape, &energy_tensor)); + OP_REQUIRES_OK(context, context->allocate_output(tmp_idx++, force_shape, &force_tensor )); + OP_REQUIRES_OK(context, context->allocate_output(tmp_idx++, virial_shape, &virial_tensor)); + + // flat the tensors + auto table_info = table_info_tensor.flat(); + auto table_data = table_data_tensor.flat(); + auto 
type = type_tensor .matrix(); + auto rij = rij_tensor .matrix(); + auto nlist = nlist_tensor .matrix(); + auto scale = scale_tensor .matrix(); + auto energy = energy_tensor ->matrix(); + auto force = force_tensor ->matrix(); + auto virial = virial_tensor ->matrix(); + + OP_REQUIRES (context, (ntypes == int(table_info(3)+0.1)), errors::InvalidArgument ("ntypes provided in table does not match deeppot")); + int nspline = table_info(2)+0.1; + int tab_stride = 4 * nspline; + assert(ntypes * ntypes * tab_stride == table_data_tensor.shape().dim_size(0)); + vector d_table_info(4); + vector d_table_data(ntypes * ntypes * tab_stride); + for (unsigned ii = 0; ii < d_table_info.size(); ++ii){ + d_table_info[ii] = table_info(ii); + } + for (unsigned ii = 0; ii < d_table_data.size(); ++ii){ + d_table_data[ii] = table_data(ii); + } + const double * p_table_info = &(d_table_info[0]); + const double * p_table_data = &(d_table_data[0]); + + // loop over samples +#pragma omp parallel for + for (int kk = 0; kk < nframes; ++kk){ + // fill results with 0 + for (int ii = 0; ii < nloc; ++ii){ + int i_idx = ii; + energy(kk, i_idx) = 0; + } + for (int ii = 0; ii < nall; ++ii){ + int i_idx = ii; + force(kk, i_idx * 3 + 0) = 0; + force(kk, i_idx * 3 + 1) = 0; + force(kk, i_idx * 3 + 2) = 0; + for (int dd = 0; dd < 9; ++dd) { + virial(kk, i_idx * 9 + dd) = 0; + } + } + // compute force of a frame + int i_idx = 0; + for (int tt = 0; tt < ntypes; ++tt) { + for (int ii = 0; ii < natoms(2+tt); ++ii){ + int i_type = type(kk, i_idx); + VALUETYPE i_scale = scale(kk, i_idx); + assert(i_type == tt) ; + int jiter = 0; + // a neighbor + for (int ss = 0; ss < sel_a.size(); ++ss){ + int j_type = ss; + const double * cur_table_data = + p_table_data + (i_type * ntypes + j_type) * tab_stride; + for (int jj = 0; jj < sel_a[ss]; ++jj){ + int j_idx = nlist(kk, i_idx * nnei + jiter); + if (j_idx < 0){ + jiter++; + continue; + } + assert(j_type == type(kk, j_idx)); + double dr[3]; + for (int dd = 0; dd < 3; 
++dd){ + dr[dd] = rij(kk, (i_idx * nnei + jiter) * 3 + dd); + } + double r2 = dr[0] * dr[0] + dr[1] * dr[1] + dr[2] * dr[2]; + double ri = 1./sqrt(r2); + double ener, fscale; + tabulated_inter(ener, + fscale, + p_table_info, + cur_table_data, + dr); + // printf("tabforce %d %d r: %12.8f ener: %12.8f %12.8f %8.5f fj: %8.5f %8.5f %8.5f dr: %9.6f %9.6f %9.6f\n", + // i_idx, j_idx, + // 1/ri, + // ener, fscale, i_scale, + // -fscale * dr[00] * ri * 0.5 * i_scale, -fscale * dr[01] * ri * 0.5 * i_scale, -fscale * dr[02] * ri * 0.5 * i_scale, + // dr[0], dr[1], dr[2] + // ); + energy(kk, i_idx) += 0.5 * ener; + for (int dd = 0; dd < 3; ++dd) { + force(kk, i_idx * 3 + dd) -= fscale * dr[dd] * ri * 0.5 * i_scale; + force(kk, j_idx * 3 + dd) += fscale * dr[dd] * ri * 0.5 * i_scale; + } + for (int dd0 = 0; dd0 < 3; ++dd0) { + for (int dd1 = 0; dd1 < 3; ++dd1) { + virial(kk, i_idx * 9 + dd0 * 3 + dd1) + += 0.5 * fscale * dr[dd0] * dr[dd1] * ri * 0.5 * i_scale; + virial(kk, j_idx * 9 + dd0 * 3 + dd1) + += 0.5 * fscale * dr[dd0] * dr[dd1] * ri * 0.5 * i_scale; + } + } + jiter++; + } + } + // r neighbor + for (int ss = 0; ss < sel_r.size(); ++ss){ + int j_type = ss; + const double * cur_table_data = + p_table_data + (i_type * ntypes + j_type) * tab_stride; + for (int jj = 0; jj < sel_r[ss]; ++jj){ + int j_idx = nlist(kk, i_idx * nnei + jiter); + if (j_idx < 0){ + jiter ++; + continue; + } + assert(j_type == type(kk, j_idx)); + double dr[3]; + for (int dd = 0; dd < 3; ++dd){ + dr[dd] = rij(kk, (i_idx * nnei + jiter) * 3 + dd); + } + double r2 = dr[0] * dr[0] + dr[1] * dr[1] + dr[2] * dr[2]; + double ri = 1./sqrt(r2); + double ener, fscale; + tabulated_inter(ener, + fscale, + p_table_info, + cur_table_data, + dr); + // printf("tabforce %d %d %8.5f %12.8f %12.8f %8.5f fj: %8.5f %8.5f %8.5f\n", + // i_idx, j_idx, + // 1/ri, + // ener, fscale, i_scale, + // -fscale * dr[00] * ri * 0.5 * i_scale, -fscale * dr[01] * ri * 0.5 * i_scale, -fscale * dr[02] * ri * 0.5 * i_scale); + 
energy(kk, i_idx) += 0.5 * ener; + for (int dd = 0; dd < 3; ++dd) { + force(kk, i_idx * 3 + dd) -= fscale * dr[dd] * ri * 0.5 * i_scale; + force(kk, j_idx * 3 + dd) += fscale * dr[dd] * ri * 0.5 * i_scale; + } + for (int dd0 = 0; dd0 < 3; ++dd0) { + for (int dd1 = 0; dd1 < 3; ++dd1) { + virial(kk, j_idx * 9 + dd0 * 3 + dd1) + += fscale * dr[dd0] * dr[dd1] * ri * 0.5 * i_scale; + } + } + jiter++; + } + } + i_idx ++; + } + } + } + } +private: + vector sel_r; + vector sel_a; + vector sec_a; + vector sec_r; + int nnei, nnei_a, nnei_r; + void + cum_sum (vector & sec, + const vector & n_sel) const { + sec.resize (n_sel.size() + 1); + sec[0] = 0; + for (int ii = 1; ii < sec.size(); ++ii){ + sec[ii] = sec[ii-1] + n_sel[ii-1]; + } + } +}; + +REGISTER_KERNEL_BUILDER(Name("TabInter").Device(DEVICE_CPU), TabInterOp); + + + diff --git a/source/pyproject.toml b/source/pyproject.toml new file mode 100644 index 0000000000..75335283e8 --- /dev/null +++ b/source/pyproject.toml @@ -0,0 +1,2 @@ +[build-system] +requires = ["setuptools", "wheel", "scikit-build", "cmake", "ninja"] diff --git a/source/scripts/CMakeLists.txt b/source/scripts/CMakeLists.txt index 4d48b13085..c78bbc8d5a 100644 --- a/source/scripts/CMakeLists.txt +++ b/source/scripts/CMakeLists.txt @@ -1,5 +1,5 @@ install( - PROGRAMS freeze.py - DESTINATION bin/ - RENAME dp_frz + FILES freeze.py config.py + DESTINATION deepmd/ ) + diff --git a/source/scripts/config.py b/source/scripts/config.py new file mode 100644 index 0000000000..6e87cf8329 --- /dev/null +++ b/source/scripts/config.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 + +import glob,os,sys,json,argparse +import numpy as np + + +def valid_dir(name) : + if not os.path.isfile(os.path.join(name, 'type.raw')) : + raise OSError + sets = glob.glob(os.path.join(name, 'set.*')) + for ii in sets : + if not os.path.isfile(os.path.join(ii, 'box.npy')) : + raise OSError + if not os.path.isfile(os.path.join(ii, 'coord.npy')) : + raise OSError + + +def load_systems(dirs) : + 
all_type = [] + all_box = [] + for ii in dirs : + sys_type = np.loadtxt(os.path.join(ii, 'type.raw'), dtype = int) + sys_box = None + sets = glob.glob(os.path.join(ii, 'set.*')) + for ii in sets : + if type(sys_box) is not np.ndarray : + sys_box = np.load(os.path.join(ii, 'box.npy')) + else : + sys_box = np.concatenate((sys_box, np.load(os.path.join(ii, 'box.npy'))), axis = 0) + all_type.append(sys_type) + all_box.append(sys_box) + return all_type, all_box + + +def get_system_names() : + dirs = input("Enter system path(s) (seperated by space, wide card supported): \n") + dirs = dirs.split() + real_dirs = [] + for ii in dirs : + real_dirs += glob.glob(ii) + for ii in real_dirs : + valid_dir(ii) + return real_dirs + +def get_rcut() : + dv = 6 + rcut = input("Enter rcut (default %f A): \n" % dv) + try: + rcut = float(rcut) + except ValueError: + rcut = dv + if rcut <= 0: + raise ValueError('rcut should be > 0') + return rcut + + +def get_batch_size_rule() : + dv = 32 + matom = input("Enter the minimal number of atoms in a batch (default %d): \n" % dv) + try: + matom = int(matom) + except ValueError: + matom = dv + if matom <= 0: + raise ValueError('the number should be > 0') + return matom + + +def get_stop_batch(): + dv = 1000000 + sb = input("Enter the stop batch (default %d): \n" % dv) + try: + sb = int(sb) + except ValueError: + sb = dv + if sb <= 0: + raise ValueError('the number should be > 0') + return sb + + +def get_ntypes (all_type) : + coll = [] + for ii in all_type: + coll += list(ii) + list_coll = set(coll) + return len(list_coll) + + +def get_max_density(all_type, all_box) : + ntypes = get_ntypes(all_type) + all_max = [] + for tt, bb in zip(all_type, all_box) : + vv = np.reshape(bb, [-1,3,3]) + vv = np.linalg.det(vv) + min_v = np.min(vv) + type_count = [] + for ii in range(ntypes) : + type_count.append(sum(tt == ii)) + max_den = type_count / min_v + all_max.append(max_den) + all_max = np.max(all_max, axis = 0) + return all_max + + + + +def 
suggest_sel(all_type, all_box, rcut, ratio = 1.5) : + max_den = get_max_density(all_type, all_box) + return [int(ii) for ii in max_den * 4./3. * np.pi * rcut**3 * ratio] + + +def suggest_batch_size(all_type, min_atom) : + bs = [] + for ii in all_type : + natoms = len(ii) + tbs = min_atom // natoms + if (min_atom // natoms) * natoms != min_atom : + tbs += 1 + bs.append(tbs) + return bs + + +def suggest_decay(sb): + decay_steps = int(sb // 200) + decay_rate = 0.95 + return decay_steps, decay_rate + + +def default_data() : + data = {} + data['use_smooth'] = True + data['sel_a'] = [] + data['rcut_smth'] = -1 + data['rcut'] = -1 + data['filter_neuron'] = [20, 40, 80] + data['filter_resnet_dt'] = False + data['axis_neuron'] = 8 + data['fitting_neuron'] = [240, 240, 240] + data['fitting_resnet_dt'] = True + data['coord_norm'] = True + data['type_fitting_net'] = False + data['systems'] = [] + data['set_prefix'] = 'set' + data['stop_batch'] = -1 + data['batch_size'] = -1 + data['start_lr'] = 0.001 + data['decay_steps'] = -1 + data['decay_rate'] = 0.95 + data['start_pref_e'] = 0.02 + data['limit_pref_e'] = 1 + data['start_pref_f'] = 1000 + data['limit_pref_f'] = 1 + data['start_pref_v'] = 0 + data['limit_pref_v'] = 0 + data['seed'] = 1 + data['disp_file'] = 'lcurve.out' + data['disp_freq'] = 1000 + data['numb_test'] = 10 + data['save_freq'] = 10000 + data["save_ckpt"] = "model.ckpt" + data["disp_training"] = True + data["time_training"] = True + return data + + +def config(args) : + all_sys = get_system_names() + if len(all_sys) == 0 : + raise RuntimeError('no system specified') + rcut = get_rcut() + matom = get_batch_size_rule() + stop_batch = get_stop_batch() + + all_type, all_box = load_systems(all_sys) + sel = suggest_sel(all_type, all_box, rcut, ratio = 1.5) + bs = suggest_batch_size(all_type, matom) + decay_steps, decay_rate = suggest_decay(stop_batch) + + jdata = default_data() + jdata['systems'] = all_sys + jdata['sel_a'] = sel + jdata['rcut'] = rcut + 
jdata['rcut_smth'] = rcut - 0.2 + jdata['stop_batch'] = stop_batch + jdata['batch_size'] = bs + jdata['decay_steps'] = decay_steps + jdata['decay_rate'] = decay_rate + + with open(args.output, 'w') as fp: + json.dump(jdata, fp, indent=4) + diff --git a/source/scripts/freeze.py b/source/scripts/freeze.py index 5e1899f28b..d5d82fb94c 100755 --- a/source/scripts/freeze.py +++ b/source/scripts/freeze.py @@ -11,7 +11,7 @@ from tensorflow.python.framework import ops # load force module -module_path = os.path.dirname(os.path.realpath(__file__)) + "/../lib/" +module_path = os.path.dirname(os.path.realpath(__file__)) + "/../" assert (os.path.isfile (module_path + "deepmd/libop_abi.so" )), "force module does not exist" op_module = tf.load_op_library(module_path + "deepmd/libop_abi.so") @@ -21,6 +21,8 @@ import deepmd._prod_virial_grad import deepmd._prod_force_norot_grad import deepmd._prod_virial_norot_grad +import deepmd._soft_min_force_grad +import deepmd._soft_min_virial_grad def freeze_graph(model_folder, output, @@ -65,17 +67,5 @@ def freeze_graph(model_folder, print("%d ops in the final graph." 
% len(output_graph_def.node)) -if __name__ == '__main__': - - default_frozen_nodes = "energy_test,force_test,virial_test,atom_energy_test,atom_virial_test,t_rcut,t_ntypes" - - parser = argparse.ArgumentParser() - parser.add_argument("-d", "--folder", type=str, default = ".", - help="path to checkpoint folder") - parser.add_argument("-o", "--output", type=str, default = "frozen_model.pb", - help="name of graph, will output to the checkpoint folder") - parser.add_argument("-n", "--nodes", type=str, default = default_frozen_nodes, - help="the frozen nodes, defaults is " + default_frozen_nodes) - args = parser.parse_args() - +def freeze (args): freeze_graph(args.folder, args.output, args.nodes) diff --git a/source/setup.py b/source/setup.py new file mode 100644 index 0000000000..7f57a218bc --- /dev/null +++ b/source/setup.py @@ -0,0 +1,41 @@ +from skbuild import setup +from os import path +import imp + +readme_file = path.join(path.dirname(path.abspath(__file__)), '..', 'README.md') +try: + from m2r import parse_from_file + readme = parse_from_file(readme_file) +except ImportError: + with open(readme_file) as f: + readme = f.read() + + +tf_install_dir = imp.find_module('tensorflow')[1] + +# install_requires = ['xml'] +install_requires=[] + +setup( + name="deepmd-kit", + setup_requires=['setuptools-git-version'], + version_format='{tag}.dev{commitcount}_{gitsha}', + author="Han Wang", + author_email="wang_han@iapcm.ac.cn", + description="A deep learning package for many-body potential energy representation and molecular dynamics", + long_description=readme, + long_description_content_type="text/markdown", + url="https://github.com/deepmodeling/deepmd-kit", + packages=['deepmd'], + classifiers=[ + "Programming Language :: Python :: 3.6", + "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)", + ], + keywords='deepmd', + install_requires=install_requires, + cmake_args=['-DTENSORFLOW_ROOT:STRING=%s' % tf_install_dir, + '-DTF_GOOGLE_BIN:BOOL=FALSE', + 
'-DBUILD_PY_IF:BOOL=TRUE', + '-DBUILD_CPP_IF:BOOL=FALSE', + ], +) diff --git a/source/tests/CMakeLists.txt b/source/tests/CMakeLists.txt new file mode 100644 index 0000000000..6078cf977a --- /dev/null +++ b/source/tests/CMakeLists.txt @@ -0,0 +1,7 @@ +file(GLOB LIB_PY *py *json) + +install( + FILES ${LIB_PY} + DESTINATION deepmd/tests +) + diff --git a/source/tests/common.py b/source/tests/common.py new file mode 100644 index 0000000000..8e52b8a284 --- /dev/null +++ b/source/tests/common.py @@ -0,0 +1,250 @@ +import os, sys +import tensorflow as tf +import numpy as np + + +class Data(): + def __init__ (self, + rand_pert = 0.1, + seed = 1) : + coord = [[0.0, 0.0, 0.1], [1.1, 0.0, 0.1], [0.0, 1.1, 0.1], + [4.0, 0.0, 0.0], [5.1, 0.0, 0.0], [4.0, 1.1, 0.0]] + self.coord = np.array(coord) + np.random.seed(seed) + self.coord += rand_pert * np.random.random(self.coord.shape) + self.fparam = np.array([[0.1, 0.2]]) + self.atype = np.array([0, 1, 1, 0, 1, 1], dtype = int) + self.cell = 20 * np.eye(3) + self.nframes = 1 + self.coord = self.coord.reshape([self.nframes, -1]) + self.cell = self.cell.reshape([self.nframes, -1]) + self.natoms = len(self.atype) + self.idx_map = np.lexsort ((np.arange(self.natoms), self.atype)) + self.coord = self.coord.reshape([1, -1, 3]) + self.coord = self.coord[:,self.idx_map,:] + self.coord = self.coord.reshape([1, -1]) + self.atype = self.atype[self.idx_map] + self.datype = np.tile(self.atype, [self.nframes,1]) + + def get_data(self) : + return self.coord, self.cell, self.datype + + def get_natoms (self) : + ret = [self.natoms, self.natoms] + for ii in range(max(self.atype) + 1) : + ret.append(np.sum(self.atype == ii)) + return np.array(ret, dtype = np.int32) + + def get_ntypes(self) : + return max(self.atype) + 1 + + def get_test_box_data (self, + hh) : + coord0_, box0_, type0_ = self.get_data() + coord0 = coord0_[0] + box0 = box0_[0] + type0 = type0_[0] + nc = np.array( [coord0, coord0*(1+hh), coord0*(1-hh)] ) + nb = np.array( [box0, 
box0*(1+hh), box0*(1-hh)] ) + nt = np.array( [type0, type0, type0] ) + for dd in range(3) : + tmpc = np.copy (coord0) + tmpb = np.copy (box0) + tmpc = np.reshape(tmpc, [-1, 3]) + tmpc [:,dd] *= (1+hh) + tmpc = np.reshape(tmpc, [-1]) + tmpb = np.reshape(tmpb, [-1, 3]) + tmpb [dd,:] *= (1+hh) + tmpb = np.reshape(tmpb, [-1]) + nc = np.append (nc, [tmpc], axis = 0) + nb = np.append (nb, [tmpb], axis = 0) + nt = np.append (nt, [type0], axis = 0) + tmpc = np.copy (coord0) + tmpb = np.copy (box0) + tmpc = np.reshape(tmpc, [-1, 3]) + tmpc [:,dd] *= (1-hh) + tmpc = np.reshape(tmpc, [-1]) + tmpb = np.reshape(tmpb, [-1, 3]) + tmpb [dd,:] *= (1-hh) + tmpb = np.reshape(tmpb, [-1]) + nc = np.append (nc, [tmpc], axis = 0) + nb = np.append (nb, [tmpb], axis = 0) + nt = np.append (nt, [type0], axis = 0) + return nc, nb, nt + + +def force_test (inter, + testCase, + places = 6, + hh = 1e-6, + suffix = '') : + # set weights + w0 = np.ones (inter.ndescrpt) + inter.net_w_i = np.copy(w0) + # make network + t_energy, t_force, t_virial \ + = inter.comp_ef (inter.coord, inter.box, inter.type, inter.tnatoms, name = "test_f" + suffix) + inter.sess.run (tf.global_variables_initializer()) + # get data + dcoord, dbox, dtype = inter.data.get_data () + # cmp e0, f0 + [energy, force] = inter.sess.run ([t_energy, t_force], + feed_dict = { + inter.coord: dcoord, + inter.box: dbox, + inter.type: dtype, + inter.tnatoms: inter.natoms} + ) + # dim force + sel_idx = np.arange(inter.natoms[0]) + for idx in sel_idx: + for dd in range(3): + dcoordp = np.copy(dcoord) + dcoordm = np.copy(dcoord) + dcoordp[0,idx*3+dd] = dcoord[0,idx*3+dd] + hh + dcoordm[0,idx*3+dd] = dcoord[0,idx*3+dd] - hh + [enerp] = inter.sess.run ([t_energy], + feed_dict = { + inter.coord: dcoordp, + inter.box: dbox, + inter.type: dtype, + inter.tnatoms: inter.natoms} + ) + [enerm] = inter.sess.run ([t_energy], + feed_dict = { + inter.coord: dcoordm, + inter.box: dbox, + inter.type: dtype, + inter.tnatoms: inter.natoms} + ) + c_force = 
-(enerp[0] - enerm[0]) / (2*hh) + testCase.assertAlmostEqual(c_force, force[0,idx*3+dd], + places = places, + msg = "force component [%d,%d] failed" % (idx, dd)) + +def comp_vol (box) : + return np.linalg.det (np.reshape(box, (3,3))) + +def virial_test (inter, + testCase, + places = 6, + hh = 1e-6, + suffix = '') : + # set weights + w0 = np.ones (inter.ndescrpt) + inter.net_w_i = np.copy(w0) + # make network + t_energy, t_force, t_virial \ + = inter.comp_ef (inter.coord, inter.box, inter.type, inter.tnatoms, name = "test_v" + suffix) + inter.sess.run (tf.global_variables_initializer()) + # get data + dcoord, dbox, dtype = inter.data.get_test_box_data(hh) + # cmp e, f, v + [energy, force, virial] \ + = inter.sess.run ([t_energy, t_force, t_virial], + feed_dict = { + inter.coord: dcoord, + inter.box: dbox, + inter.type: dtype, + inter.tnatoms: inter.natoms} + ) + # check + ana_vir3 = (virial[0][0] + virial[0][4] + virial[0][8])/3. / comp_vol(dbox[0]) + num_vir3 = -(energy[1] - energy[2]) / (comp_vol(dbox[1]) - comp_vol(dbox[2])) + testCase.assertAlmostEqual(ana_vir3, num_vir3) + vir_idx = [0, 4, 8] + for dd in range (3) : + ana_v = (virial[0][vir_idx[dd]] / comp_vol(dbox[0])) + idx = 2 * (dd+1) + 1 + num_v = ( -(energy[idx] - energy[idx+1]) / (comp_vol(dbox[idx]) - comp_vol(dbox[idx+1])) ) + testCase.assertAlmostEqual(ana_v, num_v) + + +def force_dw_test (inter, + testCase, + places = 6, + hh = 1e-4, + suffix = '') : + dcoord, dbox, dtype = inter.data.get_data() + feed_dict_test0 = { + inter.coord: dcoord, + inter.box: dbox, + inter.type: dtype, + inter.tnatoms: inter.natoms} + + w0 = np.ones (inter.ndescrpt) + inter.net_w_i = np.copy(w0) + + t_ll, t_dw = inter.comp_f_dw (inter.coord, inter.box, inter.type, inter.tnatoms, name = "f_dw_test_0" + suffix) + inter.sess.run (tf.global_variables_initializer()) + ll_0 = inter.sess.run (t_ll, feed_dict = feed_dict_test0) + dw_0 = inter.sess.run (t_dw, feed_dict = feed_dict_test0) + + absolut_e = [] + relativ_e = [] + 
test_list = range (inter.ndescrpt) + ntest = 3 + if inter.sel_a[0] != 0: + test_list = np.concatenate((np.arange(0,ntest), np.arange(inter.sel_a[0]*4, inter.sel_a[0]*4+ntest))) + else : + test_list = np.arange(0,ntest) + + for ii in test_list: + inter.net_w_i = np.copy (w0) + inter.net_w_i[ii] += hh + t_ll, t_dw = inter.comp_f_dw (inter.coord, inter.box, inter.type, inter.tnatoms, name = "f_dw_test_" + str(ii*2+1) + suffix) + inter.sess.run (tf.global_variables_initializer()) + ll_1 = inter.sess.run (t_ll, feed_dict = feed_dict_test0) + inter.net_w_i[ii] -= 2. * hh + t_ll, t_dw = inter.comp_f_dw (inter.coord, inter.box, inter.type, inter.tnatoms, name = "f_dw_test_" + str(ii*2+2) + suffix) + inter.sess.run (tf.global_variables_initializer()) + ll_2 = inter.sess.run (t_ll, feed_dict = feed_dict_test0) + num_v = (ll_1 - ll_2) / (2. * hh) + ana_v = dw_0[ii] + diff = np.abs (num_v - ana_v) + # print(ii, num_v, ana_v) + testCase.assertAlmostEqual(num_v, ana_v, places = places) + + +def virial_dw_test (inter, + testCase, + places = 6, + hh = 1e-4, + suffix = '') : + dcoord, dbox, dtype = inter.data.get_data() + feed_dict_test0 = { + inter.coord: dcoord, + inter.box: dbox, + inter.type: dtype, + inter.tnatoms: inter.natoms} + + w0 = np.ones (inter.ndescrpt) + inter.net_w_i = np.copy(w0) + + t_ll, t_dw = inter.comp_v_dw (inter.coord, inter.box, inter.type, inter.tnatoms, name = "v_dw_test_0" + suffix) + inter.sess.run (tf.global_variables_initializer()) + ll_0 = inter.sess.run (t_ll, feed_dict = feed_dict_test0) + dw_0 = inter.sess.run (t_dw, feed_dict = feed_dict_test0) + + absolut_e = [] + relativ_e = [] + test_list = range (inter.ndescrpt) + ntest = 3 + if inter.sel_a[0] != 0 : + test_list = np.concatenate((np.arange(0,ntest), np.arange(inter.sel_a[0]*4, inter.sel_a[0]*4+ntest))) + else : + test_list = np.arange(0,ntest) + + for ii in test_list: + inter.net_w_i = np.copy (w0) + inter.net_w_i[ii] += hh + t_ll, t_dw = inter.comp_v_dw (inter.coord, inter.box, inter.type, 
inter.tnatoms, name = "v_dw_test_" + str(ii*2+1) + suffix) + inter.sess.run (tf.global_variables_initializer()) + ll_1 = inter.sess.run (t_ll, feed_dict = feed_dict_test0) + inter.net_w_i[ii] -= 2. * hh + t_ll, t_dw = inter.comp_v_dw (inter.coord, inter.box, inter.type, inter.tnatoms, name = "v_dw_test_" + str(ii*2+2) + suffix) + inter.sess.run (tf.global_variables_initializer()) + ll_2 = inter.sess.run (t_ll, feed_dict = feed_dict_test0) + num_v = (ll_1 - ll_2) / (2. * hh) + ana_v = dw_0[ii] + testCase.assertAlmostEqual(num_v, ana_v, places = places) diff --git a/source/tests/test_descrpt_nonsmth.py b/source/tests/test_descrpt_nonsmth.py new file mode 100644 index 0000000000..1cf7c15bb6 --- /dev/null +++ b/source/tests/test_descrpt_nonsmth.py @@ -0,0 +1,200 @@ +import os,sys +import numpy as np +import tensorflow as tf +import unittest + +from tensorflow.python.framework import ops + +# load grad of force module +module_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..") +sys.path.append (module_path) +import _prod_force_grad +import _prod_virial_grad +import _soft_min_force_grad +import _soft_min_virial_grad + +from common import force_test +from common import virial_test +from common import force_dw_test +from common import virial_dw_test +from common import Data + +from deepmd.ModelLocFrame import op_module + +class Inter(): + def __init__ (self, + data, + comp = 0) : + self.sess = tf.Session() + self.data = data + self.natoms = self.data.get_natoms() + self.ntypes = self.data.get_ntypes() + self.sel_a = [12,24] + self.sel_r = [12,24] + self.rcut_a = -1 + self.rcut_r = 10.0 + self.axis_rule = [0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0] + self.nnei_a = np.cumsum(self.sel_a)[-1] + self.nnei_r = np.cumsum(self.sel_r)[-1] + self.nnei = self.nnei_a + self.nnei_r + self.ndescrpt_a = self.nnei_a * 4 + self.ndescrpt_r = self.nnei_r * 1 + self.ndescrpt = self.ndescrpt_a + self.ndescrpt_r + davg = np.zeros ([self.ntypes, self.ndescrpt]) + dstd = np.ones 
([self.ntypes, self.ndescrpt]) + self.t_avg = tf.constant(davg.astype(np.float64)) + self.t_std = tf.constant(dstd.astype(np.float64)) + self.default_mesh = np.zeros (6, dtype = np.int32) + self.default_mesh[3] = 2 + self.default_mesh[4] = 2 + self.default_mesh[5] = 2 + # make place holder + self.coord = tf.placeholder(tf.float64, [None, self.natoms[0] * 3], name='t_coord') + self.box = tf.placeholder(tf.float64, [None, 9], name='t_box') + self.type = tf.placeholder(tf.int32, [None, self.natoms[0]], name = "t_type") + self.tnatoms = tf.placeholder(tf.int32, [None], name = "t_natoms") + + + def _net (self, + inputs, + name, + reuse = False) : + with tf.variable_scope(name, reuse=reuse): + net_w = tf.get_variable ('net_w', + [self.ndescrpt], + tf.float64, + tf.constant_initializer (self.net_w_i)) + dot_v = tf.matmul (tf.reshape (inputs, [-1, self.ndescrpt]), + tf.reshape (net_w, [self.ndescrpt, 1])) + return tf.reshape (dot_v, [-1]) + + def comp_ef (self, + dcoord, + dbox, + dtype, + tnatoms, + name, + reuse = None) : + descrpt, descrpt_deriv, rij, nlist, axis \ + = op_module.descrpt (dcoord, + dtype, + tnatoms, + dbox, + tf.constant(self.default_mesh), + self.t_avg, + self.t_std, + rcut_a = self.rcut_a, + rcut_r = self.rcut_r, + sel_a = self.sel_a, + sel_r = self.sel_r, + axis_rule = self.axis_rule) + self.axis = axis + self.nlist = nlist + self.descrpt = descrpt + inputs_reshape = tf.reshape (descrpt, [-1, self.ndescrpt]) + atom_ener = self._net (inputs_reshape, name, reuse = reuse) + atom_ener_reshape = tf.reshape(atom_ener, [-1, self.natoms[0]]) + energy = tf.reduce_sum (atom_ener_reshape, axis = 1) + net_deriv_ = tf.gradients (atom_ener, inputs_reshape) + net_deriv = net_deriv_[0] + net_deriv_reshape = tf.reshape (net_deriv, [-1, self.natoms[0] * self.ndescrpt]) + + force = op_module.prod_force (net_deriv_reshape, + descrpt_deriv, + nlist, + axis, + tnatoms, + n_a_sel = self.nnei_a, + n_r_sel = self.nnei_r) + virial, atom_vir = op_module.prod_virial 
(net_deriv_reshape, + descrpt_deriv, + rij, + nlist, + axis, + tnatoms, + n_a_sel = self.nnei_a, + n_r_sel = self.nnei_r) + return energy, force, virial + + + def comp_f_dw (self, + dcoord, + dbox, + dtype, + tnatoms, + name, + reuse = None) : + energy, force, virial = self.comp_ef (dcoord, dbox, dtype, tnatoms, name, reuse) + with tf.variable_scope(name, reuse=True): + net_w = tf.get_variable ('net_w', [self.ndescrpt], tf.float64, tf.constant_initializer (self.net_w_i)) + f_mag = tf.reduce_sum (tf.nn.tanh(force)) + f_mag_dw = tf.gradients (f_mag, net_w) + assert (len(f_mag_dw) == 1), "length of dw is wrong" + return f_mag, f_mag_dw[0] + + + def comp_v_dw (self, + dcoord, + dbox, + dtype, + tnatoms, + name, + reuse = None) : + energy, force, virial = self.comp_ef (dcoord, dbox, dtype, tnatoms, name, reuse) + with tf.variable_scope(name, reuse=True): + net_w = tf.get_variable ('net_w', [self.ndescrpt], tf.float64, tf.constant_initializer (self.net_w_i)) + v_mag = tf.reduce_sum (virial) + v_mag_dw = tf.gradients (v_mag, net_w) + assert (len(v_mag_dw) == 1), "length of dw is wrong" + return v_mag, v_mag_dw[0] + + + +class TestNonSmooth(Inter, unittest.TestCase): + def __init__ (self, *args, **kwargs): + self.places = 5 + data = Data() + Inter.__init__(self, data) + unittest.TestCase.__init__(self, *args, **kwargs) + self.controller = object() + + def test_force (self) : + force_test(self, self, places=5) + # t_energy, t_force, t_virial \ + # = self.comp_ef (self.coord, self.box, self.type, self.tnatoms, name = "test") + # self.sess.run (tf.global_variables_initializer()) + # dcoord, dbox, dtype = self.data.get_data () + # hh = 1e-6 + # dcoordp = np.copy(dcoord) + # dcoordm = np.copy(dcoord) + # dcoordp[0,0] = dcoord[0,0] + hh + # dcoordm[0,0] = dcoord[0,0] - hh + # [axis0, nlist0, d0] = self.sess.run ([self.axis, self.nlist, self.descrpt], + # feed_dict = { + # self.coord: dcoordp, + # self.box: dbox, + # self.type: dtype, + # self.tnatoms: self.natoms} + # ) + # 
[axis1, nlist1, d1] = self.sess.run ([self.axis, self.nlist, self.descrpt], + # feed_dict = { + # self.coord: dcoordm, + # self.box: dbox, + # self.type: dtype, + # self.tnatoms: self.natoms} + # ) + # print((nlist0 - nlist1)) + # print((axis0 - axis1)) + + def test_virial (self) : + virial_test(self, self, places=5) + + def test_force_dw (self) : + force_dw_test(self, self, places=5) + + def test_virial_dw (self) : + virial_dw_test(self, self, places=5) + + +if __name__ == '__main__': + unittest.main() diff --git a/source/tests/test_descrpt_se_r.py b/source/tests/test_descrpt_se_r.py new file mode 100644 index 0000000000..db61bed7d3 --- /dev/null +++ b/source/tests/test_descrpt_se_r.py @@ -0,0 +1,159 @@ +import os,sys +import numpy as np +import tensorflow as tf +import unittest + +from tensorflow.python.framework import ops + +# load grad of force module +module_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..") +sys.path.append (module_path) +import _prod_force_grad +import _prod_virial_grad +import _prod_force_se_r_grad +import _prod_virial_se_r_grad +import _soft_min_force_grad +import _soft_min_virial_grad + +from common import force_test +from common import virial_test +from common import force_dw_test +from common import virial_dw_test +from common import Data + +from deepmd.ModelLocFrame import op_module + +class Inter(): + def __init__ (self, + data) : + self.sess = tf.Session() + self.data = data + self.natoms = self.data.get_natoms() + self.ntypes = self.data.get_ntypes() + self.sel = [12,24] + self.sel_a = [0,0] + self.rcut_smth = 2.45 + self.rcut = 10.0 + self.nnei = np.cumsum(self.sel)[-1] + self.ndescrpt = self.nnei * 1 + davg = np.zeros ([self.ntypes, self.ndescrpt]) + dstd = np.ones ([self.ntypes, self.ndescrpt]) + self.t_avg = tf.constant(davg.astype(np.float64)) + self.t_std = tf.constant(dstd.astype(np.float64)) + self.default_mesh = np.zeros (6, dtype = np.int32) + self.default_mesh[3] = 2 + self.default_mesh[4] = 2 + 
self.default_mesh[5] = 2 + # make place holder + self.coord = tf.placeholder(tf.float64, [None, self.natoms[0] * 3], name='t_coord') + self.box = tf.placeholder(tf.float64, [None, 9], name='t_box') + self.type = tf.placeholder(tf.int32, [None, self.natoms[0]], name = "t_type") + self.tnatoms = tf.placeholder(tf.int32, [None], name = "t_natoms") + + def _net (self, + inputs, + name, + reuse = False) : + with tf.variable_scope(name, reuse=reuse): + net_w = tf.get_variable ('net_w', + [self.ndescrpt], + tf.float64, + tf.constant_initializer (self.net_w_i)) + dot_v = tf.matmul (tf.reshape (inputs, [-1, self.ndescrpt]), + tf.reshape (net_w, [self.ndescrpt, 1])) + return tf.reshape (dot_v, [-1]) + + def comp_ef (self, + dcoord, + dbox, + dtype, + tnatoms, + name, + reuse = None) : + descrpt, descrpt_deriv, rij, nlist \ + = op_module.descrpt_se_r (dcoord, + dtype, + tnatoms, + dbox, + tf.constant(self.default_mesh), + self.t_avg, + self.t_std, + rcut = self.rcut, + rcut_smth = self.rcut_smth, + sel = self.sel) + inputs_reshape = tf.reshape (descrpt, [-1, self.ndescrpt]) + atom_ener = self._net (inputs_reshape, name, reuse = reuse) + atom_ener_reshape = tf.reshape(atom_ener, [-1, self.natoms[0]]) + energy = tf.reduce_sum (atom_ener_reshape, axis = 1) + net_deriv_ = tf.gradients (atom_ener, inputs_reshape) + net_deriv = net_deriv_[0] + net_deriv_reshape = tf.reshape (net_deriv, [-1, self.natoms[0] * self.ndescrpt]) + + force = op_module.prod_force_se_r (net_deriv_reshape, + descrpt_deriv, + nlist, + tnatoms) + virial, atom_vir = op_module.prod_virial_se_r (net_deriv_reshape, + descrpt_deriv, + rij, + nlist, + tnatoms) + return energy, force, virial + + + def comp_f_dw (self, + dcoord, + dbox, + dtype, + tnatoms, + name, + reuse = None) : + energy, force, virial = self.comp_ef (dcoord, dbox, dtype, tnatoms, name, reuse) + with tf.variable_scope(name, reuse=True): + net_w = tf.get_variable ('net_w', [self.ndescrpt], tf.float64, tf.constant_initializer (self.net_w_i)) + f_mag 
= tf.reduce_sum (tf.nn.tanh(force)) + f_mag_dw = tf.gradients (f_mag, net_w) + assert (len(f_mag_dw) == 1), "length of dw is wrong" + return f_mag, f_mag_dw[0] + + + def comp_v_dw (self, + dcoord, + dbox, + dtype, + tnatoms, + name, + reuse = None) : + energy, force, virial = self.comp_ef (dcoord, dbox, dtype, tnatoms, name, reuse) + with tf.variable_scope(name, reuse=True): + net_w = tf.get_variable ('net_w', [self.ndescrpt], tf.float64, tf.constant_initializer (self.net_w_i)) + v_mag = tf.reduce_sum (virial) + v_mag_dw = tf.gradients (v_mag, net_w) + assert (len(v_mag_dw) == 1), "length of dw is wrong" + return v_mag, v_mag_dw[0] + + + +class TestSmooth(Inter, unittest.TestCase): + def __init__ (self, *args, **kwargs): + self.places = 5 + data = Data() + Inter.__init__(self, data) + unittest.TestCase.__init__(self, *args, **kwargs) + self.controller = object() + + def test_force (self) : + force_test(self, self, places=5, suffix = '_se_r') + + def test_virial (self) : + virial_test(self, self, places=5, suffix = '_se_r') + + def test_force_dw (self) : + force_dw_test(self, self, places=5, suffix = '_se_r') + + def test_virial_dw (self) : + virial_dw_test(self, self, places=5, suffix = '_se_r') + + +if __name__ == '__main__': + unittest.main() diff --git a/source/tests/test_descrpt_smooth.py b/source/tests/test_descrpt_smooth.py new file mode 100644 index 0000000000..633deaf7cd --- /dev/null +++ b/source/tests/test_descrpt_smooth.py @@ -0,0 +1,170 @@ +import os,sys +import numpy as np +import tensorflow as tf +import unittest + +from tensorflow.python.framework import ops + +# load grad of force module +module_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..") +sys.path.append (module_path) +import _prod_force_grad +import _prod_virial_grad +import _prod_force_se_a_grad +import _prod_virial_se_a_grad +import _soft_min_force_grad +import _soft_min_virial_grad + +from common import force_test +from common import virial_test +from common import 
force_dw_test +from common import virial_dw_test +from common import Data + +from deepmd.ModelLocFrame import op_module + +class Inter(): + def __init__ (self, + data) : + self.sess = tf.Session() + self.data = data + self.natoms = self.data.get_natoms() + self.ntypes = self.data.get_ntypes() + self.sel_a = [12,24] + self.sel_r = [0,0] + self.rcut_a = -1 + self.rcut_r_smth = 2.45 + self.rcut_r = 10.0 + self.nnei_a = np.cumsum(self.sel_a)[-1] + self.nnei_r = np.cumsum(self.sel_r)[-1] + self.nnei = self.nnei_a + self.nnei_r + self.ndescrpt_a = self.nnei_a * 4 + self.ndescrpt_r = self.nnei_r * 1 + self.ndescrpt = self.ndescrpt_a + self.ndescrpt_r + davg = np.zeros ([self.ntypes, self.ndescrpt]) + dstd = np.ones ([self.ntypes, self.ndescrpt]) + self.t_avg = tf.constant(davg.astype(np.float64)) + self.t_std = tf.constant(dstd.astype(np.float64)) + self.default_mesh = np.zeros (6, dtype = np.int32) + self.default_mesh[3] = 2 + self.default_mesh[4] = 2 + self.default_mesh[5] = 2 + # make place holder + self.coord = tf.placeholder(tf.float64, [None, self.natoms[0] * 3], name='t_coord') + self.box = tf.placeholder(tf.float64, [None, 9], name='t_box') + self.type = tf.placeholder(tf.int32, [None, self.natoms[0]], name = "t_type") + self.tnatoms = tf.placeholder(tf.int32, [None], name = "t_natoms") + + def _net (self, + inputs, + name, + reuse = False) : + with tf.variable_scope(name, reuse=reuse): + net_w = tf.get_variable ('net_w', + [self.ndescrpt], + tf.float64, + tf.constant_initializer (self.net_w_i)) + dot_v = tf.matmul (tf.reshape (inputs, [-1, self.ndescrpt]), + tf.reshape (net_w, [self.ndescrpt, 1])) + return tf.reshape (dot_v, [-1]) + + def comp_ef (self, + dcoord, + dbox, + dtype, + tnatoms, + name, + reuse = None) : + descrpt, descrpt_deriv, rij, nlist \ + = op_module.descrpt_se_a (dcoord, + dtype, + tnatoms, + dbox, + tf.constant(self.default_mesh), + self.t_avg, + self.t_std, + rcut_a = self.rcut_a, + rcut_r = self.rcut_r, + rcut_r_smth = self.rcut_r_smth, + 
sel_a = self.sel_a, + sel_r = self.sel_r) + inputs_reshape = tf.reshape (descrpt, [-1, self.ndescrpt]) + atom_ener = self._net (inputs_reshape, name, reuse = reuse) + atom_ener_reshape = tf.reshape(atom_ener, [-1, self.natoms[0]]) + energy = tf.reduce_sum (atom_ener_reshape, axis = 1) + net_deriv_ = tf.gradients (atom_ener, inputs_reshape) + net_deriv = net_deriv_[0] + net_deriv_reshape = tf.reshape (net_deriv, [-1, self.natoms[0] * self.ndescrpt]) + + force = op_module.prod_force_se_a (net_deriv_reshape, + descrpt_deriv, + nlist, + tnatoms, + n_a_sel = self.nnei_a, + n_r_sel = self.nnei_r) + virial, atom_vir = op_module.prod_virial_se_a (net_deriv_reshape, + descrpt_deriv, + rij, + nlist, + tnatoms, + n_a_sel = self.nnei_a, + n_r_sel = self.nnei_r) + return energy, force, virial + + + def comp_f_dw (self, + dcoord, + dbox, + dtype, + tnatoms, + name, + reuse = None) : + energy, force, virial = self.comp_ef (dcoord, dbox, dtype, tnatoms, name, reuse) + with tf.variable_scope(name, reuse=True): + net_w = tf.get_variable ('net_w', [self.ndescrpt], tf.float64, tf.constant_initializer (self.net_w_i)) + f_mag = tf.reduce_sum (tf.nn.tanh(force)) + f_mag_dw = tf.gradients (f_mag, net_w) + assert (len(f_mag_dw) == 1), "length of dw is wrong" + return f_mag, f_mag_dw[0] + + + def comp_v_dw (self, + dcoord, + dbox, + dtype, + tnatoms, + name, + reuse = None) : + energy, force, virial = self.comp_ef (dcoord, dbox, dtype, tnatoms, name, reuse) + with tf.variable_scope(name, reuse=True): + net_w = tf.get_variable ('net_w', [self.ndescrpt], tf.float64, tf.constant_initializer (self.net_w_i)) + v_mag = tf.reduce_sum (virial) + v_mag_dw = tf.gradients (v_mag, net_w) + assert (len(v_mag_dw) == 1), "length of dw is wrong" + return v_mag, v_mag_dw[0] + + + +class TestSmooth(Inter, unittest.TestCase): + def __init__ (self, *args, **kwargs): + self.places = 5 + data = Data() + Inter.__init__(self, data) + unittest.TestCase.__init__(self, *args, **kwargs) + self.controller = object() + 
+ def test_force (self) : + force_test(self, self, places=5, suffix = '_smth') + + def test_virial (self) : + virial_test(self, self, places=5, suffix = '_smth') + + def test_force_dw (self) : + force_dw_test(self, self, places=5, suffix = '_smth') + + def test_virial_dw (self) : + virial_dw_test(self, self, places=5, suffix = '_smth') + + +if __name__ == '__main__': + unittest.main() diff --git a/source/tests/test_model_loc_frame.py b/source/tests/test_model_loc_frame.py new file mode 100644 index 0000000000..c99ef57e5e --- /dev/null +++ b/source/tests/test_model_loc_frame.py @@ -0,0 +1,131 @@ +import dpdata,os,sys,json,unittest +import numpy as np +import tensorflow as tf +from common import Data + +lib_path = os.path.dirname(os.path.realpath(__file__)) + ".." +sys.path.append (lib_path) + +from deepmd.RunOptions import RunOptions +from deepmd.DataSystem import DataSystem +from deepmd.DescrptLocFrame import DescrptLocFrame +from deepmd.EnerFitting import EnerFitting +from deepmd.Model import Model +from deepmd.common import j_must_have, j_must_have_d, j_have + +global_ener_float_precision = tf.float64 +global_tf_float_precision = tf.float64 +global_np_float_precision = np.float64 + +def gen_data() : + tmpdata = Data(rand_pert = 0.1, seed = 1) + sys = dpdata.LabeledSystem() + sys.data['coords'] = tmpdata.coord + sys.data['atom_types'] = tmpdata.atype + sys.data['cells'] = tmpdata.cell + nframes = tmpdata.nframes + natoms = tmpdata.natoms + sys.data['coords'] = sys.data['coords'].reshape([nframes,natoms,3]) + sys.data['cells'] = sys.data['cells'].reshape([nframes,3,3]) + sys.data['energies'] = np.zeros([nframes,1]) + sys.data['forces'] = np.zeros([nframes,natoms,3]) + sys.data['virials'] = [] + sys.to_deepmd_npy('system', prec=np.float64) + +class TestModel(unittest.TestCase): + def setUp(self) : + gen_data() + + def test_model(self): + jfile = 'water.json' + with open(jfile) as fp: + jdata = json.load (fp) + run_opt = RunOptions(None) + systems = 
j_must_have(jdata, 'systems') + set_pfx = j_must_have(jdata, 'set_prefix') + batch_size = j_must_have(jdata, 'batch_size') + test_size = j_must_have(jdata, 'numb_test') + batch_size = 1 + test_size = 1 + stop_batch = j_must_have(jdata, 'stop_batch') + rcut = j_must_have (jdata['model']['descriptor'], 'rcut') + + data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None) + + test_prop_c, \ + test_energy, test_force, test_virial, test_atom_ener, \ + test_coord, test_box, test_type, test_fparam, \ + natoms_vec, \ + default_mesh \ + = data.get_test () + numb_test = 1 + + bias_atom_e = data.compute_energy_shift() + + descrpt = DescrptLocFrame(jdata['model']['descriptor']) + fitting = EnerFitting(jdata['model']['fitting_net'], descrpt) + model = Model(jdata['model'], descrpt, fitting) + + davg, dstd = model.compute_dstats([test_coord], [test_box], [test_type], [natoms_vec], [default_mesh]) + + t_prop_c = tf.placeholder(tf.float32, [4], name='t_prop_c') + t_energy = tf.placeholder(global_ener_float_precision, [None], name='t_energy') + t_force = tf.placeholder(global_tf_float_precision, [None], name='t_force') + t_virial = tf.placeholder(global_tf_float_precision, [None], name='t_virial') + t_atom_ener = tf.placeholder(global_tf_float_precision, [None], name='t_atom_ener') + t_coord = tf.placeholder(global_tf_float_precision, [None], name='i_coord') + t_type = tf.placeholder(tf.int32, [None], name='i_type') + t_natoms = tf.placeholder(tf.int32, [model.ntypes+2], name='i_natoms') + t_box = tf.placeholder(global_tf_float_precision, [None, 9], name='i_box') + t_mesh = tf.placeholder(tf.int32, [None], name='i_mesh') + is_training = tf.placeholder(tf.bool) + t_fparam = None + + energy, force, virial, atom_ener, atom_virial \ + = model.build (t_coord, + t_type, + t_natoms, + t_box, + t_mesh, + t_fparam, + davg = davg, + dstd = dstd, + bias_atom_e = bias_atom_e, + suffix = "loc_frame", + reuse = False) + + feed_dict_test = {t_prop_c: test_prop_c, + 
t_energy: test_energy [:numb_test], + t_force: np.reshape(test_force [:numb_test, :], [-1]), + t_virial: np.reshape(test_virial [:numb_test, :], [-1]), + t_atom_ener: np.reshape(test_atom_ener[:numb_test, :], [-1]), + t_coord: np.reshape(test_coord [:numb_test, :], [-1]), + t_box: test_box [:numb_test, :], + t_type: np.reshape(test_type [:numb_test, :], [-1]), + t_natoms: natoms_vec, + t_mesh: default_mesh, + is_training: False} + + sess = tf.Session() + sess.run(tf.global_variables_initializer()) + [e, f, v] = sess.run([energy, force, virial], + feed_dict = feed_dict_test) + + e = e.reshape([-1]) + f = f.reshape([-1]) + v = v.reshape([-1]) + refe = [1.165945032784766511e+01] + reff = [2.356319331246305437e-01,1.772322096063349284e-01,1.455439548950788684e-02,1.968599426000810226e-01,2.648214484898352983e-01,7.595232354012236564e-02,-2.121321856338151401e-01,-2.463886119018566037e-03,-2.075636300914874069e-02,-9.360310077571798101e-03,-1.751965198776750943e-01,-2.046405309983102827e-02,-1.990194093283037535e-01,-1.828347741191920298e-02,-6.916374506995154325e-02,-1.197997068502068031e-02,-2.461097746875573200e-01,1.987744214930105627e-02] + refv = [-4.998509978510510265e-01,-1.966169437179327711e-02,1.136130543869883977e-02,-1.966169437179334650e-02,-4.575353297894450555e-01,-2.668666556859019493e-03,1.136130543869887100e-02,-2.668666556859039876e-03,2.455466940358383508e-03] + refe = np.reshape(refe, [-1]) + reff = np.reshape(reff, [-1]) + refv = np.reshape(refv, [-1]) + + places = 10 + for ii in range(e.size) : + self.assertAlmostEqual(e[ii], refe[ii], places = places) + for ii in range(f.size) : + self.assertAlmostEqual(f[ii], reff[ii], places = places) + for ii in range(v.size) : + self.assertAlmostEqual(v[ii], refv[ii], places = places) + diff --git a/source/tests/test_model_se_a.py b/source/tests/test_model_se_a.py new file mode 100644 index 0000000000..f502eee2ef --- /dev/null +++ b/source/tests/test_model_se_a.py @@ -0,0 +1,130 @@ +import 
dpdata,os,sys,json,unittest +import numpy as np +import tensorflow as tf +from common import Data + +lib_path = os.path.dirname(os.path.realpath(__file__)) + ".." +sys.path.append (lib_path) + +from deepmd.RunOptions import RunOptions +from deepmd.DataSystem import DataSystem +from deepmd.DescrptSeA import DescrptSeA +from deepmd.EnerFitting import EnerFitting +from deepmd.Model import Model +from deepmd.common import j_must_have, j_must_have_d, j_have + +global_ener_float_precision = tf.float64 +global_tf_float_precision = tf.float64 +global_np_float_precision = np.float64 + +def gen_data() : + tmpdata = Data(rand_pert = 0.1, seed = 1) + sys = dpdata.LabeledSystem() + sys.data['coords'] = tmpdata.coord + sys.data['atom_types'] = tmpdata.atype + sys.data['cells'] = tmpdata.cell + nframes = tmpdata.nframes + natoms = tmpdata.natoms + sys.data['coords'] = sys.data['coords'].reshape([nframes,natoms,3]) + sys.data['cells'] = sys.data['cells'].reshape([nframes,3,3]) + sys.data['energies'] = np.zeros([nframes,1]) + sys.data['forces'] = np.zeros([nframes,natoms,3]) + sys.data['virials'] = [] + sys.to_deepmd_npy('system', prec=np.float64) + +class TestModel(unittest.TestCase): + def setUp(self) : + gen_data() + + def test_model(self): + jfile = 'water_se_a.json' + with open(jfile) as fp: + jdata = json.load (fp) + run_opt = RunOptions(None) + systems = j_must_have(jdata, 'systems') + set_pfx = j_must_have(jdata, 'set_prefix') + batch_size = j_must_have(jdata, 'batch_size') + test_size = j_must_have(jdata, 'numb_test') + batch_size = 1 + test_size = 1 + stop_batch = j_must_have(jdata, 'stop_batch') + rcut = j_must_have (jdata['model']['descriptor'], 'rcut') + + data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None) + + test_prop_c, \ + test_energy, test_force, test_virial, test_atom_ener, \ + test_coord, test_box, test_type, test_fparam, \ + natoms_vec, \ + default_mesh \ + = data.get_test () + numb_test = 1 + + bias_atom_e = 
data.compute_energy_shift() + + descrpt = DescrptSeA(jdata['model']['descriptor']) + fitting = EnerFitting(jdata['model']['fitting_net'], descrpt) + model = Model(jdata['model'], descrpt, fitting) + + davg, dstd = model.compute_dstats([test_coord], [test_box], [test_type], [natoms_vec], [default_mesh]) + + t_prop_c = tf.placeholder(tf.float32, [4], name='t_prop_c') + t_energy = tf.placeholder(global_ener_float_precision, [None], name='t_energy') + t_force = tf.placeholder(global_tf_float_precision, [None], name='t_force') + t_virial = tf.placeholder(global_tf_float_precision, [None], name='t_virial') + t_atom_ener = tf.placeholder(global_tf_float_precision, [None], name='t_atom_ener') + t_coord = tf.placeholder(global_tf_float_precision, [None], name='i_coord') + t_type = tf.placeholder(tf.int32, [None], name='i_type') + t_natoms = tf.placeholder(tf.int32, [model.ntypes+2], name='i_natoms') + t_box = tf.placeholder(global_tf_float_precision, [None, 9], name='i_box') + t_mesh = tf.placeholder(tf.int32, [None], name='i_mesh') + is_training = tf.placeholder(tf.bool) + t_fparam = None + + energy, force, virial, atom_ener, atom_virial \ + = model.build (t_coord, + t_type, + t_natoms, + t_box, + t_mesh, + t_fparam, + davg = davg, + dstd = dstd, + bias_atom_e = bias_atom_e, + suffix = "se_a", + reuse = False) + + feed_dict_test = {t_prop_c: test_prop_c, + t_energy: test_energy [:numb_test], + t_force: np.reshape(test_force [:numb_test, :], [-1]), + t_virial: np.reshape(test_virial [:numb_test, :], [-1]), + t_atom_ener: np.reshape(test_atom_ener[:numb_test, :], [-1]), + t_coord: np.reshape(test_coord [:numb_test, :], [-1]), + t_box: test_box [:numb_test, :], + t_type: np.reshape(test_type [:numb_test, :], [-1]), + t_natoms: natoms_vec, + t_mesh: default_mesh, + is_training: False} + + sess = tf.Session() + sess.run(tf.global_variables_initializer()) + [e, f, v] = sess.run([energy, force, virial], + feed_dict = feed_dict_test) + + e = e.reshape([-1]) + f = f.reshape([-1]) + 
v = v.reshape([-1]) + refe = [6.135449167779321300e+01] + reff = [7.799691562262310585e-02,9.423098804815030483e-02,3.790560997388224204e-03,1.432522403799846578e-01,1.148392791403983204e-01,-1.321871172563671148e-02,-7.318966526325138000e-02,6.516069212737778116e-02,5.406418483320515412e-04,5.870713761026503247e-02,-1.605402669549013672e-01,-5.089516979826595386e-03,-2.554593467731766654e-01,3.092063507347833987e-02,1.510355029451411479e-02,4.869271842355533952e-02,-1.446113274345035005e-01,-1.126524434771078789e-03] + refv = [-6.076776685178300053e-01,1.103174323630009418e-01,1.984250991380156690e-02,1.103174323630009557e-01,-3.319759402259439551e-01,-6.007404107650986258e-03,1.984250991380157036e-02,-6.007404107650981921e-03,-1.200076017439753642e-03] + refe = np.reshape(refe, [-1]) + reff = np.reshape(reff, [-1]) + refv = np.reshape(refv, [-1]) + + places = 10 + for ii in range(e.size) : + self.assertAlmostEqual(e[ii], refe[ii], places = places) + for ii in range(f.size) : + self.assertAlmostEqual(f[ii], reff[ii], places = places) + for ii in range(v.size) : + self.assertAlmostEqual(v[ii], refv[ii], places = places) diff --git a/source/tests/test_model_se_a_fparam.py b/source/tests/test_model_se_a_fparam.py new file mode 100644 index 0000000000..465d51a3f5 --- /dev/null +++ b/source/tests/test_model_se_a_fparam.py @@ -0,0 +1,132 @@ +import dpdata,os,sys,json,unittest +import numpy as np +import tensorflow as tf +from common import Data + +lib_path = os.path.dirname(os.path.realpath(__file__)) + ".." 
+sys.path.append (lib_path) + +from deepmd.RunOptions import RunOptions +from deepmd.DataSystem import DataSystem +from deepmd.DescrptSeA import DescrptSeA +from deepmd.EnerFitting import EnerFitting +from deepmd.Model import Model +from deepmd.common import j_must_have, j_must_have_d, j_have + +global_ener_float_precision = tf.float64 +global_tf_float_precision = tf.float64 +global_np_float_precision = np.float64 + +def gen_data() : + tmpdata = Data(rand_pert = 0.1, seed = 1) + sys = dpdata.LabeledSystem() + sys.data['coords'] = tmpdata.coord + sys.data['atom_types'] = tmpdata.atype + sys.data['cells'] = tmpdata.cell + nframes = tmpdata.nframes + natoms = tmpdata.natoms + sys.data['coords'] = sys.data['coords'].reshape([nframes,natoms,3]) + sys.data['cells'] = sys.data['cells'].reshape([nframes,3,3]) + sys.data['energies'] = np.zeros([nframes,1]) + sys.data['forces'] = np.zeros([nframes,natoms,3]) + sys.data['virials'] = [] + sys.to_deepmd_npy('system', prec=np.float64) + np.save('system/set.000/fparam.npy', tmpdata.fparam) + +class TestModel(unittest.TestCase): + def setUp(self) : + gen_data() + + def test_model(self): + jfile = 'water_se_a_fparam.json' + with open(jfile) as fp: + jdata = json.load (fp) + run_opt = RunOptions(None) + systems = j_must_have(jdata, 'systems') + set_pfx = j_must_have(jdata, 'set_prefix') + batch_size = j_must_have(jdata, 'batch_size') + test_size = j_must_have(jdata, 'numb_test') + batch_size = 1 + test_size = 1 + stop_batch = j_must_have(jdata, 'stop_batch') + rcut = j_must_have (jdata['model']['descriptor'], 'rcut') + + data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None) + + test_prop_c, \ + test_energy, test_force, test_virial, test_atom_ener, \ + test_coord, test_box, test_type, test_fparam, \ + natoms_vec, \ + default_mesh \ + = data.get_test () + numb_test = 1 + + bias_atom_e = data.compute_energy_shift() + + descrpt = DescrptSeA(jdata['model']['descriptor']) + fitting = 
EnerFitting(jdata['model']['fitting_net'], descrpt) + model = Model(jdata['model'], descrpt, fitting) + + davg, dstd = model.compute_dstats([test_coord], [test_box], [test_type], [natoms_vec], [default_mesh]) + + t_prop_c = tf.placeholder(tf.float32, [4], name='t_prop_c') + t_energy = tf.placeholder(global_ener_float_precision, [None], name='t_energy') + t_force = tf.placeholder(global_tf_float_precision, [None], name='t_force') + t_virial = tf.placeholder(global_tf_float_precision, [None], name='t_virial') + t_atom_ener = tf.placeholder(global_tf_float_precision, [None], name='t_atom_ener') + t_coord = tf.placeholder(global_tf_float_precision, [None], name='i_coord') + t_type = tf.placeholder(tf.int32, [None], name='i_type') + t_natoms = tf.placeholder(tf.int32, [model.ntypes+2], name='i_natoms') + t_box = tf.placeholder(global_tf_float_precision, [None, 9], name='i_box') + t_mesh = tf.placeholder(tf.int32, [None], name='i_mesh') + t_fparam = tf.placeholder(global_tf_float_precision, [None], name='i_fparam') + is_training = tf.placeholder(tf.bool) + + energy, force, virial, atom_ener, atom_virial \ + = model.build (t_coord, + t_type, + t_natoms, + t_box, + t_mesh, + t_fparam, + davg = davg, + dstd = dstd, + bias_atom_e = bias_atom_e, + suffix = "se_a_fparam", + reuse = False) + + feed_dict_test = {t_prop_c: test_prop_c, + t_energy: test_energy [:numb_test], + t_force: np.reshape(test_force [:numb_test, :], [-1]), + t_virial: np.reshape(test_virial [:numb_test, :], [-1]), + t_atom_ener: np.reshape(test_atom_ener[:numb_test, :], [-1]), + t_coord: np.reshape(test_coord [:numb_test, :], [-1]), + t_box: test_box [:numb_test, :], + t_type: np.reshape(test_type [:numb_test, :], [-1]), + t_natoms: natoms_vec, + t_mesh: default_mesh, + t_fparam: np.reshape(test_fparam [:numb_test, :], [-1]), + is_training: False} + + sess = tf.Session() + sess.run(tf.global_variables_initializer()) + [e, f, v] = sess.run([energy, force, virial], + feed_dict = feed_dict_test) + + e = 
e.reshape([-1]) + f = f.reshape([-1]) + v = v.reshape([-1]) + refe = [6.135136929183754972e+01] + reff = [7.761477777656561328e-02,9.383013575207051205e-02,3.776776376267230399e-03,1.428268971463224069e-01,1.143858253900619654e-01,-1.318441687719179231e-02,-7.271897092708884403e-02,6.494907553857684479e-02,5.355599592111062821e-04,5.840910251709752199e-02,-1.599042555763417750e-01,-5.067165555590445389e-03,-2.546246315216804113e-01,3.073296814647456451e-02,1.505994759166155023e-02,4.849282500878367153e-02,-1.439937492508420736e-01,-1.120701494357654411e-03] + refv = [-6.054303146013112480e-01,1.097859194719944115e-01,1.977605183964963390e-02,1.097859194719943976e-01,-3.306167096812382966e-01,-5.978855662865613894e-03,1.977605183964964083e-02,-5.978855662865616497e-03,-1.196331922996723236e-03] + refe = np.reshape(refe, [-1]) + reff = np.reshape(reff, [-1]) + refv = np.reshape(refv, [-1]) + + places = 10 + for ii in range(e.size) : + self.assertAlmostEqual(e[ii], refe[ii], places = places) + for ii in range(f.size) : + self.assertAlmostEqual(f[ii], reff[ii], places = places) + for ii in range(v.size) : + self.assertAlmostEqual(v[ii], refv[ii], places = places) diff --git a/source/tests/test_model_se_a_srtab.py b/source/tests/test_model_se_a_srtab.py new file mode 100644 index 0000000000..144f482313 --- /dev/null +++ b/source/tests/test_model_se_a_srtab.py @@ -0,0 +1,141 @@ +import dpdata,os,sys,json,unittest +import numpy as np +import tensorflow as tf +from common import Data + +lib_path = os.path.dirname(os.path.realpath(__file__)) + ".." 
+sys.path.append (lib_path) + +from deepmd.RunOptions import RunOptions +from deepmd.DataSystem import DataSystem +from deepmd.DescrptSeA import DescrptSeA +from deepmd.EnerFitting import EnerFitting +from deepmd.Model import Model +from deepmd.common import j_must_have, j_must_have_d, j_have + +global_ener_float_precision = tf.float64 +global_tf_float_precision = tf.float64 +global_np_float_precision = np.float64 + +def gen_data() : + tmpdata = Data(rand_pert = 0.1, seed = 1) + sys = dpdata.LabeledSystem() + sys.data['coords'] = tmpdata.coord + sys.data['atom_types'] = tmpdata.atype + sys.data['cells'] = tmpdata.cell + nframes = tmpdata.nframes + natoms = tmpdata.natoms + sys.data['coords'] = sys.data['coords'].reshape([nframes,natoms,3]) + sys.data['cells'] = sys.data['cells'].reshape([nframes,3,3]) + sys.data['energies'] = np.zeros([nframes,1]) + sys.data['forces'] = np.zeros([nframes,natoms,3]) + sys.data['virials'] = [] + sys.to_deepmd_npy('system', prec=np.float64) + +def _make_tab(ntype) : + xx = np.arange(0,9,0.001) + yy = 1000/(xx+.5)**6 + prt = xx + ninter = ntype * (ntype + 1) // 2 + for ii in range(ninter) : + prt = np.append(prt, yy) + prt = np.reshape(prt, [ninter+1, -1]) + np.savetxt('tab.xvg', prt.T) + +class TestModel(unittest.TestCase): + def setUp(self) : + gen_data() + + def test_model(self): + jfile = 'water_se_a.json' + with open(jfile) as fp: + jdata = json.load (fp) + run_opt = RunOptions(None) + systems = j_must_have(jdata, 'systems') + set_pfx = j_must_have(jdata, 'set_prefix') + batch_size = j_must_have(jdata, 'batch_size') + test_size = j_must_have(jdata, 'numb_test') + batch_size = 1 + test_size = 1 + stop_batch = j_must_have(jdata, 'stop_batch') + rcut = j_must_have (jdata['model']['descriptor'], 'rcut') + + data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None) + + test_prop_c, \ + test_energy, test_force, test_virial, test_atom_ener, \ + test_coord, test_box, test_type, test_fparam, \ + natoms_vec, \ + 
default_mesh \ + = data.get_test () + numb_test = 1 + + bias_atom_e = data.compute_energy_shift() + + descrpt = DescrptSeA(jdata['model']['descriptor']) + fitting = EnerFitting(jdata['model']['fitting_net'], descrpt) + model = Model(jdata['model'], descrpt, fitting) + + davg, dstd = model.compute_dstats([test_coord], [test_box], [test_type], [natoms_vec], [default_mesh]) + + t_prop_c = tf.placeholder(tf.float32, [4], name='t_prop_c') + t_energy = tf.placeholder(global_ener_float_precision, [None], name='t_energy') + t_force = tf.placeholder(global_tf_float_precision, [None], name='t_force') + t_virial = tf.placeholder(global_tf_float_precision, [None], name='t_virial') + t_atom_ener = tf.placeholder(global_tf_float_precision, [None], name='t_atom_ener') + t_coord = tf.placeholder(global_tf_float_precision, [None], name='i_coord') + t_type = tf.placeholder(tf.int32, [None], name='i_type') + t_natoms = tf.placeholder(tf.int32, [model.ntypes+2], name='i_natoms') + t_box = tf.placeholder(global_tf_float_precision, [None, 9], name='i_box') + t_mesh = tf.placeholder(tf.int32, [None], name='i_mesh') + is_training = tf.placeholder(tf.bool) + t_fparam = None + + energy, force, virial, atom_ener, atom_virial \ + = model.build (t_coord, + t_type, + t_natoms, + t_box, + t_mesh, + t_fparam, + davg = davg, + dstd = dstd, + bias_atom_e = bias_atom_e, + suffix = "se_a_srtab", + reuse = False) + + feed_dict_test = {t_prop_c: test_prop_c, + t_energy: test_energy [:numb_test], + t_force: np.reshape(test_force [:numb_test, :], [-1]), + t_virial: np.reshape(test_virial [:numb_test, :], [-1]), + t_atom_ener: np.reshape(test_atom_ener[:numb_test, :], [-1]), + t_coord: np.reshape(test_coord [:numb_test, :], [-1]), + t_box: test_box [:numb_test, :], + t_type: np.reshape(test_type [:numb_test, :], [-1]), + t_natoms: natoms_vec, + t_mesh: default_mesh, + is_training: False} + + sess = tf.Session() + sess.run(tf.global_variables_initializer()) + [e, f, v] = sess.run([energy, force, virial], + 
feed_dict = feed_dict_test) + + e = e.reshape([-1]) + f = f.reshape([-1]) + v = v.reshape([-1]) + + refe = [6.135449167779321300e+01] + reff = [7.799691562262310585e-02,9.423098804815030483e-02,3.790560997388224204e-03,1.432522403799846578e-01,1.148392791403983204e-01,-1.321871172563671148e-02,-7.318966526325138000e-02,6.516069212737778116e-02,5.406418483320515412e-04,5.870713761026503247e-02,-1.605402669549013672e-01,-5.089516979826595386e-03,-2.554593467731766654e-01,3.092063507347833987e-02,1.510355029451411479e-02,4.869271842355533952e-02,-1.446113274345035005e-01,-1.126524434771078789e-03] + refv = [-6.076776685178300053e-01,1.103174323630009418e-01,1.984250991380156690e-02,1.103174323630009557e-01,-3.319759402259439551e-01,-6.007404107650986258e-03,1.984250991380157036e-02,-6.007404107650981921e-03,-1.200076017439753642e-03] + refe = np.reshape(refe, [-1]) + reff = np.reshape(reff, [-1]) + refv = np.reshape(refv, [-1]) + + places = 10 + for ii in range(e.size) : + self.assertAlmostEqual(e[ii], refe[ii], places = places) + for ii in range(f.size) : + self.assertAlmostEqual(f[ii], reff[ii], places = places) + for ii in range(v.size) : + self.assertAlmostEqual(v[ii], refv[ii], places = places) diff --git a/source/tests/test_model_se_r.py b/source/tests/test_model_se_r.py new file mode 100644 index 0000000000..e0bf8ebbf3 --- /dev/null +++ b/source/tests/test_model_se_r.py @@ -0,0 +1,130 @@ +import dpdata,os,sys,json,unittest +import numpy as np +import tensorflow as tf +from common import Data + +lib_path = os.path.dirname(os.path.realpath(__file__)) + ".." 
+sys.path.append (lib_path) + +from deepmd.RunOptions import RunOptions +from deepmd.DataSystem import DataSystem +from deepmd.DescrptSeR import DescrptSeR +from deepmd.EnerFitting import EnerFitting +from deepmd.Model import Model +from deepmd.common import j_must_have, j_must_have_d, j_have + +global_ener_float_precision = tf.float64 +global_tf_float_precision = tf.float64 +global_np_float_precision = np.float64 + +def gen_data() : + tmpdata = Data(rand_pert = 0.1, seed = 1) + sys = dpdata.LabeledSystem() + sys.data['coords'] = tmpdata.coord + sys.data['atom_types'] = tmpdata.atype + sys.data['cells'] = tmpdata.cell + nframes = tmpdata.nframes + natoms = tmpdata.natoms + sys.data['coords'] = sys.data['coords'].reshape([nframes,natoms,3]) + sys.data['cells'] = sys.data['cells'].reshape([nframes,3,3]) + sys.data['energies'] = np.zeros([nframes,1]) + sys.data['forces'] = np.zeros([nframes,natoms,3]) + sys.data['virials'] = [] + sys.to_deepmd_npy('system', prec=np.float64) + +class TestModel(unittest.TestCase): + def setUp(self) : + gen_data() + + def test_model(self): + jfile = 'water_se_r.json' + with open(jfile) as fp: + jdata = json.load (fp) + run_opt = RunOptions(None) + systems = j_must_have(jdata, 'systems') + set_pfx = j_must_have(jdata, 'set_prefix') + batch_size = j_must_have(jdata, 'batch_size') + test_size = j_must_have(jdata, 'numb_test') + batch_size = 1 + test_size = 1 + stop_batch = j_must_have(jdata, 'stop_batch') + rcut = j_must_have (jdata['model']['descriptor'], 'rcut') + + data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None) + + test_prop_c, \ + test_energy, test_force, test_virial, test_atom_ener, \ + test_coord, test_box, test_type, test_fparam, \ + natoms_vec, \ + default_mesh \ + = data.get_test () + numb_test = 1 + + bias_atom_e = data.compute_energy_shift() + + descrpt = DescrptSeR(jdata['model']['descriptor']) + fitting = EnerFitting(jdata['model']['fitting_net'], descrpt) + model = Model(jdata['model'], 
descrpt, fitting) + + davg, dstd = model.compute_dstats([test_coord], [test_box], [test_type], [natoms_vec], [default_mesh]) + + t_prop_c = tf.placeholder(tf.float32, [4], name='t_prop_c') + t_energy = tf.placeholder(global_ener_float_precision, [None], name='t_energy') + t_force = tf.placeholder(global_tf_float_precision, [None], name='t_force') + t_virial = tf.placeholder(global_tf_float_precision, [None], name='t_virial') + t_atom_ener = tf.placeholder(global_tf_float_precision, [None], name='t_atom_ener') + t_coord = tf.placeholder(global_tf_float_precision, [None], name='i_coord') + t_type = tf.placeholder(tf.int32, [None], name='i_type') + t_natoms = tf.placeholder(tf.int32, [model.ntypes+2], name='i_natoms') + t_box = tf.placeholder(global_tf_float_precision, [None, 9], name='i_box') + t_mesh = tf.placeholder(tf.int32, [None], name='i_mesh') + is_training = tf.placeholder(tf.bool) + t_fparam = None + + energy, force, virial, atom_ener, atom_virial \ + = model.build (t_coord, + t_type, + t_natoms, + t_box, + t_mesh, + t_fparam, + davg = davg, + dstd = dstd, + bias_atom_e = bias_atom_e, + suffix = "se_r", + reuse = False) + + feed_dict_test = {t_prop_c: test_prop_c, + t_energy: test_energy [:numb_test], + t_force: np.reshape(test_force [:numb_test, :], [-1]), + t_virial: np.reshape(test_virial [:numb_test, :], [-1]), + t_atom_ener: np.reshape(test_atom_ener[:numb_test, :], [-1]), + t_coord: np.reshape(test_coord [:numb_test, :], [-1]), + t_box: test_box [:numb_test, :], + t_type: np.reshape(test_type [:numb_test, :], [-1]), + t_natoms: natoms_vec, + t_mesh: default_mesh, + is_training: False} + + sess = tf.Session() + sess.run(tf.global_variables_initializer()) + [e, f, v] = sess.run([energy, force, virial], + feed_dict = feed_dict_test) + + e = e.reshape([-1]) + f = f.reshape([-1]) + v = v.reshape([-1]) + refe = [6.152085988309423925e+01] + reff = 
[-1.714443151616400110e-04,-1.315836609370952051e-04,-5.584120460897444674e-06,-7.197863450669731334e-05,-1.384609799994930676e-04,8.856091902774708468e-06,1.120578238869146797e-04,-7.428703645877488470e-05,9.370560731488587317e-07,-1.048347129617610465e-04,1.977876923815685781e-04,7.522050342771599598e-06,2.361772659657814205e-04,-5.774651813388292487e-05,-1.233143271630744828e-05,2.257277740226381951e-08,2.042905031476775584e-04,6.003548585097267914e-07] + refv = [1.035180911513190792e-03,-1.118982949050497126e-04,-2.383287813436022850e-05,-1.118982949050497126e-04,4.362023915782403281e-04,8.119543218224559240e-06,-2.383287813436022850e-05,8.119543218224559240e-06,1.201142938802945237e-06] + refe = np.reshape(refe, [-1]) + reff = np.reshape(reff, [-1]) + refv = np.reshape(refv, [-1]) + + places = 6 + for ii in range(e.size) : + self.assertAlmostEqual(e[ii], refe[ii], places = places) + for ii in range(f.size) : + self.assertAlmostEqual(f[ii], reff[ii], places = places) + for ii in range(v.size) : + self.assertAlmostEqual(v[ii], refv[ii], places = places) diff --git a/source/tests/test_tab_nonsmth.py b/source/tests/test_tab_nonsmth.py new file mode 100644 index 0000000000..00e1e5d9e4 --- /dev/null +++ b/source/tests/test_tab_nonsmth.py @@ -0,0 +1,179 @@ +import os,sys +import numpy as np +import tensorflow as tf +import unittest + +from tensorflow.python.framework import ops + +# load grad of force module +module_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..") +sys.path.append (module_path) +import _prod_force_grad +import _prod_virial_grad +import _prod_force_se_a_grad +import _prod_virial_se_a_grad +import _soft_min_force_grad +import _soft_min_virial_grad +from TabInter import TabInter + +from common import force_test +from common import virial_test +from common import force_dw_test +from common import virial_dw_test +from common import Data +from test_descrpt_nonsmth import Inter + +from deepmd.ModelLocFrame import op_module + +def 
_make_tab(ntype) : + xx = np.arange(0,9,0.001) + yy = 1000/(xx+.5)**6 + prt = xx + ninter = ntype * (ntype + 1) // 2 + for ii in range(ninter) : + prt = np.append(prt, yy) + prt = np.reshape(prt, [ninter+1, -1]) + np.savetxt('tab.xvg', prt.T) + + +class IntplInter(Inter): + def __init__ (self, + data) : + # tabulated + Inter.__init__(self, data) + _make_tab(data.get_ntypes()) + self.srtab = TabInter('tab.xvg') + self.smin_alpha = 0.3 + self.sw_rmin = 1 + self.sw_rmax = 3.45 + tab_info, tab_data = self.srtab.get() + with tf.variable_scope('tab', reuse=tf.AUTO_REUSE): + self.tab_info = tf.get_variable('t_tab_info', + tab_info.shape, + dtype = tf.float64, + trainable = False, + initializer = tf.constant_initializer(tab_info, dtype = tf.float64)) + self.tab_data = tf.get_variable('t_tab_data', + tab_data.shape, + dtype = tf.float64, + trainable = False, + initializer = tf.constant_initializer(tab_data, dtype = tf.float64)) + + def comp_interpl_ef (self, + dcoord, + dbox, + dtype, + tnatoms, + name, + reuse = None) : + descrpt, descrpt_deriv, rij, nlist, axis \ + = op_module.descrpt (dcoord, + dtype, + tnatoms, + dbox, + tf.constant(self.default_mesh), + self.t_avg, + self.t_std, + rcut_a = self.rcut_a, + rcut_r = self.rcut_r, + sel_a = self.sel_a, + sel_r = self.sel_r, + axis_rule = self.axis_rule) + inputs_reshape = tf.reshape (descrpt, [-1, self.ndescrpt]) + atom_ener = self._net (inputs_reshape, name, reuse = reuse) + + sw_lambda, sw_deriv \ + = op_module.soft_min_switch(dtype, + rij, + nlist, + tnatoms, + sel_a = self.sel_a, + sel_r = self.sel_r, + alpha = self.smin_alpha, + rmin = self.sw_rmin, + rmax = self.sw_rmax) + inv_sw_lambda = 1.0 - sw_lambda + tab_atom_ener, tab_force, tab_atom_virial \ + = op_module.tab_inter(self.tab_info, + self.tab_data, + dtype, + rij, + nlist, + tnatoms, + sw_lambda, + sel_a = self.sel_a, + sel_r = self.sel_r) + energy_diff = tab_atom_ener - tf.reshape(atom_ener, [-1, self.natoms[0]]) + tab_atom_ener = tf.reshape(sw_lambda, [-1]) * 
tf.reshape(tab_atom_ener, [-1]) + atom_ener = tf.reshape(inv_sw_lambda, [-1]) * atom_ener + energy_raw = tab_atom_ener + atom_ener + + energy_raw = tf.reshape(energy_raw, [-1, self.natoms[0]]) + energy = tf.reduce_sum (energy_raw, axis = 1) + + net_deriv_ = tf.gradients (atom_ener, inputs_reshape) + net_deriv = net_deriv_[0] + net_deriv_reshape = tf.reshape (net_deriv, [-1, self.natoms[0] * self.ndescrpt]) + + force = op_module.prod_force (net_deriv_reshape, + descrpt_deriv, + nlist, + axis, + tnatoms, + n_a_sel = self.nnei_a, + n_r_sel = self.nnei_r) + sw_force \ + = op_module.soft_min_force(energy_diff, + sw_deriv, + nlist, + tnatoms, + n_a_sel = self.nnei_a, + n_r_sel = self.nnei_r) + force = force + sw_force + tab_force + virial, atom_vir = op_module.prod_virial (net_deriv_reshape, + descrpt_deriv, + rij, + nlist, + axis, + tnatoms, + n_a_sel = self.nnei_a, + n_r_sel = self.nnei_r) + sw_virial, sw_atom_virial \ + = op_module.soft_min_virial (energy_diff, + sw_deriv, + rij, + nlist, + tnatoms, + n_a_sel = self.nnei_a, + n_r_sel = self.nnei_r) + # atom_virial = atom_virial + sw_atom_virial + tab_atom_virial + virial = virial + sw_virial \ + + tf.reduce_sum(tf.reshape(tab_atom_virial, [-1, self.natoms[1], 9]), axis = 1) + + return energy, force, virial + + + +class TestTabNonSmooth(IntplInter, unittest.TestCase): + def __init__ (self, *args, **kwargs): + self.places = 5 + data = Data() + IntplInter.__init__(self, data) + unittest.TestCase.__init__(self, *args, **kwargs) + self.controller = object() + + def test_force (self) : + force_test(self, self, places=5, suffix = '_tab') + + def test_virial (self) : + virial_test(self, self, places=5, suffix = '_tab') + + def test_force_dw (self) : + force_dw_test(self, self, places=5, suffix = '_tab') + + def test_virial_dw (self) : + virial_dw_test(self, self, places=5, suffix = '_tab') + + +if __name__ == '__main__': + unittest.main() diff --git a/source/tests/test_tab_smooth.py b/source/tests/test_tab_smooth.py new file 
mode 100644 index 0000000000..d3a8ff4750 --- /dev/null +++ b/source/tests/test_tab_smooth.py @@ -0,0 +1,177 @@ +import os,sys +import numpy as np +import tensorflow as tf +import unittest + +from tensorflow.python.framework import ops + +# load grad of force module +module_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..") +sys.path.append (module_path) +import _prod_force_grad +import _prod_virial_grad +import _prod_force_se_a_grad +import _prod_virial_se_a_grad +import _soft_min_force_grad +import _soft_min_virial_grad +from TabInter import TabInter + +from common import force_test +from common import virial_test +from common import force_dw_test +from common import virial_dw_test +from common import Data +from test_descrpt_smooth import Inter + +from deepmd.ModelLocFrame import op_module + +def _make_tab(ntype) : + xx = np.arange(0,9,0.001) + yy = 1000/(xx+.5)**6 + prt = xx + ninter = ntype * (ntype + 1) // 2 + for ii in range(ninter) : + prt = np.append(prt, yy) + prt = np.reshape(prt, [ninter+1, -1]) + np.savetxt('tab.xvg', prt.T) + + +class IntplInter(Inter): + def __init__ (self, + data) : + # tabulated + Inter.__init__(self, data) + _make_tab(data.get_ntypes()) + self.srtab = TabInter('tab.xvg') + self.smin_alpha = 0.3 + self.sw_rmin = 1 + self.sw_rmax = 3.45 + tab_info, tab_data = self.srtab.get() + with tf.variable_scope('tab', reuse=tf.AUTO_REUSE): + self.tab_info = tf.get_variable('t_tab_info', + tab_info.shape, + dtype = tf.float64, + trainable = False, + initializer = tf.constant_initializer(tab_info, dtype = tf.float64)) + self.tab_data = tf.get_variable('t_tab_data', + tab_data.shape, + dtype = tf.float64, + trainable = False, + initializer = tf.constant_initializer(tab_data, dtype = tf.float64)) + + def comp_ef (self, + dcoord, + dbox, + dtype, + tnatoms, + name, + reuse = None) : + descrpt, descrpt_deriv, rij, nlist \ + = op_module.descrpt_se_a (dcoord, + dtype, + tnatoms, + dbox, + tf.constant(self.default_mesh), + self.t_avg, 
+ self.t_std, + rcut_a = self.rcut_a, + rcut_r = self.rcut_r, + rcut_r_smth = self.rcut_r_smth, + sel_a = self.sel_a, + sel_r = self.sel_r) + inputs_reshape = tf.reshape (descrpt, [-1, self.ndescrpt]) + atom_ener = self._net (inputs_reshape, name, reuse = reuse) + + sw_lambda, sw_deriv \ + = op_module.soft_min_switch(dtype, + rij, + nlist, + tnatoms, + sel_a = self.sel_a, + sel_r = self.sel_r, + alpha = self.smin_alpha, + rmin = self.sw_rmin, + rmax = self.sw_rmax) + inv_sw_lambda = 1.0 - sw_lambda + tab_atom_ener, tab_force, tab_atom_virial \ + = op_module.tab_inter(self.tab_info, + self.tab_data, + dtype, + rij, + nlist, + tnatoms, + sw_lambda, + sel_a = self.sel_a, + sel_r = self.sel_r) + energy_diff = tab_atom_ener - tf.reshape(atom_ener, [-1, self.natoms[0]]) + tab_atom_ener = tf.reshape(sw_lambda, [-1]) * tf.reshape(tab_atom_ener, [-1]) + atom_ener = tf.reshape(inv_sw_lambda, [-1]) * atom_ener + energy_raw = tab_atom_ener + atom_ener + + energy_raw = tf.reshape(energy_raw, [-1, self.natoms[0]]) + energy = tf.reduce_sum (energy_raw, axis = 1) + + net_deriv_ = tf.gradients (atom_ener, inputs_reshape) + net_deriv = net_deriv_[0] + net_deriv_reshape = tf.reshape (net_deriv, [-1, self.natoms[0] * self.ndescrpt]) + + force = op_module.prod_force_se_a (net_deriv_reshape, + descrpt_deriv, + nlist, + tnatoms, + n_a_sel = self.nnei_a, + n_r_sel = self.nnei_r) + sw_force \ + = op_module.soft_min_force(energy_diff, + sw_deriv, + nlist, + tnatoms, + n_a_sel = self.nnei_a, + n_r_sel = self.nnei_r) + force = force + sw_force + tab_force + virial, atom_vir = op_module.prod_virial_se_a (net_deriv_reshape, + descrpt_deriv, + rij, + nlist, + tnatoms, + n_a_sel = self.nnei_a, + n_r_sel = self.nnei_r) + sw_virial, sw_atom_virial \ + = op_module.soft_min_virial (energy_diff, + sw_deriv, + rij, + nlist, + tnatoms, + n_a_sel = self.nnei_a, + n_r_sel = self.nnei_r) + # atom_virial = atom_virial + sw_atom_virial + tab_atom_virial + virial = virial + sw_virial \ + + 
tf.reduce_sum(tf.reshape(tab_atom_virial, [-1, self.natoms[1], 9]), axis = 1) + + return energy, force, virial + + + +class TestTabSmooth(IntplInter, unittest.TestCase): + def __init__ (self, *args, **kwargs): + self.places = 5 + data = Data() + IntplInter.__init__(self, data) + unittest.TestCase.__init__(self, *args, **kwargs) + self.controller = object() + + def test_force (self) : + force_test(self, self, places=5, suffix = '_tab_smth') + + def test_virial (self) : + virial_test(self, self, places=5, suffix = '_tab_smth') + + def test_force_dw (self) : + force_dw_test(self, self, places=5, suffix = '_tab_smth') + + def test_virial_dw (self) : + virial_dw_test(self, self, places=5, suffix = '_tab_smth') + + +if __name__ == '__main__': + unittest.main() diff --git a/examples/train/water.json b/source/tests/water.json similarity index 50% rename from examples/train/water.json rename to source/tests/water.json index 6e28744ddf..b4817fecf0 100644 --- a/examples/train/water.json +++ b/source/tests/water.json @@ -1,21 +1,28 @@ { "with_distrib": false, "_comment": " model parameters", - "use_smooth": false, - "sel_a": [16, 32], - "sel_r": [30, 60], - "rcut": 6.00, - "axis_rule": [0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0], - "_comment": " default rule: []", - "_comment": " user defined rule: for each type provides two axes, ", - "_comment": " for each axis: (a_or_r, type, idx)", - "_comment": " if type < 0, exclude type -(type+1)", - "_comment": " for water (O:0, H:1) it can be", - "_comment": " [0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0]", - "fitting_neuron": [240, 120, 60, 30, 10], + "model" :{ + "descriptor":{ + "type": "loc_frame", + "sel_a": [16, 32], + "sel_r": [30, 60], + "rcut": 6.00, + "axis_rule": [0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0], + "_comment": " default rule: []", + "_comment": " user defined rule: for each type provides two axes, ", + "_comment": " for each axis: (a_or_r, type, idx)", + "_comment": " if type < 0, exclude type -(type+1)", + "_comment": " for water 
(O:0, H:1) it can be", + "_comment": " [0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0]" + }, + "fitting_net" : { + "neuron": [240, 120, 60, 30, 10], + "seed": 1 + } + }, "_comment": " traing controls", - "systems": ["../data/water/"], + "systems": ["system"], "set_prefix": "set", "stop_batch": 1000000, "batch_size": 4, @@ -36,7 +43,7 @@ "_comment": " frequencies counted in batch", "disp_file": "lcurve.out", "disp_freq": 100, - "numb_test": 10, + "numb_test": 1, "save_freq": 1000, "save_ckpt": "model.ckpt", "load_ckpt": "model.ckpt", diff --git a/source/tests/water_se_a.json b/source/tests/water_se_a.json new file mode 100644 index 0000000000..f948682368 --- /dev/null +++ b/source/tests/water_se_a.json @@ -0,0 +1,55 @@ +{ + "_comment": " model parameters", + "model" : { + "descriptor" :{ + "type": "se_a", + "sel": [46, 92], + "rcut_smth": 5.80, + "rcut": 6.00, + "neuron": [25, 50, 100], + "resnet_dt": false, + "axis_neuron": 16, + "seed": 1 + }, + "fitting_net" : { + "neuron": [240, 240, 240], + "resnet_dt": true, + "seed": 1 + } + }, + + + "_comment": " traing controls", + "systems": ["system"], + "set_prefix": "set", + "stop_batch": 1000000, + "batch_size": 1, + "start_lr": 0.005, + "decay_steps": 5000, + "decay_rate": 0.95, + + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + + "seed": 1, + + "_comment": " display and restart", + "_comment": " frequencies counted in batch", + "disp_file": "lcurve.out", + "disp_freq": 100, + "numb_test": 1, + "save_freq": 1000, + "save_ckpt": "model.ckpt", + "load_ckpt": "model.ckpt", + "disp_training": true, + "time_training": true, + "profiling": false, + "profiling_file": "timeline.json", + + "_comment": "that's all" +} + diff --git a/source/tests/water_se_a_fparam.json b/source/tests/water_se_a_fparam.json new file mode 100644 index 0000000000..b27ae4c467 --- /dev/null +++ b/source/tests/water_se_a_fparam.json @@ -0,0 +1,56 @@ +{ + "_comment": " model 
parameters", + "model" : { + "descriptor" :{ + "type": "se_a", + "sel": [46, 92], + "rcut_smth": 5.80, + "rcut": 6.00, + "neuron": [25, 50, 100], + "resnet_dt": false, + "axis_neuron": 16, + "seed": 1 + }, + "fitting_net" : { + "neuron": [240, 240, 240], + "resnet_dt": true, + "numb_fparam": 2, + "seed": 1 + } + }, + + + "_comment": " traing controls", + "systems": ["system"], + "set_prefix": "set", + "stop_batch": 1000000, + "batch_size": 1, + "start_lr": 0.005, + "decay_steps": 5000, + "decay_rate": 0.95, + + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + + "seed": 1, + + "_comment": " display and restart", + "_comment": " frequencies counted in batch", + "disp_file": "lcurve.out", + "disp_freq": 100, + "numb_test": 1, + "save_freq": 1000, + "save_ckpt": "model.ckpt", + "load_ckpt": "model.ckpt", + "disp_training": true, + "time_training": true, + "profiling": false, + "profiling_file": "timeline.json", + + "_comment": "that's all" +} + diff --git a/source/tests/water_se_a_srtab.json b/source/tests/water_se_a_srtab.json new file mode 100644 index 0000000000..84c920d3e7 --- /dev/null +++ b/source/tests/water_se_a_srtab.json @@ -0,0 +1,59 @@ +{ + "_comment": " model parameters", + "model" : { + "use_srtab": "tab.xvg", + "smin_alpha": 0.3, + "sw_rmin": 0.6, + "sw_rmax": 1.4, + "descriptor" :{ + "type": "se_a", + "sel": [46, 92], + "rcut_smth": 5.80, + "rcut": 6.00, + "neuron": [25, 50, 100], + "resnet_dt": false, + "axis_neuron": 16, + "seed": 1 + }, + "fitting_net" : { + "neuron": [240, 240, 240], + "resnet_dt": true, + "seed": 1 + } + }, + + + "_comment": " traing controls", + "systems": ["system"], + "set_prefix": "set", + "stop_batch": 1000000, + "batch_size": 1, + "start_lr": 0.005, + "decay_steps": 5000, + "decay_rate": 0.95, + + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + + "seed": 1, + + 
"_comment": " display and restart", + "_comment": " frequencies counted in batch", + "disp_file": "lcurve.out", + "disp_freq": 100, + "numb_test": 1, + "save_freq": 1000, + "save_ckpt": "model.ckpt", + "load_ckpt": "model.ckpt", + "disp_training": true, + "time_training": true, + "profiling": false, + "profiling_file": "timeline.json", + + "_comment": "that's all" +} + diff --git a/examples/train/water_smth.json b/source/tests/water_se_r.json similarity index 67% rename from examples/train/water_smth.json rename to source/tests/water_se_r.json index e4a639de0f..43194f42fd 100644 --- a/examples/train/water_smth.json +++ b/source/tests/water_se_r.json @@ -1,19 +1,24 @@ { "_comment": " model parameters", - "use_smooth": true, - "sel_a": [46, 92], - "rcut_smth": 5.80, - "rcut": 6.00, - "filter_neuron": [25, 50, 100], - "filter_resnet_dt": false, - "axis_neuron": 16, - "fitting_neuron": [240, 240, 240], - "fitting_resnet_dt":true, - "coord_norm": true, - "type_fitting_net": false, + "model" : { + "descriptor" : { + "type": "se_r", + "sel": [46, 92], + "rcut_smth": 5.80, + "rcut": 6.00, + "neuron": [25, 50, 100], + "resnet_dt": false, + "seed": 1 + }, + "fitting_net" : { + "neuron": [240, 240, 240], + "resnet_dt": true, + "seed": 1 + } + }, "_comment": " traing controls", - "systems": ["../data/water/"], + "systems": ["system"], "set_prefix": "set", "stop_batch": 1000000, "batch_size": 1, @@ -34,7 +39,7 @@ "_comment": " frequencies counted in batch", "disp_file": "lcurve.out", "disp_freq": 100, - "numb_test": 10, + "numb_test": 1, "save_freq": 1000, "save_ckpt": "model.ckpt", "load_ckpt": "model.ckpt", diff --git a/source/train/CMakeLists.txt b/source/train/CMakeLists.txt index f56070e65e..3f31441338 100644 --- a/source/train/CMakeLists.txt +++ b/source/train/CMakeLists.txt @@ -2,25 +2,19 @@ configure_file("RunOptions.py.in" "${CMAKE_CURRENT_BINARY_DIR}/RunOptions.py" @ONLY) -file(GLOB LIB_PY Data.py DataSystem.py Model.py Test.py TestNorot.py 
${CMAKE_CURRENT_BINARY_DIR}/RunOptions.py) +file(GLOB LIB_PY common.py DeepPot.py Data.py DataSystem.py Model*.py Descrpt*.py Fitting.py Loss.py LearningRate.py Trainer.py TabInter.py ${CMAKE_CURRENT_BINARY_DIR}/RunOptions.py) file(GLOB CLS_PY Local.py Slurm.py) install( FILES ${LIB_PY} - DESTINATION lib/deepmd + DESTINATION deepmd ) install( FILES ${CLS_PY} - DESTINATION lib/deepmd/cluster + DESTINATION deepmd/cluster ) install( - PROGRAMS train.py - DESTINATION bin/ - RENAME dp_train -) -install( - PROGRAMS test.py - DESTINATION bin/ - RENAME dp_test + FILES __main__.py __init__.py train.py test.py + DESTINATION deepmd ) diff --git a/source/train/Data.py b/source/train/Data.py index 2585c1bde4..7012ed16f2 100644 --- a/source/train/Data.py +++ b/source/train/Data.py @@ -12,23 +12,36 @@ class DataSets (object): def __init__ (self, sys_path, set_prefix, - seed = None) : + seed = None, + shuffle_test = True) : self.dirs = glob.glob (os.path.join(sys_path, set_prefix + ".*")) self.dirs.sort() # load atom type self.atom_type, self.idx_map, self.idx3_map = self.load_type (sys_path) + # load atom type map + self.type_map = self.load_type_map(sys_path) + if self.type_map is not None: + assert(len(self.type_map) >= max(self.atom_type)+1) # train dirs self.test_dir = self.dirs[-1] if len(self.dirs) == 1 : self.train_dirs = self.dirs else : self.train_dirs = self.dirs[:-1] + # check fparam + has_fparam = [ os.path.isfile(os.path.join(ii, 'fparam.npy')) for ii in self.dirs ] + if any(has_fparam) and (not all(has_fparam)) : + raise RuntimeError("system %s: if any set has frame parameter, then all sets should have frame parameter" % sys_path) + if all(has_fparam) : + self.has_fparam = 0 + else : + self.has_fparam = -1 # energy norm self.eavg = self.stats_energy() # load sets self.set_count = 0 self.load_batch_set (self.train_dirs[self.set_count % self.get_numb_set()]) - self.load_test_set (self.test_dir) + self.load_test_set (self.test_dir, shuffle_test) def check_batch_size 
(self, batch_size) : for ii in self.train_dirs : @@ -56,6 +69,17 @@ def load_type (self, sys_path) : idx3_map = np.lexsort ((idx3, atom_type3)) return atom_type, idx_map, idx3_map + def load_type_map(self, sys_path) : + fname = os.path.join(sys_path, 'type_map.raw') + if os.path.isfile(fname) : + with open(os.path.join(sys_path, 'type_map.raw')) as fp: + return fp.read().split() + else : + return None + + def get_type_map(self) : + return self.type_map + def get_numb_set (self) : return len (self.train_dirs) @@ -138,6 +162,13 @@ def load_batch_set (self, nframe = self.box_batch.shape[0] self.coord_batch = np.reshape(self.coord_batch, [nframe, -1]) ncoord = self.coord_batch.shape[1] + if self.has_fparam >= 0: + self.fparam_batch = np.load(os.path.join(set_name, 'fparam.npy')) + self.fparam_batch = np.reshape(self.fparam_batch, [nframe, -1]) + if self.has_fparam == 0 : + self.has_fparam = self.fparam_batch.shape[1] + else : + assert self.has_fparam == self.fparam_batch.shape[1] self.prop_c_batch = np.zeros (4) self.prop_c_batch[0], self.energy_batch, self.prop_c_batch[3], self.atom_ener_batch \ = self.load_energy (nframe, ncoord // 3, @@ -160,6 +191,8 @@ def load_batch_set (self, self.coord_batch = self.coord_batch[idx] self.box_batch = self.box_batch[idx] self.type_batch = np.tile (self.atom_type, (nframe, 1)) + if self.has_fparam >= 0 : + self.fparam_batch = self.fparam_batch[idx] self.reset_iter () # sort according to type self.type_batch = self.type_batch[:, self.idx_map] @@ -169,7 +202,8 @@ def load_batch_set (self, end_time = time.time() def load_test_set (self, - set_name) : + set_name, + shuffle_test) : start_time = time.time() self.coord_test = np.load(os.path.join(set_name, "coord.npy")) self.box_test = np.load(os.path.join(set_name, "box.npy")) @@ -177,6 +211,14 @@ def load_test_set (self, nframe = self.box_test.shape[0] self.coord_test = np.reshape(self.coord_test, [nframe, -1]) ncoord = self.coord_test.shape[1] + fparam_file = os.path.join(set_name, 
'fparam.npy') + if self.has_fparam >= 0 : + self.fparam_test = np.load(fparam_file) + self.fparam_test = np.reshape(self.fparam_test, [nframe, -1]) + if self.has_fparam == 0 : + self.has_fparam = self.fparam_test.shape[1] + else : + assert self.has_fparam == self.fparam_test.shape[1] self.prop_c_test = np.zeros (4) self.prop_c_test[0], self.energy_test, self.prop_c_test[3], self.atom_ener_test \ = self.load_energy (nframe, ncoord // 3, @@ -191,7 +233,8 @@ def load_test_set (self, os.path.join(set_name, "virial.npy")) # shuffle data idx = np.arange (nframe) - np.random.shuffle (idx) + if shuffle_test: + np.random.shuffle (idx) self.energy_test = self.energy_test[idx] self.force_test = self.force_test[idx] self.virial_test = self.virial_test[idx] @@ -199,6 +242,8 @@ def load_test_set (self, self.coord_test = self.coord_test[idx] self.box_test = self.box_test[idx] self.type_test = np.tile (self.atom_type, (nframe, 1)) + if self.has_fparam >= 0 : + self.fparam_test = self.fparam_test[idx] # sort according to type self.type_test = self.type_test[:, self.idx_map] self.atom_ener_test = self.atom_ener_test[:, self.idx_map] @@ -215,6 +260,10 @@ def get_test (self) : returned property prefector [4] in order: energy, force, virial, atom_ener """ + if self.has_fparam >= 0 : + ret_fparam = self.fparam_test.astype(global_np_float_precision) + else : + ret_fparam = None return \ self.prop_c_test.astype(np.float32), \ self.energy_test.astype(global_np_float_precision), \ @@ -223,7 +272,8 @@ def get_test (self) : self.atom_ener_test.astype(global_np_float_precision), \ self.coord_test.astype(global_np_float_precision), \ self.box_test.astype(global_np_float_precision), \ - self.type_test + self.type_test,\ + ret_fparam def get_batch (self, batch_size) : @@ -242,6 +292,10 @@ def get_batch (self, iterator_1 = set_size idx = np.arange (self.iterator, iterator_1) self.iterator += batch_size + if self.has_fparam >= 0 : + ret_fparam = self.fparam_batch[idx, 
:].astype(global_np_float_precision) + else : + ret_fparam = None return \ self.prop_c_batch.astype(np.float32), \ self.energy_batch[idx].astype(global_np_float_precision), \ @@ -250,7 +304,8 @@ def get_batch (self, self.atom_ener_batch[idx, :].astype(global_np_float_precision), \ self.coord_batch[idx, :].astype(global_np_float_precision), \ self.box_batch[idx, :].astype(global_np_float_precision), \ - self.type_batch[idx, :] + self.type_batch[idx, :],\ + ret_fparam def get_natoms (self) : sample_type = self.type_batch[0] @@ -281,14 +336,6 @@ def get_sys_numb_batch (self, batch_size) : def get_ener (self) : return self.eavg -if __name__ == '__main__': - data = DataSets (".", "set") - prop_c, energy, force, virial, atom_ener, coord, box, ttype = data.get_batch(1) - print (energy.shape) - print (force.shape) - print (coord.shape) - print (box.shape) - print (ttype.shape) - # energy, force, coord, box, ttype = data.get_test() - print (energy) - + def numb_fparam(self) : + return self.has_fparam + diff --git a/source/train/DataSystem.py b/source/train/DataSystem.py index 19bc4cf4dc..33a81b5404 100644 --- a/source/train/DataSystem.py +++ b/source/train/DataSystem.py @@ -32,19 +32,31 @@ def __init__ (self, sys_all_types = np.loadtxt(os.path.join(ii, "type.raw")).astype(int) self.ntypes.append(np.max(sys_all_types) + 1) self.sys_ntypes = max(self.ntypes) + type_map = [] for ii in range(self.nsystems) : self.natoms.append(self.data_systems[ii].get_natoms()) self.natoms_vec.append(self.data_systems[ii].get_natoms_vec(self.sys_ntypes).astype(int)) self.nbatches.append(self.data_systems[ii].get_sys_numb_batch(self.batch_size[ii])) + type_map.append(self.data_systems[ii].get_type_map()) + self.type_map = self.check_type_map_consistency(type_map) + + # check frame parameters + has_fparam = [ii.numb_fparam() for ii in self.data_systems] + for ii in has_fparam : + if ii != has_fparam[0] : + raise RuntimeError("if any system has frame parameter, then all systems should have the 
same number of frame parameter") + self.has_fparam = has_fparam[0] # check the size of data if they satisfy the requirement of batch and test for ii in range(self.nsystems) : chk_ret = self.data_systems[ii].check_batch_size(self.batch_size[ii]) if chk_ret is not None : - raise RuntimeError(" required batch size %d is larger than the size %d of the dataset %s" % (self.batch_size[ii], chk_ret[1], chk_ret[0])) + raise RuntimeError ("system %s required batch size %d is larger than the size %d of the dataset %s" % \ + (self.system_dirs[ii], self.batch_size[ii], chk_ret[1], chk_ret[0])) chk_ret = self.data_systems[ii].check_test_size(test_size) if chk_ret is not None : - raise RuntimeError(" required test size %d is larger than the size %d of the dataset %s" % (test_size, chk_ret[1], chk_ret[0])) + print("WARNNING: system %s required test size %d is larger than the size %d of the dataset %s" % \ + (self.system_dirs[ii], test_size, chk_ret[1], chk_ret[0])) if run_opt is not None: self.print_summary(run_opt) @@ -59,9 +71,10 @@ def __init__ (self, self.test_coord = [] self.test_box = [] self.test_type = [] + self.test_fparam = [] self.default_mesh = [] for ii in range(self.nsystems) : - test_prop_c, test_energy, test_force, test_virial, test_atom_ener, test_coord, test_box, test_type \ + test_prop_c, test_energy, test_force, test_virial, test_atom_ener, test_coord, test_box, test_type, test_fparam \ = self.data_systems[ii].get_test () self.test_prop_c.append(test_prop_c) self.test_energy.append(test_energy) @@ -71,6 +84,7 @@ def __init__ (self, self.test_coord.append(test_coord) self.test_box.append(test_box) self.test_type.append(test_type) + self.test_fparam.append(test_fparam) ncell = np.ones (3, dtype=np.int32) cell_size = np.max (rcut) avg_box = np.average (test_box, axis = 0) @@ -85,6 +99,24 @@ def __init__ (self, self.default_mesh.append(default_mesh) self.pick_idx = 0 + + def check_type_map_consistency(self, type_map_list): + ret = [] + for ii in type_map_list: + if 
ii is not None: + min_len = min([len(ii), len(ret)]) + for idx in range(min_len) : + if ii[idx] != ret[idx] : + raise RuntimeError('inconsistent type map: %s %s' % (str(ret), str(ii))) + if len(ii) > len(ret) : + ret = ii + return ret + + + def get_type_map(self): + return self.type_map + + def format_name_length(self, name, width) : if len(name) <= width: return '{: >{}}'.format(name, width) @@ -150,12 +182,12 @@ def get_batch (self, else : prob = self.process_sys_weights(sys_weights) self.pick_idx = np.random.choice(np.arange(self.nsystems), p = prob) - b_prop_c, b_energy, b_force, b_virial, b_atom_ener, b_coord, b_box, b_type \ + b_prop_c, b_energy, b_force, b_virial, b_atom_ener, b_coord, b_box, b_type, b_fparam \ = self.data_systems[self.pick_idx].get_batch(self.batch_size[self.pick_idx]) return \ b_prop_c, \ b_energy, b_force, b_virial, b_atom_ener, \ - b_coord, b_box, b_type, \ + b_coord, b_box, b_type, b_fparam, \ self.natoms_vec[self.pick_idx], \ self.default_mesh[self.pick_idx] @@ -175,6 +207,7 @@ def get_test (self, self.test_coord[idx], \ self.test_box[idx], \ self.test_type[idx], \ + self.test_fparam[idx], \ self.natoms_vec[idx], \ self.default_mesh[idx] @@ -193,6 +226,9 @@ def get_sys (self, idx) : def get_batch_size(self) : return self.batch_size + def numb_fparam(self) : + return self.has_fparam + def _main () : sys = ['/home/wanghan/study/deep.md/results.01/data/mos2/only_raws/20', '/home/wanghan/study/deep.md/results.01/data/mos2/only_raws/30', diff --git a/source/train/DeepPot.py b/source/train/DeepPot.py new file mode 100644 index 0000000000..b4ac06b819 --- /dev/null +++ b/source/train/DeepPot.py @@ -0,0 +1,211 @@ +#!/usr/bin/env python3 + +import os,sys +import numpy as np +import tensorflow as tf + +from tensorflow.python.framework import ops +module_path = os.path.dirname(os.path.realpath(__file__)) +assert (os.path.isfile (os.path.join(module_path, "libop_abi.so"))), "op module does not exist" +op_module = 
tf.load_op_library(os.path.join(module_path, "libop_abi.so")) + +def _load_graph(frozen_graph_filename, + prefix = 'load'): + # We load the protobuf file from the disk and parse it to retrieve the + # unserialized graph_def + with tf.gfile.GFile(frozen_graph_filename, "rb") as f: + graph_def = tf.GraphDef() + graph_def.ParseFromString(f.read()) + + # Then, we can use again a convenient built-in function to import a graph_def into the + # current default Graph + with tf.Graph().as_default() as graph: + tf.import_graph_def( + graph_def, + input_map=None, + return_elements=None, + name=prefix, + producer_op_list=None + ) + return graph + + +def _rep_int (s): + try: + int(s) + return True + except ValueError: + return False + + +def _make_default_mesh(test_box) : + ncell = np.ones (3, dtype=np.int32) + avg_box = np.average (test_box, axis = 0) + cell_size = 3 + avg_box = np.reshape (avg_box, [3,3]) + for ii in range (3) : + ncell[ii] = int ( np.linalg.norm(avg_box[ii]) / cell_size ) + if (ncell[ii] < 2) : ncell[ii] = 2 + default_mesh = np.zeros (6, dtype = np.int32) + default_mesh[3] = ncell[0] + default_mesh[4] = ncell[1] + default_mesh[5] = ncell[2] + return default_mesh + + +class DeepPot () : + def __init__(self, + model_file) : + self.model_file = model_file + self.graph = _load_graph (self.model_file) + # checkout input/output tensors from graph + self.t_ntypes = self.graph.get_tensor_by_name ('load/model_attr/ntypes:0') + self.t_rcut = self.graph.get_tensor_by_name ('load/model_attr/rcut:0') + self.t_dfparam= self.graph.get_tensor_by_name ('load/model_attr/dfparam:0') + self.t_tmap = self.graph.get_tensor_by_name ('load/model_attr/tmap:0') + # inputs + self.t_coord = self.graph.get_tensor_by_name ('load/i_coord:0') + self.t_type = self.graph.get_tensor_by_name ('load/i_type:0') + self.t_natoms = self.graph.get_tensor_by_name ('load/i_natoms:0') + self.t_box = self.graph.get_tensor_by_name ('load/i_box:0') + self.t_mesh = self.graph.get_tensor_by_name 
('load/i_mesh:0') + # outputs + self.t_energy = self.graph.get_tensor_by_name ('load/o_energy:0') + self.t_force = self.graph.get_tensor_by_name ('load/o_force:0') + self.t_virial = self.graph.get_tensor_by_name ('load/o_virial:0') + self.t_ae = self.graph.get_tensor_by_name ('load/o_atom_energy:0') + self.t_av = self.graph.get_tensor_by_name ('load/o_atom_virial:0') + self.t_fparam = None + # check if the graph has fparam + for op in self.graph.get_operations(): + if op.name == 'load/i_fparam' : + self.t_fparam = self.graph.get_tensor_by_name ('load/i_fparam:0') + self.has_fparam = self.t_fparam is not None + # start a tf session associated to the graph + self.sess = tf.Session (graph = self.graph) + [self.ntypes, self.rcut, self.dfparam, self.tmap] = self.sess.run([self.t_ntypes, self.t_rcut, self.t_dfparam, self.t_tmap]) + self.tmap = self.tmap.decode('UTF-8').split() + + + def get_ntypes(self) : + return self.ntypes + + def get_rcut(self) : + return self.rcut + + def get_dim_fparam(self) : + return self.dfparam + + def get_type_map(self): + return self.tmap + + + def eval(self, + coords, + cells, + atom_types, + fparam = None, + atomic = False) : + # standarize the shape of inputs + coords = np.array(coords) + cells = np.array(cells) + atom_types = np.array(atom_types, dtype = int) + if self.has_fparam : + assert(fparam is not None) + fparam = np.array(fparam) + + # reshape the inputs + cells = np.reshape(cells, [-1, 9]) + nframes = cells.shape[0] + coords = np.reshape(coords, [nframes, -1]) + natoms = coords.shape[1] // 3 + if self.has_fparam : + fdim = self.get_dim_fparam() + if fparam.size == nframes * fdim : + fparam = np.reshape(fparam, [nframes, fdim]) + elif fparam.size == fdim : + fparam = np.tile(fparam.reshape([-1]), [nframes, 1]) + else : + raise RuntimeError('got wrong size of frame param, should be either %d x %d or %d' % (nframes, fdim, fdim)) + + # sort inputs + coords, atom_types, imap = self._sort_input(coords, atom_types) + + # make natoms_vec 
and default_mesh + natoms_vec = self._make_natoms_vec(atom_types) + assert(natoms_vec[0] == natoms) + default_mesh = _make_default_mesh(cells) + + # evaluate + energy = [] + force = [] + virial = [] + ae = [] + av = [] + feed_dict_test = {} + feed_dict_test[self.t_natoms] = natoms_vec + feed_dict_test[self.t_mesh ] = default_mesh + feed_dict_test[self.t_type ] = atom_types + t_out = [self.t_energy, + self.t_force, + self.t_virial] + if atomic : + t_out += [self.t_ae, + self.t_av] + for ii in range(nframes) : + feed_dict_test[self.t_coord] = np.reshape(coords[ii:ii+1, :], [-1]) + feed_dict_test[self.t_box ] = cells[ii:ii+1, :] + if self.has_fparam: + feed_dict_test[self.t_fparam] = np.reshape(fparam[ii:ii+1, :], [-1]) + v_out = self.sess.run (t_out, feed_dict = feed_dict_test) + energy.append(v_out[0]) + force .append(v_out[1]) + virial.append(v_out[2]) + if atomic: + ae.append(v_out[3]) + av.append(v_out[4]) + + # reverse map of the outputs + force = self._reverse_map(np.reshape(force, [nframes,-1,3]), imap) + if atomic : + ae = self._reverse_map(np.reshape(ae, [nframes,-1,1]), imap) + av = self._reverse_map(np.reshape(av, [nframes,-1,9]), imap) + + energy = np.reshape(energy, [nframes, 1]) + force = np.reshape(force, [nframes, natoms, 3]) + virial = np.reshape(virial, [nframes, 9]) + if atomic: + ae = np.reshape(ae, [nframes, natoms, 1]) + av = np.reshape(av, [nframes, natoms, 9]) + return energy, force, virial, ae, av + else : + return energy, force, virial + + + def _sort_input(self, coord, atom_type) : + natoms = atom_type.size + idx = np.arange (natoms) + idx_map = np.lexsort ((idx, atom_type)) + nframes = coord.shape[0] + coord = coord.reshape([nframes, -1, 3]) + coord = np.reshape(coord[:,idx_map,:], [nframes, -1]) + atom_type = atom_type[idx_map] + return coord, atom_type, idx_map + + + def _reverse_map(self, vec, imap): + ret = np.zeros(vec.shape) + for idx,ii in enumerate(imap) : + ret[:,ii,:] = vec[:,idx,:] + return ret + + + def _make_natoms_vec(self, 
atom_types) : + natoms_vec = np.zeros (self.ntypes+2).astype(int) + natoms = atom_types.size + natoms_vec[0] = natoms + natoms_vec[1] = natoms + for ii in range (self.ntypes) : + natoms_vec[ii+2] = np.count_nonzero(atom_types == ii) + return natoms_vec + diff --git a/source/train/DescrptLocFrame.py b/source/train/DescrptLocFrame.py new file mode 100644 index 0000000000..56e7955174 --- /dev/null +++ b/source/train/DescrptLocFrame.py @@ -0,0 +1,214 @@ +import os +import numpy as np +import tensorflow as tf +from deepmd.common import j_must_have, j_must_have_d, j_have + +from deepmd.RunOptions import global_tf_float_precision +from deepmd.RunOptions import global_np_float_precision +from deepmd.RunOptions import global_ener_float_precision +from deepmd.RunOptions import global_cvt_2_tf_float +from deepmd.RunOptions import global_cvt_2_ener_float + +module_path = os.path.dirname(os.path.realpath(__file__)) + "/" +assert (os.path.isfile (module_path + "libop_abi.so" )), "op module does not exist" +op_module = tf.load_op_library(module_path + "libop_abi.so") + + +class DescrptLocFrame () : + def __init__(self, jdata): + # descrpt config + self.sel_a = j_must_have (jdata, 'sel_a') + self.sel_r = j_must_have (jdata, 'sel_r') + self.ntypes = len(self.sel_a) + assert(self.ntypes == len(self.sel_r)) + self.rcut_a = -1 + self.rcut_r = j_must_have (jdata, 'rcut') + # axis + self.axis_rule = j_must_have (jdata, 'axis_rule') + # numb of neighbors and numb of descrptors + self.nnei_a = np.cumsum(self.sel_a)[-1] + self.nnei_r = np.cumsum(self.sel_r)[-1] + self.nnei = self.nnei_a + self.nnei_r + self.ndescrpt_a = self.nnei_a * 4 + self.ndescrpt_r = self.nnei_r * 1 + self.ndescrpt = self.ndescrpt_a + self.ndescrpt_r + + def get_rcut (self) : + return self.rcut_r + + def get_ntypes (self) : + return self.ntypes + + def get_dim_out (self) : + return self.ndescrpt + + def get_nlist (self) : + return self.nlist, self.rij, self.sel_a, self.sel_r + + def compute_dstats (self, + data_coord, 
+ data_box, + data_atype, + natoms_vec, + mesh, + reuse = None) : + all_davg = [] + all_dstd = [] + if True: + sumv = [] + sumn = [] + sumv2 = [] + for cc,bb,tt,nn,mm in zip(data_coord,data_box,data_atype,natoms_vec,mesh) : + sysv,sysv2,sysn \ + = self._compute_dstats_sys_nonsmth(cc,bb,tt,nn,mm,reuse) + sumv.append(sysv) + sumn.append(sysn) + sumv2.append(sysv2) + sumv = np.sum(sumv, axis = 0) + sumn = np.sum(sumn, axis = 0) + sumv2 = np.sum(sumv2, axis = 0) + for type_i in range(self.ntypes) : + davg = sumv[type_i] / sumn[type_i] + dstd = self._compute_std(sumv2[type_i], sumv[type_i], sumn[type_i]) + for ii in range (len(dstd)) : + if (np.abs(dstd[ii]) < 1e-2) : + dstd[ii] = 1e-2 + all_davg.append(davg) + all_dstd.append(dstd) + davg = np.array(all_davg) + dstd = np.array(all_dstd) + return davg, dstd + + + def build (self, + coord_, + atype_, + natoms, + box, + mesh, + davg = None, + dstd = None, + suffix = '', + reuse = None): + with tf.variable_scope('model_attr' + suffix, reuse = reuse) : + if davg is None: + davg = np.zeros([self.ntypes, self.ndescrpt]) + if dstd is None: + dstd = np.ones ([self.ntypes, self.ndescrpt]) + t_rcut = tf.constant(np.max([self.rcut_r, self.rcut_a]), + name = 'rcut', + dtype = global_tf_float_precision) + t_ntypes = tf.constant(self.ntypes, + name = 'ntypes', + dtype = tf.int32) + self.t_avg = tf.get_variable('t_avg', + davg.shape, + dtype = global_tf_float_precision, + trainable = False, + initializer = tf.constant_initializer(davg, dtype = global_tf_float_precision)) + self.t_std = tf.get_variable('t_std', + dstd.shape, + dtype = global_tf_float_precision, + trainable = False, + initializer = tf.constant_initializer(dstd, dtype = global_tf_float_precision)) + + coord = tf.reshape (coord_, [-1, natoms[1] * 3]) + atype = tf.reshape (atype_, [-1, natoms[1]]) + + self.descrpt, self.descrpt_deriv, self.rij, self.nlist, self.axis \ + = op_module.descrpt (coord, + atype, + natoms, + box, + mesh, + self.t_avg, + self.t_std, + rcut_a = 
self.rcut_a, + rcut_r = self.rcut_r, + sel_a = self.sel_a, + sel_r = self.sel_r, + axis_rule = self.axis_rule) + self.descrpt = tf.reshape(self.descrpt, [-1, self.ndescrpt]) + return self.descrpt + + + def prod_force_virial(self, atom_ener, natoms) : + [net_deriv] = tf.gradients (atom_ener, self.descrpt) + net_deriv_reshape = tf.reshape (net_deriv, [-1, natoms[0] * self.ndescrpt]) + force = op_module.prod_force (net_deriv_reshape, + self.descrpt_deriv, + self.nlist, + self.axis, + natoms, + n_a_sel = self.nnei_a, + n_r_sel = self.nnei_r) + virial, atom_virial \ + = op_module.prod_virial (net_deriv_reshape, + self.descrpt_deriv, + self.rij, + self.nlist, + self.axis, + natoms, + n_a_sel = self.nnei_a, + n_r_sel = self.nnei_r) + + return force, virial, atom_virial + + + def _compute_dstats_sys_nonsmth (self, + data_coord, + data_box, + data_atype, + natoms_vec, + mesh, + reuse = None) : + avg_zero = np.zeros([self.ntypes,self.ndescrpt]).astype(global_np_float_precision) + std_ones = np.ones ([self.ntypes,self.ndescrpt]).astype(global_np_float_precision) + sub_graph = tf.Graph() + with sub_graph.as_default(): + descrpt, descrpt_deriv, rij, nlist, axis \ + = op_module.descrpt (tf.constant(data_coord), + tf.constant(data_atype), + tf.constant(natoms_vec, dtype = tf.int32), + tf.constant(data_box), + tf.constant(mesh), + tf.constant(avg_zero), + tf.constant(std_ones), + rcut_a = self.rcut_a, + rcut_r = self.rcut_r, + sel_a = self.sel_a, + sel_r = self.sel_r, + axis_rule = self.axis_rule) + # self.sess.run(tf.global_variables_initializer()) + # sub_sess = tf.Session(graph = sub_graph, + # config=tf.ConfigProto(intra_op_parallelism_threads=self.run_opt.num_intra_threads, + # inter_op_parallelism_threads=self.run_opt.num_inter_threads + # )) + sub_sess = tf.Session(graph = sub_graph) + dd_all = sub_sess.run(descrpt) + sub_sess.close() + natoms = natoms_vec + dd_all = np.reshape(dd_all, [-1, self.ndescrpt * natoms[0]]) + start_index = 0 + sysv = [] + sysn = [] + sysv2 = [] + 
for type_i in range(self.ntypes): + end_index = start_index + self.ndescrpt * natoms[2+type_i] + dd = dd_all[:, start_index:end_index] + dd = np.reshape(dd, [-1, self.ndescrpt]) + start_index = end_index + # compute + sumv = np.sum(dd, axis = 0) + sumn = dd.shape[0] + sumv2 = np.sum(np.multiply(dd,dd), axis = 0) + sysv.append(sumv) + sysn.append(sumn) + sysv2.append(sumv2) + return sysv, sysv2, sysn + + + def _compute_std (self,sumv2, sumv, sumn) : + return np.sqrt(sumv2/sumn - np.multiply(sumv/sumn, sumv/sumn)) + + diff --git a/source/train/DescrptSeA.py b/source/train/DescrptSeA.py new file mode 100644 index 0000000000..5683aa0cc7 --- /dev/null +++ b/source/train/DescrptSeA.py @@ -0,0 +1,432 @@ +import os,sys,warnings +import numpy as np +import tensorflow as tf +from deepmd.common import j_must_have, j_must_have_d, j_have + +from deepmd.RunOptions import global_tf_float_precision +from deepmd.RunOptions import global_np_float_precision +from deepmd.RunOptions import global_ener_float_precision +from deepmd.RunOptions import global_cvt_2_tf_float +from deepmd.RunOptions import global_cvt_2_ener_float + +module_path = os.path.dirname(os.path.realpath(__file__)) + "/" +assert (os.path.isfile (module_path + "libop_abi.so" )), "op module does not exist" +op_module = tf.load_op_library(module_path + "libop_abi.so") + +class DescrptSeA (): + def __init__ (self, jdata): + # descrpt config + self.sel_a = j_must_have (jdata, 'sel') + self.sel_r = [ 0 for ii in range(len(self.sel_a)) ] + self.ntypes = len(self.sel_a) + assert(self.ntypes == len(self.sel_r)) + self.rcut_a = -1 + self.rcut_r = j_must_have (jdata, 'rcut') + self.rcut = self.rcut_r + if j_have(jdata, 'rcut_smth') : + self.rcut_r_smth = jdata['rcut_smth'] + else : + self.rcut_r_smth = self.rcut_r + # filter of smooth version + self.filter_neuron = j_must_have (jdata, 'neuron') + self.n_axis_neuron = j_must_have_d (jdata, 'axis_neuron', ['n_axis_neuron']) + self.filter_resnet_dt = False + if j_have(jdata, 
'resnet_dt') : + self.filter_resnet_dt = jdata['resnet_dt'] + # numb of neighbors and numb of descrptors + self.nnei_a = np.cumsum(self.sel_a)[-1] + self.nnei_r = np.cumsum(self.sel_r)[-1] + self.nnei = self.nnei_a + self.nnei_r + self.ndescrpt_a = self.nnei_a * 4 + self.ndescrpt_r = self.nnei_r * 1 + self.ndescrpt = self.ndescrpt_a + self.ndescrpt_r + + self.seed = None + if j_have (jdata, 'seed') : + self.seed = jdata['seed'] + self.useBN = False + + def get_rcut (self) : + return self.rcut + + def get_ntypes (self) : + return self.ntypes + + def get_dim_out (self) : + return self.filter_neuron[-1] * self.n_axis_neuron + + def get_nlist (self) : + return self.nlist, self.rij, self.sel_a, self.sel_r + + def compute_dstats (self, + data_coord, + data_box, + data_atype, + natoms_vec, + mesh, + reuse = None) : + all_davg = [] + all_dstd = [] + if True: + sumr = [] + suma = [] + sumn = [] + sumr2 = [] + suma2 = [] + for cc,bb,tt,nn,mm in zip(data_coord,data_box,data_atype,natoms_vec,mesh) : + sysr,sysr2,sysa,sysa2,sysn \ + = self._compute_dstats_sys_smth(cc,bb,tt,nn,mm,reuse) + sumr.append(sysr) + suma.append(sysa) + sumn.append(sysn) + sumr2.append(sysr2) + suma2.append(sysa2) + sumr = np.sum(sumr, axis = 0) + suma = np.sum(suma, axis = 0) + sumn = np.sum(sumn, axis = 0) + sumr2 = np.sum(sumr2, axis = 0) + suma2 = np.sum(suma2, axis = 0) + for type_i in range(self.ntypes) : + davgunit = [sumr[type_i]/sumn[type_i], 0, 0, 0] + dstdunit = [self._compute_std(sumr2[type_i], sumr[type_i], sumn[type_i]), + self._compute_std(suma2[type_i], suma[type_i], sumn[type_i]), + self._compute_std(suma2[type_i], suma[type_i], sumn[type_i]), + self._compute_std(suma2[type_i], suma[type_i], sumn[type_i]) + ] + davg = np.tile(davgunit, self.ndescrpt // 4) + dstd = np.tile(dstdunit, self.ndescrpt // 4) + all_davg.append(davg) + all_dstd.append(dstd) + + davg = np.array(all_davg) + dstd = np.array(all_dstd) + + return davg, dstd + + + def build (self, + coord_, + atype_, + natoms, + box, + 
mesh, + davg = None, + dstd = None, + suffix = '', + reuse = None): + + with tf.variable_scope('model_attr' + suffix, reuse = reuse) : + if davg is None: + davg = np.zeros([self.ntypes, self.ndescrpt]) + if dstd is None: + dstd = np.ones ([self.ntypes, self.ndescrpt]) + t_rcut = tf.constant(np.max([self.rcut_r, self.rcut_a]), + name = 'rcut', + dtype = global_tf_float_precision) + t_ntypes = tf.constant(self.ntypes, + name = 'ntypes', + dtype = tf.int32) + self.t_avg = tf.get_variable('t_avg', + davg.shape, + dtype = global_tf_float_precision, + trainable = False, + initializer = tf.constant_initializer(davg, dtype = global_tf_float_precision)) + self.t_std = tf.get_variable('t_std', + dstd.shape, + dtype = global_tf_float_precision, + trainable = False, + initializer = tf.constant_initializer(dstd, dtype = global_tf_float_precision)) + + coord = tf.reshape (coord_, [-1, natoms[1] * 3]) + atype = tf.reshape (atype_, [-1, natoms[1]]) + + self.descrpt, self.descrpt_deriv, self.rij, self.nlist \ + = op_module.descrpt_se_a (coord, + atype, + natoms, + box, + mesh, + self.t_avg, + self.t_std, + rcut_a = self.rcut_a, + rcut_r = self.rcut_r, + rcut_r_smth = self.rcut_r_smth, + sel_a = self.sel_a, + sel_r = self.sel_r) + + self.descrpt_reshape = tf.reshape(self.descrpt, [-1, self.ndescrpt]) + + self.dout = self._pass_filter(self.descrpt_reshape, natoms, suffix = suffix, reuse = reuse) + + return self.dout + + + def prod_force_virial(self, atom_ener, natoms) : + [net_deriv] = tf.gradients (atom_ener, self.descrpt_reshape) + net_deriv_reshape = tf.reshape (net_deriv, [-1, natoms[0] * self.ndescrpt]) + force \ + = op_module.prod_force_se_a (net_deriv_reshape, + self.descrpt_deriv, + self.nlist, + natoms, + n_a_sel = self.nnei_a, + n_r_sel = self.nnei_r) + virial, atom_virial \ + = op_module.prod_virial_se_a (net_deriv_reshape, + self.descrpt_deriv, + self.rij, + self.nlist, + natoms, + n_a_sel = self.nnei_a, + n_r_sel = self.nnei_r) + return force, virial, atom_virial + + + 
def _pass_filter(self, + inputs, + natoms, + reuse = None, + suffix = '') : + start_index = 0 + inputs = tf.reshape(inputs, [-1, self.ndescrpt * natoms[0]]) + shape = inputs.get_shape().as_list() + output = [] + for type_i in range(self.ntypes): + inputs_i = tf.slice (inputs, + [ 0, start_index* self.ndescrpt], + [-1, natoms[2+type_i]* self.ndescrpt] ) + inputs_i = tf.reshape(inputs_i, [-1, self.ndescrpt]) + layer = self._filter(inputs_i, name='filter_type_'+str(type_i)+suffix, natoms=natoms, reuse=reuse, seed = self.seed) + layer = tf.reshape(layer, [-1, natoms[2+type_i] * self.get_dim_out()]) + output.append(layer) + start_index += natoms[2+type_i] + output = tf.concat(output, axis = 1) + return output + + + def _compute_dstats_sys_smth (self, + data_coord, + data_box, + data_atype, + natoms_vec, + mesh, + reuse = None) : + avg_zero = np.zeros([self.ntypes,self.ndescrpt]).astype(global_np_float_precision) + std_ones = np.ones ([self.ntypes,self.ndescrpt]).astype(global_np_float_precision) + sub_graph = tf.Graph() + with sub_graph.as_default(): + descrpt, descrpt_deriv, rij, nlist \ + = op_module.descrpt_se_a (tf.constant(data_coord), + tf.constant(data_atype), + tf.constant(natoms_vec, dtype = tf.int32), + tf.constant(data_box), + tf.constant(mesh), + tf.constant(avg_zero), + tf.constant(std_ones), + rcut_a = self.rcut_a, + rcut_r = self.rcut_r, + rcut_r_smth = self.rcut_r_smth, + sel_a = self.sel_a, + sel_r = self.sel_r) + # self.sess.run(tf.global_variables_initializer()) + # sub_sess = tf.Session(graph = sub_graph, + # config=tf.ConfigProto(intra_op_parallelism_threads=self.run_opt.num_intra_threads, + # inter_op_parallelism_threads=self.run_opt.num_inter_threads + + # )) + sub_sess = tf.Session(graph = sub_graph) + dd_all = sub_sess.run(descrpt) + sub_sess.close() + natoms = natoms_vec + dd_all = np.reshape(dd_all, [-1, self.ndescrpt * natoms[0]]) + start_index = 0 + sysr = [] + sysa = [] + sysn = [] + sysr2 = [] + sysa2 = [] + for type_i in 
range(self.ntypes): + end_index = start_index + self.ndescrpt * natoms[2+type_i] + dd = dd_all[:, start_index:end_index] + dd = np.reshape(dd, [-1, self.ndescrpt]) + start_index = end_index + # compute + dd = np.reshape (dd, [-1, 4]) + ddr = dd[:,:1] + dda = dd[:,1:] + sumr = np.sum(ddr) + suma = np.sum(dda) / 3. + sumn = dd.shape[0] + sumr2 = np.sum(np.multiply(ddr, ddr)) + suma2 = np.sum(np.multiply(dda, dda)) / 3. + sysr.append(sumr) + sysa.append(suma) + sysn.append(sumn) + sysr2.append(sumr2) + sysa2.append(suma2) + return sysr, sysr2, sysa, sysa2, sysn + + + def _compute_std (self,sumv2, sumv, sumn) : + return np.sqrt(sumv2/sumn - np.multiply(sumv/sumn, sumv/sumn)) + + + def _filter(self, + inputs, + natoms, + activation_fn=tf.nn.tanh, + stddev=1.0, + bavg=0.0, + name='linear', + reuse=None, + seed=None): + # natom x (nei x 4) + shape = inputs.get_shape().as_list() + outputs_size = [1] + self.filter_neuron + outputs_size_2 = self.n_axis_neuron + with tf.variable_scope(name, reuse=reuse): + start_index = 0 + xyz_scatter_total = [] + for type_i in range(self.ntypes): + # cut-out inputs + # with natom x (nei_type_i x 4) + inputs_i = tf.slice (inputs, + [ 0, start_index* 4], + [-1, self.sel_a[type_i]* 4] ) + start_index += self.sel_a[type_i] + shape_i = inputs_i.get_shape().as_list() + # with (natom x nei_type_i) x 4 + inputs_reshape = tf.reshape(inputs_i, [-1, 4]) + xyz_scatter = tf.reshape(tf.slice(inputs_reshape, [0,0],[-1,1]),[-1,1]) + for ii in range(1, len(outputs_size)): + w = tf.get_variable('matrix_'+str(ii)+'_'+str(type_i), + [outputs_size[ii - 1], outputs_size[ii]], + global_tf_float_precision, + tf.random_normal_initializer(stddev=stddev/np.sqrt(outputs_size[ii]+outputs_size[ii-1]), seed = seed)) + b = tf.get_variable('bias_'+str(ii)+'_'+str(type_i), + [1, outputs_size[ii]], + global_tf_float_precision, + tf.random_normal_initializer(stddev=stddev, mean = bavg, seed = seed)) + if self.filter_resnet_dt : + idt = 
tf.get_variable('idt_'+str(ii)+'_'+str(type_i), + [1, outputs_size[ii]], + global_tf_float_precision, + tf.random_normal_initializer(stddev=0.001, mean = 1.0, seed = seed)) + if outputs_size[ii] == outputs_size[ii-1]: + if self.filter_resnet_dt : + xyz_scatter += activation_fn(tf.matmul(xyz_scatter, w) + b) * idt + else : + xyz_scatter += activation_fn(tf.matmul(xyz_scatter, w) + b) + elif outputs_size[ii] == outputs_size[ii-1] * 2: + if self.filter_resnet_dt : + xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + activation_fn(tf.matmul(xyz_scatter, w) + b) * idt + else : + xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + activation_fn(tf.matmul(xyz_scatter, w) + b) + else: + xyz_scatter = activation_fn(tf.matmul(xyz_scatter, w) + b) + # natom x nei_type_i x out_size + xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1]//4, outputs_size[-1])) + xyz_scatter_total.append(xyz_scatter) + + # natom x nei x outputs_size + xyz_scatter = tf.concat(xyz_scatter_total, axis=1) + # natom x nei x 4 + inputs_reshape = tf.reshape(inputs, [-1, shape[1]//4, 4]) + # natom x 4 x outputs_size + xyz_scatter_1 = tf.matmul(inputs_reshape, xyz_scatter, transpose_a = True) + xyz_scatter_1 = xyz_scatter_1 * (4.0 / shape[1]) + # natom x 4 x outputs_size_2 + xyz_scatter_2 = tf.slice(xyz_scatter_1, [0,0,0],[-1,-1,outputs_size_2]) + # natom x outputs_size x outputs_size_2 + result = tf.matmul(xyz_scatter_1, xyz_scatter_2, transpose_a = True) + # natom x (outputs_size x outputs_size_2) + result = tf.reshape(result, [-1, outputs_size_2 * outputs_size[-1]]) + + return result + + def _filter_type_ext(self, + inputs, + natoms, + activation_fn=tf.nn.tanh, + stddev=1.0, + bavg=0.0, + name='linear', + reuse=None, + seed=None): + # natom x (nei x 4) + shape = inputs.get_shape().as_list() + outputs_size = [1] + self.filter_neuron + outputs_size_2 = self.n_axis_neuron + with tf.variable_scope(name, reuse=reuse): + start_index = 0 + result_all = [] + xyz_scatter_1_all = [] + xyz_scatter_2_all 
= [] + for type_i in range(self.ntypes): + # cut-out inputs + # with natom x (nei_type_i x 4) + inputs_i = tf.slice (inputs, + [ 0, start_index* 4], + [-1, self.sel_a[type_i]* 4] ) + start_index += self.sel_a[type_i] + shape_i = inputs_i.get_shape().as_list() + # with (natom x nei_type_i) x 4 + inputs_reshape = tf.reshape(inputs_i, [-1, 4]) + xyz_scatter = tf.reshape(tf.slice(inputs_reshape, [0,0],[-1,1]),[-1,1]) + for ii in range(1, len(outputs_size)): + w = tf.get_variable('matrix_'+str(ii)+'_'+str(type_i), + [outputs_size[ii - 1], outputs_size[ii]], + global_tf_float_precision, + tf.random_normal_initializer(stddev=stddev/np.sqrt(outputs_size[ii]+outputs_size[ii-1]), seed = seed)) + b = tf.get_variable('bias_'+str(ii)+'_'+str(type_i), + [1, outputs_size[ii]], + global_tf_float_precision, + tf.random_normal_initializer(stddev=stddev, mean = bavg, seed = seed)) + if self.filter_resnet_dt : + idt = tf.get_variable('idt_'+str(ii)+'_'+str(type_i), + [1, outputs_size[ii]], + global_tf_float_precision, + tf.random_normal_initializer(stddev=0.001, mean = 1.0, seed = seed)) + if outputs_size[ii] == outputs_size[ii-1]: + if self.filter_resnet_dt : + xyz_scatter += activation_fn(tf.matmul(xyz_scatter, w) + b) * idt + else : + xyz_scatter += activation_fn(tf.matmul(xyz_scatter, w) + b) + elif outputs_size[ii] == outputs_size[ii-1] * 2: + if self.filter_resnet_dt : + xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + activation_fn(tf.matmul(xyz_scatter, w) + b) * idt + else : + xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + activation_fn(tf.matmul(xyz_scatter, w) + b) + else: + xyz_scatter = activation_fn(tf.matmul(xyz_scatter, w) + b) + # natom x nei_type_i x out_size + xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1]//4, outputs_size[-1])) + # natom x nei_type_i x 4 + inputs_i_reshape = tf.reshape(inputs_i, [-1, shape_i[1]//4, 4]) + # natom x 4 x outputs_size + xyz_scatter_1 = tf.matmul(inputs_i_reshape, xyz_scatter, transpose_a = True) + xyz_scatter_1 = 
xyz_scatter_1 * (4.0 / shape_i[1]) + # natom x 4 x outputs_size_2 + xyz_scatter_2 = tf.slice(xyz_scatter_1, [0,0,0],[-1,-1,outputs_size_2]) + xyz_scatter_1_all.append(xyz_scatter_1) + xyz_scatter_2_all.append(xyz_scatter_2) + + # for type_i in range(self.ntypes): + # for type_j in range(type_i, self.ntypes): + # # natom x outputs_size x outputs_size_2 + # result = tf.matmul(xyz_scatter_1_all[type_i], xyz_scatter_2_all[type_j], transpose_a = True) + # # natom x (outputs_size x outputs_size_2) + # result = tf.reshape(result, [-1, outputs_size_2 * outputs_size[-1]]) + # result_all.append(tf.identity(result)) + xyz_scatter_2_coll = tf.concat(xyz_scatter_2_all, axis = 2) + for type_i in range(self.ntypes) : + # natom x outputs_size x (outputs_size_2 x ntypes) + result = tf.matmul(xyz_scatter_1_all[type_i], xyz_scatter_2_coll, transpose_a = True) + # natom x (outputs_size x outputs_size_2 x ntypes) + result = tf.reshape(result, [-1, outputs_size_2 * self.ntypes * outputs_size[-1]]) + result_all.append(tf.identity(result)) + + # natom x (ntypes x outputs_size x outputs_size_2 x ntypes) + result_all = tf.concat(result_all, axis = 1) + + return result_all diff --git a/source/train/DescrptSeR.py b/source/train/DescrptSeR.py new file mode 100644 index 0000000000..416220a14c --- /dev/null +++ b/source/train/DescrptSeR.py @@ -0,0 +1,304 @@ +import os,warnings +import numpy as np +import tensorflow as tf +from deepmd.common import j_must_have, j_must_have_d, j_have + +from deepmd.RunOptions import global_tf_float_precision +from deepmd.RunOptions import global_np_float_precision +from deepmd.RunOptions import global_ener_float_precision +from deepmd.RunOptions import global_cvt_2_tf_float +from deepmd.RunOptions import global_cvt_2_ener_float + +module_path = os.path.dirname(os.path.realpath(__file__)) + "/" +assert (os.path.isfile (module_path + "libop_abi.so" )), "op module does not exist" +op_module = tf.load_op_library(module_path + "libop_abi.so") + +class DescrptSeR (): + 
def __init__ (self, jdata): + # descrpt config + self.sel_r = j_must_have (jdata, 'sel') + self.sel_a = [ 0 for ii in range(len(self.sel_r)) ] + self.sel = self.sel_r + self.ntypes = len(self.sel_r) + self.rcut = j_must_have (jdata, 'rcut') + if j_have(jdata, 'rcut_smth') : + self.rcut_smth = jdata['rcut_smth'] + else : + self.rcut_smth = self.rcut + # filter of smooth version + self.filter_neuron = j_must_have (jdata, 'neuron') + self.filter_resnet_dt = False + if j_have(jdata, 'resnet_dt') : + self.filter_resnet_dt = jdata['resnet_dt'] + # numb of neighbors and numb of descrptors + self.nnei_a = np.cumsum(self.sel_a)[-1] + self.nnei_r = np.cumsum(self.sel_r)[-1] + self.nnei = np.cumsum(self.sel)[-1] + self.ndescrpt_a = self.nnei_a * 4 + self.ndescrpt_r = self.nnei_r * 1 + self.ndescrpt = self.nnei_r + + self.seed = None + if j_have (jdata, 'seed') : + self.seed = jdata['seed'] + self.useBN = False + + def get_rcut (self) : + return self.rcut + + def get_ntypes (self) : + return self.ntypes + + def get_dim_out (self) : + return self.filter_neuron[-1] + + def get_nlist (self) : + return self.nlist, self.rij, self.sel_a, self.sel_r + + def compute_dstats (self, + data_coord, + data_box, + data_atype, + natoms_vec, + mesh, + reuse = None) : + all_davg = [] + all_dstd = [] + sumr = [] + sumn = [] + sumr2 = [] + for cc,bb,tt,nn,mm in zip(data_coord,data_box,data_atype,natoms_vec,mesh) : + sysr,sysr2,sysn \ + = self._compute_dstats_sys_se_r(cc,bb,tt,nn,mm,reuse) + sumr.append(sysr) + sumn.append(sysn) + sumr2.append(sysr2) + sumr = np.sum(sumr, axis = 0) + sumn = np.sum(sumn, axis = 0) + sumr2 = np.sum(sumr2, axis = 0) + for type_i in range(self.ntypes) : + davgunit = [sumr[type_i]/sumn[type_i]] + dstdunit = [self._compute_std(sumr2[type_i], sumr[type_i], sumn[type_i])] + davg = np.tile(davgunit, self.ndescrpt // 1) + dstd = np.tile(dstdunit, self.ndescrpt // 1) + all_davg.append(davg) + all_dstd.append(dstd) + + davg = np.array(all_davg) + dstd = np.array(all_dstd) + + 
return davg, dstd + + def build (self, + coord_, + atype_, + natoms, + box, + mesh, + davg = None, + dstd = None, + suffix = '', + reuse = None): + with tf.variable_scope('model_attr' + suffix, reuse = reuse) : + if davg is None: + davg = np.zeros([self.ntypes, self.ndescrpt]) + if dstd is None: + dstd = np.ones ([self.ntypes, self.ndescrpt]) + t_rcut = tf.constant(self.rcut, + name = 'rcut', + dtype = global_tf_float_precision) + t_ntypes = tf.constant(self.ntypes, + name = 'ntypes', + dtype = tf.int32) + self.t_avg = tf.get_variable('t_avg', + davg.shape, + dtype = global_tf_float_precision, + trainable = False, + initializer = tf.constant_initializer(davg, dtype = global_tf_float_precision)) + self.t_std = tf.get_variable('t_std', + dstd.shape, + dtype = global_tf_float_precision, + trainable = False, + initializer = tf.constant_initializer(dstd, dtype = global_tf_float_precision)) + + coord = tf.reshape (coord_, [-1, natoms[1] * 3]) + atype = tf.reshape (atype_, [-1, natoms[1]]) + + self.descrpt, self.descrpt_deriv, self.rij, self.nlist \ + = op_module.descrpt_se_r (coord, + atype, + natoms, + box, + mesh, + self.t_avg, + self.t_std, + rcut = self.rcut, + rcut_smth = self.rcut_smth, + sel = self.sel_r) + + self.descrpt_reshape = tf.reshape(self.descrpt, [-1, self.ndescrpt]) + + self.dout = self._pass_filter(self.descrpt_reshape, natoms, suffix = suffix, reuse = reuse) + + return self.dout + + + def prod_force_virial(self, atom_ener, natoms) : + [net_deriv] = tf.gradients (atom_ener, self.descrpt_reshape) + net_deriv_reshape = tf.reshape (net_deriv, [-1, natoms[0] * self.ndescrpt]) + force \ + = op_module.prod_force_se_r (net_deriv_reshape, + self.descrpt_deriv, + self.nlist, + natoms) + virial, atom_virial \ + = op_module.prod_virial_se_r (net_deriv_reshape, + self.descrpt_deriv, + self.rij, + self.nlist, + natoms) + return force, virial, atom_virial + + + def _pass_filter(self, + inputs, + natoms, + reuse = None, + suffix = '') : + start_index = 0 + inputs = 
tf.reshape(inputs, [-1, self.ndescrpt * natoms[0]]) + shape = inputs.get_shape().as_list() + output = [] + for type_i in range(self.ntypes): + inputs_i = tf.slice (inputs, + [ 0, start_index* self.ndescrpt], + [-1, natoms[2+type_i]* self.ndescrpt] ) + inputs_i = tf.reshape(inputs_i, [-1, self.ndescrpt]) + layer = self._filter_r(inputs_i, name='filter_type_'+str(type_i)+suffix, natoms=natoms, reuse=reuse, seed = self.seed) + layer = tf.reshape(layer, [-1, natoms[2+type_i] * self.get_dim_out()]) + output.append(layer) + start_index += natoms[2+type_i] + output = tf.concat(output, axis = 1) + return output + + def _compute_dstats_sys_se_r (self, + data_coord, + data_box, + data_atype, + natoms_vec, + mesh, + reuse = None) : + avg_zero = np.zeros([self.ntypes,self.ndescrpt]).astype(global_np_float_precision) + std_ones = np.ones ([self.ntypes,self.ndescrpt]).astype(global_np_float_precision) + sub_graph = tf.Graph() + with sub_graph.as_default(): + descrpt, descrpt_deriv, rij, nlist \ + = op_module.descrpt_se_r (tf.constant(data_coord), + tf.constant(data_atype), + tf.constant(natoms_vec, dtype = tf.int32), + tf.constant(data_box), + tf.constant(mesh), + tf.constant(avg_zero), + tf.constant(std_ones), + rcut = self.rcut, + rcut_smth = self.rcut_smth, + sel = self.sel) + # sub_sess = tf.Session(graph = sub_graph, + # config=tf.ConfigProto(intra_op_parallelism_threads=self.run_opt.num_intra_threads, + # inter_op_parallelism_threads=self.run_opt.num_inter_threads + + # )) + sub_sess = tf.Session(graph = sub_graph) + dd_all = sub_sess.run(descrpt) + sub_sess.close() + natoms = natoms_vec + dd_all = np.reshape(dd_all, [-1, self.ndescrpt * natoms[0]]) + start_index = 0 + sysr = [] + sysa = [] + sysn = [] + sysr2 = [] + sysa2 = [] + for type_i in range(self.ntypes): + end_index = start_index + self.ndescrpt * natoms[2+type_i] + dd = dd_all[:, start_index:end_index] + dd = np.reshape(dd, [-1, self.ndescrpt]) + start_index = end_index + # compute + dd = np.reshape (dd, [-1, 1]) 
+ ddr = dd[:,:1] + sumr = np.sum(ddr) + sumn = dd.shape[0] + sumr2 = np.sum(np.multiply(ddr, ddr)) + sysr.append(sumr) + sysn.append(sumn) + sysr2.append(sumr2) + return sysr, sysr2, sysn + + + def _compute_std (self,sumv2, sumv, sumn) : + return np.sqrt(sumv2/sumn - np.multiply(sumv/sumn, sumv/sumn)) + + def _filter_r(self, + inputs, + natoms, + activation_fn=tf.nn.tanh, + stddev=1.0, + bavg=0.0, + name='linear', + reuse=None, + seed=None): + # natom x nei + shape = inputs.get_shape().as_list() + outputs_size = [1] + self.filter_neuron + with tf.variable_scope(name, reuse=reuse): + start_index = 0 + xyz_scatter_total = [] + for type_i in range(self.ntypes): + # cut-out inputs + # with natom x nei_type_i + inputs_i = tf.slice (inputs, + [ 0, start_index ], + [-1, self.sel_r[type_i]] ) + start_index += self.sel_r[type_i] + shape_i = inputs_i.get_shape().as_list() + # with (natom x nei_type_i) x 1 + xyz_scatter = tf.reshape(inputs_i, [-1, 1]) + for ii in range(1, len(outputs_size)): + w = tf.get_variable('matrix_'+str(ii)+'_'+str(type_i), + [outputs_size[ii - 1], outputs_size[ii]], + global_tf_float_precision, + tf.random_normal_initializer(stddev=stddev/np.sqrt(outputs_size[ii]+outputs_size[ii-1]), seed = seed)) + b = tf.get_variable('bias_'+str(ii)+'_'+str(type_i), + [1, outputs_size[ii]], + global_tf_float_precision, + tf.random_normal_initializer(stddev=stddev, mean = bavg, seed = seed)) + if self.filter_resnet_dt : + idt = tf.get_variable('idt_'+str(ii)+'_'+str(type_i), + [1, outputs_size[ii]], + global_tf_float_precision, + tf.random_normal_initializer(stddev=0.001, mean = 1.0, seed = seed)) + if outputs_size[ii] == outputs_size[ii-1]: + if self.filter_resnet_dt : + xyz_scatter += activation_fn(tf.matmul(xyz_scatter, w) + b) * idt + else : + xyz_scatter += activation_fn(tf.matmul(xyz_scatter, w) + b) + elif outputs_size[ii] == outputs_size[ii-1] * 2: + if self.filter_resnet_dt : + xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + 
activation_fn(tf.matmul(xyz_scatter, w) + b) * idt + else : + xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + activation_fn(tf.matmul(xyz_scatter, w) + b) + else: + xyz_scatter = activation_fn(tf.matmul(xyz_scatter, w) + b) + # natom x nei_type_i x out_size + xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1], outputs_size[-1])) + xyz_scatter_total.append(xyz_scatter) + + # natom x nei x outputs_size + xyz_scatter = tf.concat(xyz_scatter_total, axis=1) + # natom x outputs_size + # + res_rescale = 1./5. + result = tf.reduce_mean(xyz_scatter, axis = 1) * res_rescale + + return result diff --git a/source/train/Fitting.py b/source/train/Fitting.py new file mode 100644 index 0000000000..58b5483253 --- /dev/null +++ b/source/train/Fitting.py @@ -0,0 +1,130 @@ +import os,warnings +import numpy as np +import tensorflow as tf + +from deepmd.common import j_must_have, j_must_have_d, j_have + +from deepmd.RunOptions import global_tf_float_precision +from deepmd.RunOptions import global_np_float_precision +from deepmd.RunOptions import global_ener_float_precision +from deepmd.RunOptions import global_cvt_2_tf_float +from deepmd.RunOptions import global_cvt_2_ener_float + +class EnerFitting (): + def __init__ (self, jdata, descrpt): + # model param + self.ntypes = descrpt.get_ntypes() + self.dim_descrpt = descrpt.get_dim_out() + # fparam + self.numb_fparam = 0 + if j_have(jdata, 'numb_fparam') : + self.numb_fparam = jdata['numb_fparam'] + # network size + self.n_neuron = j_must_have_d (jdata, 'neuron', ['n_neuron']) + self.resnet_dt = True + if j_have(jdata, 'resnet_dt') : + self.resnet_dt = jdata['resnet_dt'] + + self.seed = None + if j_have (jdata, 'seed') : + self.seed = jdata['seed'] + self.useBN = False + + def get_numb_fparam(self) : + return self.numb_fparam + + def build (self, + inputs, + fparam, + natoms, + bias_atom_e = None, + reuse = None, + suffix = '') : + with tf.variable_scope('model_attr' + suffix, reuse = reuse) : + t_dfparam = 
tf.constant(self.numb_fparam, + name = 'dfparam', + dtype = tf.int32) + start_index = 0 + inputs = tf.reshape(inputs, [-1, self.dim_descrpt * natoms[0]]) + shape = inputs.get_shape().as_list() + + if bias_atom_e is not None : + assert(len(bias_atom_e) == self.ntypes) + + for type_i in range(self.ntypes): + # cut-out inputs + inputs_i = tf.slice (inputs, + [ 0, start_index* self.dim_descrpt], + [-1, natoms[2+type_i]* self.dim_descrpt] ) + inputs_i = tf.reshape(inputs_i, [-1, self.dim_descrpt]) + start_index += natoms[2+type_i] + if bias_atom_e is None : + type_bias_ae = 0.0 + else : + type_bias_ae = bias_atom_e[type_i] + + layer = inputs_i + if self.numb_fparam > 0 : + ext_fparam = tf.reshape(fparam, [-1, self.numb_fparam]) + ext_fparam = tf.tile(ext_fparam, [1, natoms[2+type_i]]) + ext_fparam = tf.reshape(ext_fparam, [-1, self.numb_fparam]) + layer = tf.concat([layer, ext_fparam], axis = 1) + for ii in range(0,len(self.n_neuron)) : + if ii >= 1 and self.n_neuron[ii] == self.n_neuron[ii-1] : + layer+= self._one_layer(layer, self.n_neuron[ii], name='layer_'+str(ii)+'_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed, use_timestep = self.resnet_dt) + else : + layer = self._one_layer(layer, self.n_neuron[ii], name='layer_'+str(ii)+'_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed) + final_layer = self._one_layer(layer, 1, activation_fn = None, bavg = type_bias_ae, name='final_layer_type_'+str(type_i)+suffix, reuse=reuse, seed = self.seed) + final_layer = tf.reshape(final_layer, [-1, natoms[2+type_i]]) + + # concat the results + if type_i == 0: + outs = final_layer + else: + outs = tf.concat([outs, final_layer], axis = 1) + + return tf.reshape(outs, [-1]) + + + def _one_layer(self, + inputs, + outputs_size, + activation_fn=tf.nn.tanh, + stddev=1.0, + bavg=0.0, + name='linear', + reuse=None, + seed=None, + use_timestep = False): + with tf.variable_scope(name, reuse=reuse): + shape = inputs.get_shape().as_list() + w = tf.get_variable('matrix', + [shape[1], 
outputs_size], + global_tf_float_precision, + tf.random_normal_initializer(stddev=stddev/np.sqrt(shape[1]+outputs_size), seed = seed)) + b = tf.get_variable('bias', + [outputs_size], + global_tf_float_precision, + tf.random_normal_initializer(stddev=stddev, mean = bavg, seed = seed)) + hidden = tf.matmul(inputs, w) + b + if activation_fn != None and use_timestep : + idt = tf.get_variable('idt', + [outputs_size], + global_tf_float_precision, + tf.random_normal_initializer(stddev=0.001, mean = 0.1, seed = seed)) + if activation_fn != None: + if self.useBN: + None + # hidden_bn = self._batch_norm(hidden, name=name+'_normalization', reuse=reuse) + # return activation_fn(hidden_bn) + else: + if use_timestep : + return activation_fn(hidden) * idt + else : + return activation_fn(hidden) + else: + if self.useBN: + None + # return self._batch_norm(hidden, name=name+'_normalization', reuse=reuse) + else: + return hidden diff --git a/source/train/LearningRate.py b/source/train/LearningRate.py new file mode 100644 index 0000000000..f7eeda8d3c --- /dev/null +++ b/source/train/LearningRate.py @@ -0,0 +1,25 @@ +import os,sys,warnings +import numpy as np +import tensorflow as tf +from deepmd.common import j_must_have, j_must_have_d, j_have + +class LearningRateExp (object) : + def __init__ (self, + jdata) : + self.decay_steps_ = j_must_have(jdata, 'decay_steps') + self.decay_rate_ = j_must_have(jdata, 'decay_rate') + self.start_lr_ = j_must_have(jdata, 'start_lr') + + def build(self, global_step) : + return tf.train.exponential_decay(self.start_lr_, + global_step, + self.decay_steps_, + self.decay_rate_, + staircase=True) + def start_lr(self) : + return self.start_lr_ + + def value (self, + batch) : + return self.start_lr_ * np.power (self.decay_rate_, (batch // self.decay_steps_)) + diff --git a/source/train/Loss.py b/source/train/Loss.py new file mode 100644 index 0000000000..78fa257654 --- /dev/null +++ b/source/train/Loss.py @@ -0,0 +1,76 @@ +import os,sys,warnings +import 
numpy as np +import tensorflow as tf +from deepmd.common import j_must_have, j_must_have_d, j_have + +from deepmd.RunOptions import global_tf_float_precision +from deepmd.RunOptions import global_np_float_precision +from deepmd.RunOptions import global_ener_float_precision +from deepmd.RunOptions import global_cvt_2_tf_float +from deepmd.RunOptions import global_cvt_2_ener_float + +class LossStd () : + def __init__ (self, jdata, starter_learning_rate) : + self.starter_learning_rate = starter_learning_rate + self.start_pref_e = j_must_have (jdata, 'start_pref_e') + self.limit_pref_e = j_must_have (jdata, 'limit_pref_e') + self.start_pref_f = j_must_have (jdata, 'start_pref_f') + self.limit_pref_f = j_must_have (jdata, 'limit_pref_f') + self.start_pref_v = j_must_have (jdata, 'start_pref_v') + self.limit_pref_v = j_must_have (jdata, 'limit_pref_v') + self.start_pref_ae = 0 + if j_have(jdata, 'start_pref_ae') : + self.start_pref_ae = jdata['start_pref_ae'] + self.limit_pref_ae = 0 + if j_have(jdata, 'limit_pref_ae') : + self.limit_pref_ae = jdata['limit_pref_ae'] + self.has_e = (self.start_pref_e != 0 or self.limit_pref_e != 0) + self.has_f = (self.start_pref_f != 0 or self.limit_pref_f != 0) + self.has_v = (self.start_pref_v != 0 or self.limit_pref_v != 0) + self.has_ae = (self.start_pref_ae != 0 or self.limit_pref_ae != 0) + + def build (self, + learning_rate, + natoms, + prop_c, + energy, + energy_hat, + force, + force_hat, + virial, + virial_hat, + atom_ener, + atom_ener_hat, + suffix): + l2_ener_loss = tf.reduce_mean( tf.square(energy - energy_hat), name='l2_'+suffix) + + force_reshape = tf.reshape (force, [-1]) + force_hat_reshape = tf.reshape (force_hat, [-1]) + l2_force_loss = tf.reduce_mean (tf.square(force_hat_reshape - force_reshape), name = "l2_force_" + suffix) + + virial_reshape = tf.reshape (virial, [-1]) + virial_hat_reshape = tf.reshape (virial_hat, [-1]) + l2_virial_loss = tf.reduce_mean (tf.square(virial_hat_reshape - virial_reshape), name = 
"l2_virial_" + suffix) + + atom_ener_reshape = tf.reshape (atom_ener, [-1]) + atom_ener_hat_reshape = tf.reshape (atom_ener_hat, [-1]) + l2_atom_ener_loss = tf.reduce_mean (tf.square(atom_ener_hat_reshape - atom_ener_reshape), name = "l2_atom_ener_" + suffix) + + atom_norm = 1./ global_cvt_2_tf_float(natoms[0]) + atom_norm_ener = 1./ global_cvt_2_ener_float(natoms[0]) + pref_e = global_cvt_2_ener_float(prop_c[0] * (self.limit_pref_e + (self.start_pref_e - self.limit_pref_e) * learning_rate / self.starter_learning_rate) ) + pref_f = global_cvt_2_tf_float(prop_c[1] * (self.limit_pref_f + (self.start_pref_f - self.limit_pref_f) * learning_rate / self.starter_learning_rate) ) + pref_v = global_cvt_2_tf_float(prop_c[2] * (self.limit_pref_v + (self.start_pref_v - self.limit_pref_v) * learning_rate / self.starter_learning_rate) ) + pref_ae= global_cvt_2_tf_float(prop_c[3] * (self.limit_pref_ae+ (self.start_pref_ae-self.limit_pref_ae) * learning_rate / self.starter_learning_rate) ) + + l2_loss = 0 + if self.has_e : + l2_loss += atom_norm_ener * (pref_e * l2_ener_loss) + if self.has_f : + l2_loss += global_cvt_2_ener_float(pref_f * l2_force_loss) + if self.has_v : + l2_loss += global_cvt_2_ener_float(atom_norm * (pref_v * l2_virial_loss)) + if self.has_ae : + l2_loss += global_cvt_2_ener_float(pref_ae * l2_atom_ener_loss) + + return l2_loss, l2_ener_loss, l2_force_loss, l2_virial_loss, l2_atom_ener_loss diff --git a/source/train/Model.py b/source/train/Model.py index 1294b5dcd6..c49483714e 100644 --- a/source/train/Model.py +++ b/source/train/Model.py @@ -1,214 +1,54 @@ -#!/usr/bin/env python3 -import os -import sys -import time -import shutil -import warnings +import os,sys,warnings import numpy as np import tensorflow as tf +from deepmd.TabInter import TabInter +from deepmd.common import j_must_have, j_must_have_d, j_have + from deepmd.RunOptions import global_tf_float_precision from deepmd.RunOptions import global_np_float_precision from deepmd.RunOptions import 
global_ener_float_precision from deepmd.RunOptions import global_cvt_2_tf_float from deepmd.RunOptions import global_cvt_2_ener_float -from tensorflow.python.framework import ops -from tensorflow.python.client import timeline - -# load force module module_path = os.path.dirname(os.path.realpath(__file__)) + "/" assert (os.path.isfile (module_path + "libop_abi.so" )), "op module does not exist" op_module = tf.load_op_library(module_path + "libop_abi.so") -# load grad of force module -sys.path.append (module_path ) -import deepmd._prod_force_grad -import deepmd._prod_virial_grad -import deepmd._prod_force_norot_grad -import deepmd._prod_virial_norot_grad -from deepmd.RunOptions import RunOptions - -def j_must_have (jdata, key) : - if not key in jdata.keys() : - raise RuntimeError ("json database must provide key " + key ) - else : - return jdata[key] - -def j_must_have_d (jdata, key, deprecated_key) : - if not key in jdata.keys() : - # raise RuntimeError ("json database must provide key " + key ) - for ii in deprecated_key : - if ii in jdata.keys() : - warnings.warn("the key \"%s\" is deprecated, please use \"%s\" instead" % (ii,key)) - return jdata[ii] - raise RuntimeError ("json database must provide key " + key ) - else : - return jdata[key] - -def j_have (jdata, key) : - return key in jdata.keys() - -def _is_subdir(path, directory): - path = os.path.realpath(path) - directory = os.path.realpath(directory) - if path == directory: - return False - relative = os.path.relpath(path, directory) + os.sep - return not relative.startswith(os.pardir + os.sep) - -class LearingRate (object) : - def __init__ (self, - jdata, - tot_numb_batches) : - self.decay_steps_ = j_must_have(jdata, 'decay_steps') - self.decay_rate_ = j_must_have(jdata, 'decay_rate') - self.start_lr_ = j_must_have(jdata, 'start_lr') - self.tot_numb_batches = tot_numb_batches - - def value (self, - batch) : - return self.start_lr_ * np.power (self.decay_rate_, (batch // self.decay_steps())) - - def 
decay_steps (self) : -# return self.decay_steps_ * self.tot_numb_batches - return self.decay_steps_ - - def decay_rate (self) : - return self.decay_rate_ - - def start_lr (self) : - return self.start_lr_ - -class NNPModel (object): - def __init__(self, - jdata, - run_opt): - self.run_opt = run_opt - self._init_param(jdata) - self.null_mesh = tf.constant ([-1]) - - def _init_param(self, jdata): - # descrpt config - self.use_smooth = False - if j_have (jdata, "use_smooth") : - self.use_smooth = jdata["use_smooth"] - self.sel_a = j_must_have (jdata, 'sel_a') - self.sel_r = [ 0 for ii in range(len(self.sel_a)) ] - if not self.use_smooth : - self.sel_r = j_must_have (jdata, 'sel_r') - else : - if j_have (jdata, 'sel_r') : - warnings.warn ('ignoring key sel_r in the json database and set sel_r to %s' % str(self.sel_r)) - self.rcut_a = -1 - self.rcut_r = j_must_have (jdata, 'rcut') - if j_have(jdata, 'rcut_smth') : - self.rcut_r_smth = jdata['rcut_smth'] +class Model() : + def __init__ (self, jdata, descrpt, fitting): + self.descrpt = descrpt + self.rcut = self.descrpt.get_rcut() + self.ntypes = self.descrpt.get_ntypes() + # type_map + self.type_map = [] + if j_have(jdata, 'type_map') : + self.type_map = jdata['type_map'] + # fitting + self.fitting = fitting + self.numb_fparam = self.fitting.get_numb_fparam() + # short-range tab + if 'use_srtab' in jdata : + self.srtab = TabInter(jdata['use_srtab']) + self.smin_alpha = j_must_have(jdata, 'smin_alpha') + self.sw_rmin = j_must_have(jdata, 'sw_rmin') + self.sw_rmax = j_must_have(jdata, 'sw_rmax') else : - self.rcut_r_smth = self.rcut_r - # axis - self.axis_rule = [] - if j_have (jdata, 'axis_rule') : - self.axis_rule = jdata['axis_rule'] - # filter of smooth version - if self.use_smooth : - if j_have(jdata, 'coord_norm') : - self.coord_norm = jdata['coord_norm'] - else : - self.coord_norm = True - self.filter_neuron = j_must_have (jdata, 'filter_neuron') - self.n_axis_neuron = j_must_have_d (jdata, 'axis_neuron', 
['n_axis_neuron']) - self.filter_resnet_dt = False - if j_have(jdata, 'filter_resnet_dt') : - self.filter_resnet_dt = jdata['filter_resnet_dt'] - # numb of neighbors and numb of descrptors - self.nnei_a = np.cumsum(self.sel_a)[-1] - self.nnei_r = np.cumsum(self.sel_r)[-1] - self.nnei = self.nnei_a + self.nnei_r - self.ndescrpt_a = self.nnei_a * 4 - self.ndescrpt_r = self.nnei_r * 1 - self.ndescrpt = self.ndescrpt_a + self.ndescrpt_r - # network size - self.n_neuron = j_must_have_d (jdata, 'fitting_neuron', ['n_neuron']) - self.resnet_dt = True - if j_have(jdata, 'resnet_dt') : - warnings.warn("the key \"%s\" is deprecated, please use \"%s\" instead" % ('resnet_dt','fitting_resnet_dt')) - self.resnet_dt = jdata['resnet_dt'] - if j_have(jdata, 'fitting_resnet_dt') : - self.resnet_dt = jdata['fitting_resnet_dt'] - if self.use_smooth : - if j_have(jdata, 'type_fitting_net') : - self.type_fitting_net = jdata['type_fitting_net'] - else : - self.type_fitting_net = False - - self.numb_test = j_must_have (jdata, 'numb_test') - self.useBN = False + self.srtab = None - self.start_pref_e = j_must_have (jdata, 'start_pref_e') - self.limit_pref_e = j_must_have (jdata, 'limit_pref_e') - self.start_pref_f = j_must_have (jdata, 'start_pref_f') - self.limit_pref_f = j_must_have (jdata, 'limit_pref_f') - self.start_pref_v = j_must_have (jdata, 'start_pref_v') - self.limit_pref_v = j_must_have (jdata, 'limit_pref_v') - self.start_pref_ae = 0 - if j_have(jdata, 'start_pref_ae') : - self.start_pref_ae = jdata['start_pref_ae'] - self.limit_pref_ae = 0 - if j_have(jdata, 'limit_pref_ae') : - self.limit_pref_ae = jdata['limit_pref_ae'] - self.has_e = (self.start_pref_e != 0 or self.limit_pref_e != 0) - self.has_f = (self.start_pref_f != 0 or self.limit_pref_f != 0) - self.has_v = (self.start_pref_v != 0 or self.limit_pref_v != 0) - self.has_ae = (self.start_pref_ae != 0 or self.limit_pref_ae != 0) - self.disp_file = "lcurve.out" - if j_have (jdata, "disp_file") : self.disp_file = 
jdata["disp_file"] - self.disp_freq = j_must_have (jdata, 'disp_freq') - self.save_freq = j_must_have (jdata, 'save_freq') - self.save_ckpt = j_must_have (jdata, 'save_ckpt') + def get_rcut (self) : + return self.rcut - self.seed = None - if j_have (jdata, 'seed') : - self.seed = jdata['seed'] + def get_ntypes (self) : + return self.ntypes - self.display_in_training = j_must_have (jdata, 'disp_training') - self.timing_in_training = j_must_have (jdata, 'time_training') - self.profiling = False - if j_have (jdata, 'profiling') : - self.profiling = jdata['profiling'] - if self.profiling : - self.profiling_file = j_must_have (jdata, 'profiling_file') + def get_numb_fparam (self) : + return self.numb_fparam - self.sys_weights = None - if j_have(jdata, 'sys_weights') : - self.sys_weights = jdata['sys_weights'] + def get_type_map (self) : + return self.type_map - - def _message (self, msg) : - self.run_opt.message(msg) - - def build (self, - data, - lr) : - self.lr = lr - self.ntypes = len(self.sel_a) - assert (self.ntypes == len(self.sel_r)), "size sel r array should match ntypes" - assert (self.ntypes == data.get_ntypes()), "ntypes should match that found in data" - - self.batch_size = data.get_batch_size() - - davg, dstd, bias_e = self._data_stat(data) - - worker_device = "/job:%s/task:%d/%s" % (self.run_opt.my_job_name, - self.run_opt.my_task_index, - self.run_opt.my_device) - with tf.device(tf.train.replica_device_setter(worker_device = worker_device, - cluster = self.run_opt.cluster_spec)): - self._build_lr(lr) - self._build_network(davg, dstd, bias_e) - self._build_training() - - def _data_stat(self, data): + def data_stat(self, data): all_stat_coord = [] all_stat_box = [] all_stat_type = [] @@ -217,7 +57,7 @@ def _data_stat(self, data): for ii in range(data.get_nsystems()) : stat_prop_c, \ stat_energy, stat_force, stat_virial, start_atom_ener, \ - stat_coord, stat_box, stat_type, natoms_vec, default_mesh \ + stat_coord, stat_box, stat_type, stat_fparam, 
natoms_vec, default_mesh \ = data.get_batch (sys_idx = ii) natoms_vec = natoms_vec.astype(np.int32) all_stat_coord.append(stat_coord) @@ -226,966 +66,147 @@ def _data_stat(self, data): all_natoms_vec.append(natoms_vec) all_default_mesh.append(default_mesh) - if self.use_smooth and not self.coord_norm : - davg, dstd = self.no_norm_dstats () - self._message("skipped coord/descrpt stats") - else : - davg, dstd = self.compute_dstats (all_stat_coord, all_stat_box, all_stat_type, all_natoms_vec, all_default_mesh) - self._message("computed coord/descrpt stats") - if self.run_opt.is_chief: - np.savetxt ("stat.avg.out", davg.T) - np.savetxt ("stat.std.out", dstd.T) + davg, dstd = self.compute_dstats (all_stat_coord, all_stat_box, all_stat_type, all_natoms_vec, all_default_mesh) + # if self.run_opt.is_chief: + # np.savetxt ("stat.avg.out", davg.T) + # np.savetxt ("stat.std.out", dstd.T) bias_atom_e = data.compute_energy_shift() - self._message("computed energy bias") + # self._message("computed energy bias") return davg, dstd, bias_atom_e - def _build_lr(self, lr): - self._extra_train_ops = [] - self.global_step = tf.train.get_or_create_global_step() - self.starter_learning_rate = lr.start_lr() - self.learning_rate = tf.train.exponential_decay(lr.start_lr(), - self.global_step, - lr.decay_steps(), - lr.decay_rate(), - staircase=True) - self._message("built lr") - - def _build_network(self, davg, dstd, bias_atom_e): - self.t_avg = tf.get_variable('t_avg', - davg.shape, - dtype = global_tf_float_precision, - trainable = False, - initializer = tf.constant_initializer(davg, dtype = global_tf_float_precision)) - self.t_std = tf.get_variable('t_std', - dstd.shape, - dtype = global_tf_float_precision, - trainable = False, - initializer = tf.constant_initializer(dstd, dtype = global_tf_float_precision)) - - t_rcut = tf.constant(np.max([self.rcut_r, self.rcut_a]), name = 't_rcut', dtype = global_tf_float_precision) - t_ntypes = tf.constant(self.ntypes, name = 't_ntypes', dtype = 
tf.int32) - - self.t_prop_c = tf.placeholder(tf.float32, [4], name='t_prop_c') - self.t_energy = tf.placeholder(global_ener_float_precision, [None], name='t_energy') - self.t_force = tf.placeholder(global_tf_float_precision, [None], name='t_force') - self.t_virial = tf.placeholder(global_tf_float_precision, [None], name='t_virial') - self.t_atom_ener = tf.placeholder(global_tf_float_precision, [None], name='t_atom_ener') - self.t_coord = tf.placeholder(global_tf_float_precision, [None], name='t_coord') - self.t_type = tf.placeholder(tf.int32, [None], name='t_type') - self.t_natoms = tf.placeholder(tf.int32, [self.ntypes+2], name='t_natoms') - self.t_box = tf.placeholder(global_tf_float_precision, [None, 9], name='t_box') - self.t_mesh = tf.placeholder(tf.int32, [None], name='t_mesh') - self.is_training = tf.placeholder(tf.bool) - - self.batch_size_value = list(set(self.batch_size)) - self.batch_size_value.sort() - self.numb_batch_size_value = len(self.batch_size_value) - - self.energy_frz, self.force_frz, self.virial_frz, self.atom_ener_frz \ - = self.build_interaction (1, - self.t_coord, - self.t_type, - self.t_natoms, - self.t_box, - self.t_mesh, - bias_atom_e = bias_atom_e, - suffix = "test", - reuse = False) - self.energy_tst, self.force_tst, self.virial_tst, self.atom_ener_tst \ - = self.build_interaction (self.numb_test, - self.t_coord, - self.t_type, - self.t_natoms, - self.t_box, - self.t_mesh, - bias_atom_e = bias_atom_e, - suffix = "train_test", - reuse = True) - self.energy_bch = [] - self.force_bch = [] - self.virial_bch = [] - self.atom_ener_bch = [] - for ii in range(self.numb_batch_size_value) : - tmp_energy_bch, tmp_force_bch, tmp_virial_bch, tmp_atom_ener_bch \ - = self.build_interaction (self.batch_size_value[ii], - self.t_coord, - self.t_type, - self.t_natoms, - self.t_box, - self.t_mesh, - bias_atom_e = bias_atom_e, - suffix = "train_batch_" + str(self.batch_size_value[ii]), - reuse = True) - self.energy_bch.append(tmp_energy_bch) - 
self.force_bch.append(tmp_force_bch) - self.virial_bch.append(tmp_virial_bch) - self.atom_ener_bch.append(tmp_atom_ener_bch) - - self.l2_l_tst, self.l2_el_tst, self.l2_fl_tst, self.l2_vl_tst, self.l2_ael_tst \ - = self.loss (self.t_natoms, \ - self.t_prop_c, \ - self.t_energy, self.energy_tst, \ - self.t_force, self.force_tst, \ - self.t_virial, self.virial_tst, \ - self.t_atom_ener, self.atom_ener_tst, \ - suffix = "train_test") - self.l2_l_bch = [] - self.l2_el_bch = [] - self.l2_fl_bch = [] - self.l2_vl_bch = [] - self.l2_ael_bch = [] - for ii in range(self.numb_batch_size_value) : - tmp_l2_l_bch, tmp_l2_el_bch, tmp_l2_fl_bch, tmp_l2_vl_bch, tmp_l2_ael_bch \ - = self.loss (self.t_natoms, \ - self.t_prop_c, \ - self.t_energy, self.energy_bch[ii], \ - self.t_force, self.force_bch[ii], \ - self.t_virial, self.virial_bch[ii], \ - self.t_atom_ener, self.atom_ener_bch[ii], \ - suffix = "train_batch_" + str(self.batch_size_value[ii])) - self.l2_l_bch.append(tmp_l2_l_bch) - self.l2_el_bch.append(tmp_l2_el_bch) - self.l2_fl_bch.append(tmp_l2_fl_bch) - self.l2_vl_bch.append(tmp_l2_vl_bch) - self.l2_ael_bch.append(tmp_l2_ael_bch) - - self._message("built network") - - def _build_training(self): - self.train_op = [] - trainable_variables = tf.trainable_variables() - optimizer = tf.train.AdamOptimizer(learning_rate = self.learning_rate) - if self.run_opt.is_distrib : - optimizer = tf.train.SyncReplicasOptimizer( - optimizer, - replicas_to_aggregate = self.run_opt.cluster_spec.num_tasks("worker"), - total_num_replicas = self.run_opt.cluster_spec.num_tasks("worker"), - name = "sync_replicas") - self.sync_replicas_hook = optimizer.make_session_run_hook(self.run_opt.is_chief) - for ii in range(self.numb_batch_size_value) : - grads = tf.gradients(self.l2_l_bch[ii], trainable_variables) - apply_op = optimizer.apply_gradients (zip (grads, trainable_variables), - global_step=self.global_step, - name='train_step') - train_ops = [apply_op] + self._extra_train_ops - 
self.train_op.append(tf.group(*train_ops)) - self._message("built training") - - def _init_sess_serial(self) : - self.sess = tf.Session( - config=tf.ConfigProto(intra_op_parallelism_threads=self.run_opt.num_intra_threads, - inter_op_parallelism_threads=self.run_opt.num_inter_threads - )) - self.saver = tf.train.Saver() - saver = self.saver - if self.run_opt.init_mode == 'init_from_scratch' : - self._message("initialize model from scratch") - init_op = tf.global_variables_initializer() - self.sess.run(init_op) - fp = open(self.disp_file, "w") - fp.close () - elif self.run_opt.init_mode == 'init_from_model' : - self._message("initialize from model %s" % self.run_opt.init_model) - init_op = tf.global_variables_initializer() - self.sess.run(init_op) - saver.restore (self.sess, self.run_opt.init_model) - self.sess.run(self.global_step.assign(0)) - fp = open(self.disp_file, "w") - fp.close () - elif self.run_opt.init_mode == 'restart' : - self._message("restart from model %s" % self.run_opt.restart) - init_op = tf.global_variables_initializer() - self.sess.run(init_op) - saver.restore (self.sess, self.run_opt.restart) - else : - raise RuntimeError ("unkown init mode") - - def _init_sess_distrib(self): - ckpt_dir = os.path.join(os.getcwd(), self.save_ckpt) - assert(_is_subdir(ckpt_dir, os.getcwd())), "the checkpoint dir must be a subdir of the current dir" - if self.run_opt.init_mode == 'init_from_scratch' : - self._message("initialize model from scratch") - if self.run_opt.is_chief : - if os.path.exists(ckpt_dir): - shutil.rmtree(ckpt_dir) - if not os.path.exists(ckpt_dir) : - os.makedirs(ckpt_dir) - fp = open(self.disp_file, "w") - fp.close () - elif self.run_opt.init_mode == 'init_from_model' : - raise RuntimeError("distributed training does not support %s" % self.run_opt.init_mode) - elif self.run_opt.init_mode == 'restart' : - self._message("restart from model %s" % ckpt_dir) - if self.run_opt.is_chief : - assert(os.path.isdir(ckpt_dir)), "the checkpoint dir %s 
should exists" % ckpt_dir - else : - raise RuntimeError ("unkown init mode") - - saver = tf.train.Saver(max_to_keep = 1) - self.saver = None - # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5) - # config = tf.ConfigProto(allow_soft_placement=True, - # gpu_options = gpu_options, - # intra_op_parallelism_threads=self.run_opt.num_intra_threads, - # inter_op_parallelism_threads=self.run_opt.num_inter_threads) - config = tf.ConfigProto(intra_op_parallelism_threads=self.run_opt.num_intra_threads, - inter_op_parallelism_threads=self.run_opt.num_inter_threads) - # The stop_hook handles stopping after running given steps - # stop_hook = tf.train.StopAtStepHook(last_step = stop_batch) - # hooks = [self.sync_replicas_hook, stop_hook] - hooks = [self.sync_replicas_hook] - scaffold = tf.train.Scaffold(saver=saver) - # Use monitor session for distributed computation - self.sess = tf.train.MonitoredTrainingSession(master = self.run_opt.server.target, - is_chief = self.run_opt.is_chief, - config = config, - hooks = hooks, - scaffold = scaffold, - checkpoint_dir = ckpt_dir) - # , - # save_checkpoint_steps = self.save_freq) - - def train (self, - data, - stop_batch) : - if self.run_opt.is_distrib : - self._init_sess_distrib() - else : - self._init_sess_serial() - - self.print_head() - fp = None - if self.run_opt.is_chief : - fp = open(self.disp_file, "a") - - cur_batch = self.sess.run(self.global_step) - self.cur_batch = cur_batch - self.run_opt.message("start training at lr %.2e (== %.2e), final lr will be %.2e" % - (self.sess.run(self.learning_rate), - self.lr.value(cur_batch), - self.lr.value(stop_batch)) - ) - - prf_options = None - prf_run_metadata = None - if self.profiling : - prf_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) - prf_run_metadata = tf.RunMetadata() - - train_time = 0 - while cur_batch < stop_batch : - batch_prop_c, \ - batch_energy, batch_force, batch_virial, batch_atom_ener, \ - batch_coord, batch_box, batch_type, \ - 
natoms_vec, \ - default_mesh \ - = data.get_batch (sys_weights = self.sys_weights) - cur_batch_size = batch_energy.shape[0] - cur_bs_idx = self.batch_size_value.index(cur_batch_size) - feed_dict_batch = {self.t_prop_c: batch_prop_c, - self.t_energy: batch_energy, - self.t_force: np.reshape(batch_force, [-1]), - self.t_virial: np.reshape(batch_virial, [-1]), - self.t_atom_ener: np.reshape(batch_atom_ener, [-1]), - self.t_coord: np.reshape(batch_coord, [-1]), - self.t_box: batch_box, - self.t_type: np.reshape(batch_type, [-1]), - self.t_natoms: natoms_vec, - self.t_mesh: default_mesh, - self.is_training: True} - if self.display_in_training and cur_batch == 0 : - self.test_on_the_fly(fp, data, feed_dict_batch, cur_bs_idx) - if self.timing_in_training : tic = time.time() - self.sess.run([self.train_op[cur_bs_idx]], feed_dict = feed_dict_batch, options=prf_options, run_metadata=prf_run_metadata) - if self.timing_in_training : toc = time.time() - if self.timing_in_training : train_time += toc - tic - cur_batch = self.sess.run(self.global_step) - self.cur_batch = cur_batch - - if self.display_in_training and (cur_batch % self.disp_freq == 0) : - tic = time.time() - self.test_on_the_fly(fp, data, feed_dict_batch, cur_bs_idx) - toc = time.time() - test_time = toc - tic - if self.timing_in_training : - self._message("batch %7d training time %.2f s, testing time %.2f s" - % (cur_batch, train_time, test_time)) - train_time = 0 - if self.save_freq > 0 and cur_batch % self.save_freq == 0 and self.run_opt.is_chief : - if self.saver is not None : - self.saver.save (self.sess, os.getcwd() + "/" + self.save_ckpt) - self._message("saved checkpoint %s" % self.save_ckpt) - if self.run_opt.is_chief: - fp.close () - if self.profiling and self.run_opt.is_chief : - fetched_timeline = timeline.Timeline(prf_run_metadata.step_stats) - chrome_trace = fetched_timeline.generate_chrome_trace_format() - with open(self.profiling_file, 'w') as f: - f.write(chrome_trace) - - def get_global_step 
(self) : - return self.sess.run(self.global_step) - - def print_head (self) : - if self.run_opt.is_chief: - fp = open(self.disp_file, "a") - print_str = "# %5s" % 'batch' - prop_fmt = ' %9s %9s' - print_str += prop_fmt % ('l2_tst', 'l2_trn') - if self.has_e : - print_str += prop_fmt % ('l2_e_tst', 'l2_e_trn') - if self.has_ae : - print_str += prop_fmt % ('l2_ae_tst', 'l2_ae_trn') - if self.has_f : - print_str += prop_fmt % ('l2_f_tst', 'l2_f_trn') - if self.has_v : - print_str += prop_fmt % ('l2_v_tst', 'l2_v_trn') - print_str += ' %8s\n' % 'lr' - fp.write(print_str) - fp.close () - - def test_on_the_fly (self, - fp, - data, - feed_dict_batch, - ii) : - test_prop_c, \ - test_energy, test_force, test_virial, test_atom_ener, \ - test_coord, test_box, test_type, \ - natoms_vec, \ - default_mesh \ - = data.get_test () - feed_dict_test = {self.t_prop_c: test_prop_c, - self.t_energy: test_energy [:self.numb_test], - self.t_force: np.reshape(test_force [:self.numb_test, :], [-1]), - self.t_virial: np.reshape(test_virial [:self.numb_test, :], [-1]), - self.t_atom_ener: np.reshape(test_atom_ener[:self.numb_test, :], [-1]), - self.t_coord: np.reshape(test_coord [:self.numb_test, :], [-1]), - self.t_box: test_box [:self.numb_test, :], - self.t_type: np.reshape(test_type [:self.numb_test, :], [-1]), - self.t_natoms: natoms_vec, - self.t_mesh: default_mesh, - self.is_training: False} - error_test, error_e_test, error_f_test, error_v_test, error_ae_test \ - = self.sess.run([self.l2_l_tst, \ - self.l2_el_tst, \ - self.l2_fl_tst, \ - self.l2_vl_tst, \ - self.l2_ael_tst], - feed_dict=feed_dict_test) - error_train, error_e_train, error_f_train, error_v_train, error_ae_train \ - = self.sess.run([self.l2_l_bch[ii], \ - self.l2_el_bch[ii], \ - self.l2_fl_bch[ii], \ - self.l2_vl_bch[ii], \ - self.l2_ael_bch[ii]], - feed_dict=feed_dict_batch) - cur_batch = self.cur_batch - current_lr = self.sess.run(self.learning_rate) - if self.run_opt.is_chief: - print_str = "%7d" % cur_batch - 
prop_fmt = " %9.2e %9.2e" - print_str += prop_fmt % (np.sqrt(error_test), np.sqrt(error_train)) - if self.has_e : - print_str += prop_fmt % (np.sqrt(error_e_test) / natoms_vec[0], np.sqrt(error_e_train) / natoms_vec[0]) - if self.has_ae : - print_str += prop_fmt % (np.sqrt(error_ae_test), np.sqrt(error_ae_train)) - if self.has_f : - print_str += prop_fmt % (np.sqrt(error_f_test), np.sqrt(error_f_train)) - if self.has_v : - print_str += prop_fmt % (np.sqrt(error_v_test) / natoms_vec[0], np.sqrt(error_v_train) / natoms_vec[0]) - print_str += " %8.1e\n" % current_lr - fp.write(print_str) - fp.flush () - - def compute_dstats_sys_smth (self, - data_coord, - data_box, - data_atype, - natoms_vec, - mesh, - reuse = None) : - avg_zero = np.zeros([self.ntypes,self.ndescrpt]).astype(global_np_float_precision) - std_ones = np.ones ([self.ntypes,self.ndescrpt]).astype(global_np_float_precision) - sub_graph = tf.Graph() - with sub_graph.as_default(): - descrpt, descrpt_deriv, rij, nlist \ - = op_module.descrpt_norot (tf.constant(data_coord), - tf.constant(data_atype), - tf.constant(natoms_vec, dtype = tf.int32), - tf.constant(data_box), - tf.constant(mesh), - tf.constant(avg_zero), - tf.constant(std_ones), - rcut_a = self.rcut_a, - rcut_r = self.rcut_r, - rcut_r_smth = self.rcut_r_smth, - sel_a = self.sel_a, - sel_r = self.sel_r) - # self.sess.run(tf.global_variables_initializer()) - sub_sess = tf.Session(graph = sub_graph, - config=tf.ConfigProto(intra_op_parallelism_threads=self.run_opt.num_intra_threads, - inter_op_parallelism_threads=self.run_opt.num_inter_threads - - )) - dd_all = sub_sess.run(descrpt) - sub_sess.close() - natoms = natoms_vec - dd_all = np.reshape(dd_all, [-1, self.ndescrpt * natoms[0]]) - start_index = 0 - sysr = [] - sysa = [] - sysn = [] - sysr2 = [] - sysa2 = [] - for type_i in range(self.ntypes): - end_index = start_index + self.ndescrpt * natoms[2+type_i] - dd = dd_all[:, start_index:end_index] - dd = np.reshape(dd, [-1, self.ndescrpt]) - start_index 
= end_index - # compute - dd = np.reshape (dd, [-1, 4]) - ddr = dd[:,:1] - dda = dd[:,1:] - sumr = np.sum(ddr) - suma = np.sum(dda) / 3. - sumn = dd.shape[0] - sumr2 = np.sum(np.multiply(ddr, ddr)) - suma2 = np.sum(np.multiply(dda, dda)) / 3. - sysr.append(sumr) - sysa.append(suma) - sysn.append(sumn) - sysr2.append(sumr2) - sysa2.append(suma2) - return sysr, sysr2, sysa, sysa2, sysn - - def compute_dstats_sys_nonsmth (self, - data_coord, - data_box, - data_atype, - natoms_vec, - mesh, - reuse = None) : - avg_zero = np.zeros([self.ntypes,self.ndescrpt]).astype(global_np_float_precision) - std_ones = np.ones ([self.ntypes,self.ndescrpt]).astype(global_np_float_precision) - sub_graph = tf.Graph() - with sub_graph.as_default(): - descrpt, descrpt_deriv, rij, nlist, axis \ - = op_module.descrpt (tf.constant(data_coord), - tf.constant(data_atype), - tf.constant(natoms_vec, dtype = tf.int32), - tf.constant(data_box), - tf.constant(mesh), - tf.constant(avg_zero), - tf.constant(std_ones), - rcut_a = self.rcut_a, - rcut_r = self.rcut_r, - sel_a = self.sel_a, - sel_r = self.sel_r, - axis_rule = self.axis_rule) - # self.sess.run(tf.global_variables_initializer()) - sub_sess = tf.Session(graph = sub_graph, - config=tf.ConfigProto(intra_op_parallelism_threads=self.run_opt.num_intra_threads, - inter_op_parallelism_threads=self.run_opt.num_inter_threads - )) - dd_all = sub_sess.run(descrpt) - sub_sess.close() - natoms = natoms_vec - dd_all = np.reshape(dd_all, [-1, self.ndescrpt * natoms[0]]) - start_index = 0 - sysv = [] - sysn = [] - sysv2 = [] - for type_i in range(self.ntypes): - end_index = start_index + self.ndescrpt * natoms[2+type_i] - dd = dd_all[:, start_index:end_index] - dd = np.reshape(dd, [-1, self.ndescrpt]) - start_index = end_index - # compute - sumv = np.sum(dd, axis = 0) - sumn = dd.shape[0] - sumv2 = np.sum(np.multiply(dd,dd), axis = 0) - sysv.append(sumv) - sysn.append(sumn) - sysv2.append(sumv2) - return sysv, sysv2, sysn - - - def compute_std (self,sumv2, 
sumv, sumn) : - return np.sqrt(sumv2/sumn - np.multiply(sumv/sumn, sumv/sumn)) - def compute_dstats (self, data_coord, data_box, data_atype, natoms_vec, mesh, - reuse = None) : - env_bk = None - if 'TF_CPP_MIN_LOG_LEVEL' in os.environ: - env_bk = os.environ['TF_CPP_MIN_LOG_LEVEL'] - os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' - - all_davg = [] - all_dstd = [] - if self.use_smooth: - sumr = [] - suma = [] - sumn = [] - sumr2 = [] - suma2 = [] - for cc,bb,tt,nn,mm in zip(data_coord,data_box,data_atype,natoms_vec,mesh) : - sysr,sysr2,sysa,sysa2,sysn \ - = self.compute_dstats_sys_smth(cc,bb,tt,nn,mm,reuse) - sumr.append(sysr) - suma.append(sysa) - sumn.append(sysn) - sumr2.append(sysr2) - suma2.append(sysa2) - sumr = np.sum(sumr, axis = 0) - suma = np.sum(suma, axis = 0) - sumn = np.sum(sumn, axis = 0) - sumr2 = np.sum(sumr2, axis = 0) - suma2 = np.sum(suma2, axis = 0) - for type_i in range(self.ntypes) : - davgunit = [sumr[type_i]/sumn[type_i], 0, 0, 0] - dstdunit = [self.compute_std(sumr2[type_i], sumr[type_i], sumn[type_i]), - self.compute_std(suma2[type_i], suma[type_i], sumn[type_i]), - self.compute_std(suma2[type_i], suma[type_i], sumn[type_i]), - self.compute_std(suma2[type_i], suma[type_i], sumn[type_i]) - ] - davg = np.tile(davgunit, self.ndescrpt // 4) - dstd = np.tile(dstdunit, self.ndescrpt // 4) - all_davg.append(davg) - all_dstd.append(dstd) - else : - sumv = [] - sumn = [] - sumv2 = [] - for cc,bb,tt,nn,mm in zip(data_coord,data_box,data_atype,natoms_vec,mesh) : - sysv,sysv2,sysn \ - = self.compute_dstats_sys_nonsmth(cc,bb,tt,nn,mm,reuse) - sumv.append(sysv) - sumn.append(sysn) - sumv2.append(sysv2) - sumv = np.sum(sumv, axis = 0) - sumn = np.sum(sumn, axis = 0) - sumv2 = np.sum(sumv2, axis = 0) - for type_i in range(self.ntypes) : - davg = sumv[type_i] / sumn[type_i] - dstd = self.compute_std(sumv2[type_i], sumv[type_i], sumn[type_i]) - for ii in range (len(dstd)) : - if (np.abs(dstd[ii]) < 1e-2) : - dstd[ii] = 1e-2 - all_davg.append(davg) - 
all_dstd.append(dstd) - - davg = np.array(all_davg) - dstd = np.array(all_dstd) - if env_bk is not None : - os.environ['TF_CPP_MIN_LOG_LEVEL'] = env_bk - else : - os.environ.pop('TF_CPP_MIN_LOG_LEVEL', None) - return davg, dstd - - def no_norm_dstats (self, avgv = 0, stdv = 1) : - davg = np.zeros([self.ntypes, self.ndescrpt]) + avgv - dstd = np.ones ([self.ntypes, self.ndescrpt]) * stdv - return davg, dstd - - def loss (self, - natoms, - prop_c, - energy, - energy_hat, - force, - force_hat, - virial, - virial_hat, - atom_ener, - atom_ener_hat, - suffix): - l2_ener_loss = tf.reduce_mean( tf.square(energy - energy_hat), name='l2_'+suffix) - - force_reshape = tf.reshape (force, [-1]) - force_hat_reshape = tf.reshape (force_hat, [-1]) - l2_force_loss = tf.reduce_mean (tf.square(force_hat_reshape - force_reshape), name = "l2_force_" + suffix) - - virial_reshape = tf.reshape (virial, [-1]) - virial_hat_reshape = tf.reshape (virial_hat, [-1]) - l2_virial_loss = tf.reduce_mean (tf.square(virial_hat_reshape - virial_reshape), name = "l2_virial_" + suffix) - - atom_ener_reshape = tf.reshape (atom_ener, [-1]) - atom_ener_hat_reshape = tf.reshape (atom_ener_hat, [-1]) - l2_atom_ener_loss = tf.reduce_mean (tf.square(atom_ener_hat_reshape - atom_ener_reshape), name = "l2_atom_ener_" + suffix) - - atom_norm = 1./ global_cvt_2_tf_float(natoms[0]) - atom_norm_ener = 1./ global_cvt_2_ener_float(natoms[0]) - pref_e = global_cvt_2_ener_float(prop_c[0] * (self.limit_pref_e + (self.start_pref_e - self.limit_pref_e) * self.learning_rate / self.starter_learning_rate) ) - pref_f = global_cvt_2_tf_float(prop_c[1] * (self.limit_pref_f + (self.start_pref_f - self.limit_pref_f) * self.learning_rate / self.starter_learning_rate) ) - pref_v = global_cvt_2_tf_float(prop_c[2] * (self.limit_pref_v + (self.start_pref_v - self.limit_pref_v) * self.learning_rate / self.starter_learning_rate) ) - pref_ae= global_cvt_2_tf_float(prop_c[3] * (self.limit_pref_ae+ (self.start_pref_ae-self.limit_pref_ae) * 
self.learning_rate / self.starter_learning_rate) ) - - l2_loss = 0 - if self.has_e : - l2_loss += atom_norm_ener * (pref_e * l2_ener_loss) - if self.has_f : - l2_loss += global_cvt_2_ener_float(pref_f * l2_force_loss) - if self.has_v : - l2_loss += global_cvt_2_ener_float(atom_norm * (pref_v * l2_virial_loss)) - if self.has_ae : - l2_loss += global_cvt_2_ener_float(pref_ae * l2_atom_ener_loss) - - return l2_loss, l2_ener_loss, l2_force_loss, l2_virial_loss, l2_atom_ener_loss + reuse = None) : + return self.descrpt.compute_dstats(data_coord, data_box, data_atype, natoms_vec, mesh, reuse) + + def build (self, + coord_, + atype_, + natoms, + box, + mesh, + fparam, + davg = None, + dstd = None, + bias_atom_e = None, + suffix = '', + reuse = None): + + with tf.variable_scope('model_attr' + suffix, reuse = reuse) : + t_tmap = tf.constant(' '.join(self.type_map), + name = 'tmap', + dtype = tf.string) + + if self.srtab is not None : + tab_info, tab_data = self.srtab.get() + self.tab_info = tf.get_variable('t_tab_info', + tab_info.shape, + dtype = tf.float64, + trainable = False, + initializer = tf.constant_initializer(tab_info, dtype = tf.float64)) + self.tab_data = tf.get_variable('t_tab_data', + tab_data.shape, + dtype = tf.float64, + trainable = False, + initializer = tf.constant_initializer(tab_data, dtype = tf.float64)) - def build_interaction (self, - nframes, - coord_, - atype_, - natoms, - box, - mesh, - suffix, - bias_atom_e = None, - reuse = None): coord = tf.reshape (coord_, [-1, natoms[1] * 3]) atype = tf.reshape (atype_, [-1, natoms[1]]) - if self.use_smooth : - descrpt, descrpt_deriv, rij, nlist \ - = op_module.descrpt_norot (coord, - atype, - natoms, - box, - mesh, - self.t_avg, - self.t_std, - rcut_a = self.rcut_a, - rcut_r = self.rcut_r, - rcut_r_smth = self.rcut_r_smth, - sel_a = self.sel_a, - sel_r = self.sel_r) - else : - descrpt, descrpt_deriv, rij, nlist, axis \ - = op_module.descrpt (coord, - atype, - natoms, - box, - mesh, - self.t_avg, - 
self.t_std, - rcut_a = self.rcut_a, - rcut_r = self.rcut_r, - sel_a = self.sel_a, - sel_r = self.sel_r, - axis_rule = self.axis_rule) - - descrpt_reshape = tf.reshape(descrpt, [-1, self.ndescrpt]) - - atom_ener = self.build_atom_net (nframes, descrpt_reshape, natoms, bias_atom_e = bias_atom_e, reuse = reuse) - - energy_raw = tf.reshape(atom_ener, [-1, natoms[0]], name = 'atom_energy_'+suffix) - energy = tf.reduce_sum(global_cvt_2_ener_float(energy_raw), axis=1, name='energy_'+suffix) - - net_deriv_tmp = tf.gradients (atom_ener, descrpt_reshape) - net_deriv = net_deriv_tmp[0] - net_deriv_reshape = tf.reshape (net_deriv, [-1, natoms[0] * self.ndescrpt]) - - if self.use_smooth : - force = op_module.prod_force_norot (net_deriv_reshape, - descrpt_deriv, - nlist, - natoms, - n_a_sel = self.nnei_a, - n_r_sel = self.nnei_r) - else : - force = op_module.prod_force (net_deriv_reshape, - descrpt_deriv, - nlist, - axis, - natoms, - n_a_sel = self.nnei_a, - n_r_sel = self.nnei_r) - force = tf.reshape (force, [-1, 3 * natoms[1]], name = "force_"+suffix) - - if self.use_smooth : - virial, atom_virial \ - = op_module.prod_virial_norot (net_deriv_reshape, - descrpt_deriv, - rij, - nlist, - natoms, - n_a_sel = self.nnei_a, - n_r_sel = self.nnei_r) + dout \ + = self.descrpt.build(coord_, + atype_, + natoms, + box, + mesh, + davg = davg, + dstd = dstd, + suffix = suffix, + reuse = reuse) + + if self.srtab is not None : + nlist, rij, sel_a, sel_r = self.descrpt.get_nlist() + nnei_a = np.cumsum(sel_a)[-1] + nnei_r = np.cumsum(sel_r)[-1] + + atom_ener = self.fitting.build (dout, + fparam, + natoms, + bias_atom_e = bias_atom_e, + reuse = reuse, + suffix = suffix) + + if self.srtab is not None : + sw_lambda, sw_deriv \ + = op_module.soft_min_switch(atype, + rij, + nlist, + natoms, + sel_a = sel_a, + sel_r = sel_r, + alpha = self.smin_alpha, + rmin = self.sw_rmin, + rmax = self.sw_rmax) + inv_sw_lambda = 1.0 - sw_lambda + # NOTICE: + # atom energy is not scaled, + # force and virial are 
scaled + tab_atom_ener, tab_force, tab_atom_virial \ + = op_module.tab_inter(self.tab_info, + self.tab_data, + atype, + rij, + nlist, + natoms, + sw_lambda, + sel_a = sel_a, + sel_r = sel_r) + energy_diff = tab_atom_ener - tf.reshape(atom_ener, [-1, natoms[0]]) + tab_atom_ener = tf.reshape(sw_lambda, [-1]) * tf.reshape(tab_atom_ener, [-1]) + atom_ener = tf.reshape(inv_sw_lambda, [-1]) * atom_ener + energy_raw = tab_atom_ener + atom_ener else : - virial, atom_virial \ - = op_module.prod_virial (net_deriv_reshape, - descrpt_deriv, - rij, - nlist, - axis, - natoms, - n_a_sel = self.nnei_a, - n_r_sel = self.nnei_r) - virial = tf.reshape (virial, [-1, 9], name = "virial_"+suffix) - atom_virial = tf.reshape (atom_virial, [-1, 9 * natoms[1]], name = "atom_virial_"+suffix) - - return energy, force, virial, energy_raw - - def build_atom_net (self, - nframes, - inputs, - natoms, - bias_atom_e = None, - reuse = None) : - start_index = 0 - inputs = tf.reshape(inputs, [-1, self.ndescrpt * natoms[0]]) - shape = inputs.get_shape().as_list() - if bias_atom_e is not None : - assert(len(bias_atom_e) == self.ntypes) + energy_raw = atom_ener - for type_i in range(self.ntypes): - # cut-out inputs - inputs_i = tf.slice (inputs, - [ 0, start_index* self.ndescrpt], - [-1, natoms[2+type_i]* self.ndescrpt] ) - inputs_i = tf.reshape(inputs_i, [-1, self.ndescrpt]) - start_index += natoms[2+type_i] - if bias_atom_e is None : - type_bias_ae = 0.0 - else : - type_bias_ae = bias_atom_e[type_i] + energy_raw = tf.reshape(energy_raw, [-1, natoms[0]], name = 'o_atom_energy'+suffix) + energy = tf.reduce_sum(global_cvt_2_ener_float(energy_raw), axis=1, name='o_energy'+suffix) - # compute atom energy - if self.use_smooth : - if self.type_fitting_net : - layer = self._DS_layer_type_ext(inputs_i, name='DS_layer_type_'+str(type_i), natoms=natoms, reuse=reuse, seed = self.seed) - else : - layer = self._DS_layer(inputs_i, name='DS_layer_type_'+str(type_i), natoms=natoms, reuse=reuse, seed = self.seed) - for 
ii in range(0,len(self.n_neuron)) : - if ii >= 1 and self.n_neuron[ii] == self.n_neuron[ii-1] : - layer+= self._one_layer(layer, self.n_neuron[ii], name='layer_'+str(ii)+'_type_'+str(type_i), reuse=reuse, seed = self.seed, use_timestep = self.resnet_dt) - else : - layer = self._one_layer(layer, self.n_neuron[ii], name='layer_'+str(ii)+'_type_'+str(type_i), reuse=reuse, seed = self.seed) - else : - layer = self._one_layer(inputs_i, self.n_neuron[0], name='layer_0_type_'+str(type_i), reuse=reuse, seed = self.seed) - for ii in range(1,len(self.n_neuron)) : - layer = self._one_layer(layer, self.n_neuron[ii], name='layer_'+str(ii)+'_type_'+str(type_i), reuse=reuse, seed = self.seed) - final_layer = self._one_layer(layer, 1, activation_fn = None, bavg = type_bias_ae, name='final_layer_type_'+str(type_i), reuse=reuse, seed = self.seed) - final_layer = tf.reshape(final_layer, [nframes, natoms[2+type_i]]) - # final_layer = tf.cond (tf.equal(natoms[2+type_i], 0), lambda: tf.zeros((0, 0), dtype=global_tf_float_precision), lambda : tf.reshape(final_layer, [-1, natoms[2+type_i]])) + force, virial, atom_virial \ + = self.descrpt.prod_force_virial (atom_ener, natoms) - # concat the results - if type_i == 0: - outs = final_layer - else: - outs = tf.concat([outs, final_layer], axis = 1) - - - return tf.reshape(outs, [-1]) - - def _one_layer(self, - inputs, - outputs_size, - activation_fn=tf.nn.tanh, - stddev=1.0, - bavg=0.0, - name='linear', - reuse=None, - seed=None, - use_timestep = False): - with tf.variable_scope(name, reuse=reuse): - shape = inputs.get_shape().as_list() - w = tf.get_variable('matrix', - [shape[1], outputs_size], - global_tf_float_precision, - tf.random_normal_initializer(stddev=stddev/np.sqrt(shape[1]+outputs_size), seed = seed)) - b = tf.get_variable('bias', - [outputs_size], - global_tf_float_precision, - tf.random_normal_initializer(stddev=stddev, mean = bavg, seed = seed)) - hidden = tf.matmul(inputs, w) + b - if activation_fn != None and use_timestep : - 
idt = tf.get_variable('idt', - [outputs_size], - global_tf_float_precision, - tf.random_normal_initializer(stddev=0.001, mean = 0.1, seed = seed)) - - if activation_fn != None: - if self.useBN: - None - # hidden_bn = self._batch_norm(hidden, name=name+'_normalization', reuse=reuse) - # return activation_fn(hidden_bn) - else: - if use_timestep : - return activation_fn(hidden) * idt - else : - return activation_fn(hidden) - else: - if self.useBN: - None - # return self._batch_norm(hidden, name=name+'_normalization', reuse=reuse) - else: - return hidden + if self.srtab is not None : + sw_force \ + = op_module.soft_min_force(energy_diff, + sw_deriv, + nlist, + natoms, + n_a_sel = nnei_a, + n_r_sel = nnei_r) + force = force + sw_force + tab_force + + force = tf.reshape (force, [-1, 3 * natoms[1]], name = "o_force"+suffix) + + if self.srtab is not None : + sw_virial, sw_atom_virial \ + = op_module.soft_min_virial (energy_diff, + sw_deriv, + rij, + nlist, + natoms, + n_a_sel = nnei_a, + n_r_sel = nnei_r) + atom_virial = atom_virial + sw_atom_virial + tab_atom_virial + virial = virial + sw_virial \ + + tf.reduce_sum(tf.reshape(tab_atom_virial, [-1, natoms[1], 9]), axis = 1) + + virial = tf.reshape (virial, [-1, 9], name = "o_virial"+suffix) + atom_virial = tf.reshape (atom_virial, [-1, 9 * natoms[1]], name = "o_atom_virial"+suffix) + + return energy, force, virial, energy_raw, atom_virial - def _DS_layer(self, - inputs, - natoms, - activation_fn=tf.nn.tanh, - stddev=1.0, - bavg=0.0, - name='linear', - reuse=None, - seed=None): - # natom x (nei x 4) - shape = inputs.get_shape().as_list() - outputs_size = [1] + self.filter_neuron - outputs_size_2 = self.n_axis_neuron - with tf.variable_scope(name, reuse=reuse): - start_index = 0 - xyz_scatter_total = [] - for type_i in range(self.ntypes): - # cut-out inputs - # with natom x (nei_type_i x 4) - inputs_i = tf.slice (inputs, - [ 0, start_index* 4], - [-1, self.sel_a[type_i]* 4] ) - start_index += self.sel_a[type_i] - shape_i = 
inputs_i.get_shape().as_list() - # with (natom x nei_type_i) x 4 - inputs_reshape = tf.reshape(inputs_i, [-1, 4]) - xyz_scatter = tf.reshape(tf.slice(inputs_reshape, [0,0],[-1,1]),[-1,1]) - for ii in range(1, len(outputs_size)): - w = tf.get_variable('matrix_'+str(ii)+'_'+str(type_i), - [outputs_size[ii - 1], outputs_size[ii]], - global_tf_float_precision, - tf.random_normal_initializer(stddev=stddev/np.sqrt(outputs_size[ii]+outputs_size[ii-1]), seed = seed)) - b = tf.get_variable('bias_'+str(ii)+'_'+str(type_i), - [1, outputs_size[ii]], - global_tf_float_precision, - tf.random_normal_initializer(stddev=stddev, mean = bavg, seed = seed)) - if self.filter_resnet_dt : - idt = tf.get_variable('idt_'+str(ii)+'_'+str(type_i), - [1, outputs_size[ii]], - global_tf_float_precision, - tf.random_normal_initializer(stddev=0.001, mean = 1.0, seed = seed)) - if outputs_size[ii] == outputs_size[ii-1]: - if self.filter_resnet_dt : - xyz_scatter += activation_fn(tf.matmul(xyz_scatter, w) + b) * idt - else : - xyz_scatter += activation_fn(tf.matmul(xyz_scatter, w) + b) - elif outputs_size[ii] == outputs_size[ii-1] * 2: - if self.filter_resnet_dt : - xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + activation_fn(tf.matmul(xyz_scatter, w) + b) * idt - else : - xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + activation_fn(tf.matmul(xyz_scatter, w) + b) - else: - xyz_scatter = activation_fn(tf.matmul(xyz_scatter, w) + b) - # natom x nei_type_i x out_size - xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1]//4, outputs_size[-1])) - xyz_scatter_total.append(xyz_scatter) - - # natom x nei x outputs_size - xyz_scatter = tf.concat(xyz_scatter_total, axis=1) - # natom x nei x 4 - inputs_reshape = tf.reshape(inputs, [-1, shape[1]//4, 4]) - # natom x 4 x outputs_size - xyz_scatter_1 = tf.matmul(inputs_reshape, xyz_scatter, transpose_a = True) - xyz_scatter_1 = xyz_scatter_1 * (4.0 / shape[1]) - # natom x 4 x outputs_size_2 - xyz_scatter_2 = tf.slice(xyz_scatter_1, 
[0,0,0],[-1,-1,outputs_size_2]) - # natom x outputs_size x outputs_size_2 - result = tf.matmul(xyz_scatter_1, xyz_scatter_2, transpose_a = True) - # natom x (outputs_size x outputs_size_2) - result = tf.reshape(result, [-1, outputs_size_2 * outputs_size[-1]]) - - return result - - def _DS_layer_type_ext(self, - inputs, - natoms, - activation_fn=tf.nn.tanh, - stddev=1.0, - bavg=0.0, - name='linear', - reuse=None, - seed=None): - # natom x (nei x 4) - shape = inputs.get_shape().as_list() - outputs_size = [1] + self.filter_neuron - outputs_size_2 = self.n_axis_neuron - with tf.variable_scope(name, reuse=reuse): - start_index = 0 - result_all = [] - xyz_scatter_1_all = [] - xyz_scatter_2_all = [] - for type_i in range(self.ntypes): - # cut-out inputs - # with natom x (nei_type_i x 4) - inputs_i = tf.slice (inputs, - [ 0, start_index* 4], - [-1, self.sel_a[type_i]* 4] ) - start_index += self.sel_a[type_i] - shape_i = inputs_i.get_shape().as_list() - # with (natom x nei_type_i) x 4 - inputs_reshape = tf.reshape(inputs_i, [-1, 4]) - xyz_scatter = tf.reshape(tf.slice(inputs_reshape, [0,0],[-1,1]),[-1,1]) - for ii in range(1, len(outputs_size)): - w = tf.get_variable('matrix_'+str(ii)+'_'+str(type_i), - [outputs_size[ii - 1], outputs_size[ii]], - global_tf_float_precision, - tf.random_normal_initializer(stddev=stddev/np.sqrt(outputs_size[ii]+outputs_size[ii-1]), seed = seed)) - b = tf.get_variable('bias_'+str(ii)+'_'+str(type_i), - [1, outputs_size[ii]], - global_tf_float_precision, - tf.random_normal_initializer(stddev=stddev, mean = bavg, seed = seed)) - if self.filter_resnet_dt : - idt = tf.get_variable('idt_'+str(ii)+'_'+str(type_i), - [1, outputs_size[ii]], - global_tf_float_precision, - tf.random_normal_initializer(stddev=0.001, mean = 1.0, seed = seed)) - if outputs_size[ii] == outputs_size[ii-1]: - if self.filter_resnet_dt : - xyz_scatter += activation_fn(tf.matmul(xyz_scatter, w) + b) * idt - else : - xyz_scatter += activation_fn(tf.matmul(xyz_scatter, w) + b) - 
elif outputs_size[ii] == outputs_size[ii-1] * 2: - if self.filter_resnet_dt : - xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + activation_fn(tf.matmul(xyz_scatter, w) + b) * idt - else : - xyz_scatter = tf.concat([xyz_scatter,xyz_scatter], 1) + activation_fn(tf.matmul(xyz_scatter, w) + b) - else: - xyz_scatter = activation_fn(tf.matmul(xyz_scatter, w) + b) - # natom x nei_type_i x out_size - xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1]//4, outputs_size[-1])) - # natom x nei_type_i x 4 - inputs_i_reshape = tf.reshape(inputs_i, [-1, shape_i[1]//4, 4]) - # natom x 4 x outputs_size - xyz_scatter_1 = tf.matmul(inputs_i_reshape, xyz_scatter, transpose_a = True) - xyz_scatter_1 = xyz_scatter_1 * (4.0 / shape_i[1]) - # natom x 4 x outputs_size_2 - xyz_scatter_2 = tf.slice(xyz_scatter_1, [0,0,0],[-1,-1,outputs_size_2]) - xyz_scatter_1_all.append(xyz_scatter_1) - xyz_scatter_2_all.append(xyz_scatter_2) - - # for type_i in range(self.ntypes): - # for type_j in range(type_i, self.ntypes): - # # natom x outputs_size x outputs_size_2 - # result = tf.matmul(xyz_scatter_1_all[type_i], xyz_scatter_2_all[type_j], transpose_a = True) - # # natom x (outputs_size x outputs_size_2) - # result = tf.reshape(result, [-1, outputs_size_2 * outputs_size[-1]]) - # result_all.append(tf.identity(result)) - xyz_scatter_2_coll = tf.concat(xyz_scatter_2_all, axis = 2) - for type_i in range(self.ntypes) : - # natom x outputs_size x (outputs_size_2 x ntypes) - result = tf.matmul(xyz_scatter_1_all[type_i], xyz_scatter_2_coll, transpose_a = True) - # natom x (outputs_size x outputs_size_2 x ntypes) - result = tf.reshape(result, [-1, outputs_size_2 * self.ntypes * outputs_size[-1]]) - result_all.append(tf.identity(result)) - - # natom x (ntypes x outputs_size x outputs_size_2 x ntypes) - result_all = tf.concat(result_all, axis = 1) - - return result_all diff --git a/source/train/ModelHyb.py b/source/train/ModelHyb.py new file mode 100644 index 0000000000..714320cd4e --- /dev/null +++ 
b/source/train/ModelHyb.py @@ -0,0 +1,101 @@ +import os,warnings +import numpy as np +import tensorflow as tf +from deepmd.common import j_must_have, j_must_have_d, j_have +from deepmd.Model import Model +from deepmd.ModelSeA import ModelSeA +from deepmd.ModelSeR import ModelSeR + +from deepmd.RunOptions import global_tf_float_precision +from deepmd.RunOptions import global_np_float_precision +from deepmd.RunOptions import global_ener_float_precision +from deepmd.RunOptions import global_cvt_2_tf_float +from deepmd.RunOptions import global_cvt_2_ener_float + +module_path = os.path.dirname(os.path.realpath(__file__)) + "/" +assert (os.path.isfile (module_path + "libop_abi.so" )), "op module does not exist" +op_module = tf.load_op_library(module_path + "libop_abi.so") + +class ModelHyb() : + def __init__ (self, jdata_a, jdata_r): + self.model_a = ModelSeA(jdata_a) + self.model_r = ModelSeR(jdata_r) + + def get_rcut(self): + return self.model_r.get_rcut() + + def get_ntypes(self): + return self.model_r.get_ntypes() + + def data_stat(self, data): + all_stat_coord = [] + all_stat_box = [] + all_stat_type = [] + all_natoms_vec = [] + all_default_mesh = [] + for ii in range(data.get_nsystems()) : + stat_prop_c, \ + stat_energy, stat_force, stat_virial, start_atom_ener, \ + stat_coord, stat_box, stat_type, stat_fparam, natoms_vec, default_mesh \ + = data.get_batch (sys_idx = ii) + natoms_vec = natoms_vec.astype(np.int32) + all_stat_coord.append(stat_coord) + all_stat_box.append(stat_box) + all_stat_type.append(stat_type) + all_natoms_vec.append(natoms_vec) + all_default_mesh.append(default_mesh) + + davg, dstd = self.compute_dstats (all_stat_coord, all_stat_box, all_stat_type, all_natoms_vec, all_default_mesh) + + bias_atom_e = data.compute_energy_shift() + + return davg, dstd, bias_atom_e + + def compute_dstats (self, + data_coord, + data_box, + data_atype, + natoms_vec, + mesh, + reuse = None) : + davg_a, dstd_a = self.model_a.compute_dstats(data_coord, data_box, 
data_atype, natoms_vec, mesh, reuse) + davg_r, dstd_r = self.model_r.compute_dstats(data_coord, data_box, data_atype, natoms_vec, mesh, reuse) + return [davg_a, davg_r], [dstd_a, dstd_r] + + def build_interaction(self, + coord, + atype, + natoms, + box, + mesh, + fparam, + davg, + dstd, + bias_atom_e, + suffix = '', + reuse_attr = None, + reuse_weights = None) : + e_a, f_a, v_a, ae_a, av_a \ + = self.model_a.build_interaction(coord, atype, natoms, box, mesh, fparam, davg[0], dstd[0], bias_atom_e, suffix = '_a'+suffix, reuse_attr = reuse_attr, reuse_weights = reuse_weights) + e_r, f_r, v_r, ae_r, av_r \ + = self.model_r.build_interaction(coord, atype, natoms, box, mesh, fparam, davg[1], dstd[1], bias_atom_e, suffix = '_r'+suffix, reuse_attr = reuse_attr, reuse_weights = reuse_weights) + with tf.variable_scope('model_attr' + suffix, reuse = reuse_attr) : + t_rcut = tf.constant(self.model_r.get_rcut(), + name = 'rcut', + dtype = global_tf_float_precision) + t_ntypes = tf.constant(self.model_r.get_ntypes(), + name = 'ntypes', + dtype = tf.int32) + t_dfparam = tf.constant(self.model_r.get_numb_fparam(), + name = 'dfparam', + dtype = tf.int32) + t_tmap = tf.constant(' '.join(self.model_r.get_type_map()), + name = 'tmap', + dtype = tf.string) + + energy = tf.add(e_a, e_r, name = 'o_energy'+suffix) + force = tf.add(f_a, f_r, name = 'o_force' +suffix) + virial = tf.add(v_a, v_r, name = 'o_virial'+suffix) + ae = tf.add(ae_a, ae_r, name = 'o_atom_energy'+suffix) + av = tf.add(av_a, av_r, name = 'o_atom_virial'+suffix) + return energy, force, virial, ae, av diff --git a/source/train/TabInter.py b/source/train/TabInter.py new file mode 100644 index 0000000000..e6de0bb42e --- /dev/null +++ b/source/train/TabInter.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 + +import os, sys, shutil +import numpy as np +from scipy.interpolate import CubicSpline + +class TabInter (object): + def __init__(self, + filename): + self.reinit(filename) + + def reinit(self, + filename): + self.vdata = 
class TabInter (object):
    """Cubic-spline interpolation table for a tabulated pairwise potential.

    The input file is a whitespace-separated text table: the first column is
    the distance grid and each remaining column holds the potential for one
    unordered type pair, so there must be ntypes*(ntypes+1)/2 data columns.
    """

    def __init__(self,
                 filename):
        self.reinit(filename)

    def reinit(self,
               filename):
        """(Re)load the table from *filename* and rebuild the spline coefficients.

        Raises AssertionError if the number of data columns does not match
        ntypes*(ntypes+1)/2 for any integer ntypes.
        """
        self.vdata = np.loadtxt(filename)
        self.rmin = self.vdata[0][0]
        # grid spacing taken from the first two rows — assumes a uniform grid;
        # TODO(review): confirm upstream that tables are always evenly spaced
        self.hh = self.vdata[1][0] - self.vdata[0][0]
        self.nspline = self.vdata.shape[0] - 1
        ncol = self.vdata.shape[1] - 1
        # solve ntypes*(ntypes+1)/2 == ncol for ntypes; +0.1 guards against
        # float round-off before truncation
        n0 = (-1 + np.sqrt(1 + 8 * ncol)) * 0.5
        self.ntypes = int(n0 + 0.1)
        assert(self.ntypes * (self.ntypes+1) // 2 == ncol),\
            "number of data columns provided in %s does not match guessed number of types %d" % (filename, self.ntypes)
        self.tab_info = np.array([self.rmin, self.hh, self.nspline, self.ntypes])
        self.tab_data = self._make_data()

    def get(self) :
        """Return (tab_info, tab_data): the [rmin, hh, nspline, ntypes] header
        and the flat array of cubic coefficients."""
        return self.tab_info, self.tab_data

    def _make_data(self) :
        """Build the flat coefficient array.

        For each ordered type pair (t0, t1) and each spline interval ii the four
        cubic Hermite coefficients (a3, a2, a1, a0) of
        p(t) = a3*t^3 + a2*t^2 + a1*t + a0, t in [0, 1], are stored at offset
        (t0*ntypes + t1)*4*nspline + 4*ii.  The table is symmetric in the two
        types, so each pair's block is written to both (t0, t1) and (t1, t0).
        """
        data = np.zeros([self.ntypes * self.ntypes * 4 * self.nspline])
        stride = 4 * self.nspline
        idx_iter = 0
        xx = self.vdata[:,0]
        for t0 in range(self.ntypes) :
            for t1 in range(t0, self.ntypes) :
                vv = self.vdata[:,1+idx_iter]
                cs = CubicSpline(xx, vv)
                # first derivative on the grid, rescaled to the unit interval
                dd = cs(xx, 1)
                dd *= self.hh
                dtmp = np.zeros(stride)
                for ii in range(self.nspline) :
                    # standard cubic Hermite coefficients: p(0)=vv[ii], p(1)=vv[ii+1],
                    # p'(0)=dd[ii], p'(1)=dd[ii+1]
                    dtmp[ii*4+0] = 2 * vv[ii] - 2 * vv[ii+1] + dd[ii] + dd[ii+1]
                    dtmp[ii*4+1] =-3 * vv[ii] + 3 * vv[ii+1] - 2 * dd[ii] - dd[ii+1]
                    dtmp[ii*4+2] = dd[ii]
                    dtmp[ii*4+3] = vv[ii]
                data[(t0 * self.ntypes + t1) * stride : (t0 * self.ntypes + t1) * stride + stride] \
                    = dtmp
                data[(t1 * self.ntypes + t0) * stride : (t1 * self.ntypes + t0) * stride + stride] \
                    = dtmp
                idx_iter += 1
        return data
_prod_force_grad -import _prod_virial_grad - -class DataSets (object): - def __init__ (self, - set_prefix = "set", - hh = 1e-6, - seed = None) : - self.dirs = glob.glob (set_prefix + ".*") - self.dirs.sort() - self.test_dir = self.dirs[-1] - self.set_count = 0 - self.hh = hh - self.load_test_set (self.test_dir) - - def get_numb_set (self) : - return len (self.train_dirs) - - def stats_energy (self) : - eners = [] - for ii in self.dirs: - ei = np.load (ii + "/energy.npy") - eners.append (np.average(ei)) - return np.average (eners) - - def load_test_set (self, - set_name) : - start_time = time.time() - coord_test = np.load (set_name + "/coord.npy") - box_test = np.load (set_name + "/box.npy") - # dirty workaround, type in type.raw should be sorted - type_test = np.loadtxt (set_name + "/../type.raw") - - self.coord_test0 = np.array([coord_test[0]]) - self.box_test0 = np.array([box_test[0]]) - self.type_test0 = np.array([type_test]) - - self.coord_test = [coord_test[0]] - self.box_test = [box_test[0]] - self.type_test = np.array([type_test]) - - coord0 = np.copy (self.coord_test[0]) - - self.natoms = self.type_test[0].shape[0] - for ii in range(self.natoms * 3) : - p_coord = np.copy (coord0) - n_coord = np.copy (coord0) - p_coord[ii] += self.hh - n_coord[ii] -= self.hh - self.coord_test.append (p_coord) - self.coord_test.append (n_coord) - self.box_test.append (box_test[0]) - self.box_test.append (box_test[0]) - - self.coord_test = np.array(self.coord_test) - self.box_test = np.array(self.box_test) - self.type_test = np.tile (self.type_test, (2 * self.natoms * 3 + 1, 1)) - - end_time = time.time() - - def get_test (self) : - return self.coord_test, self.box_test, self.type_test - - def get_test0 (self) : - return self.coord_test0, self.box_test0, self.type_test0 - - def get_test_box (self, - hh) : - coord0_, box0_, type0_ = self.get_test0() - coord0 = coord0_[0] - box0 = box0_[0] - type0 = type0_[0] - nc = np.array( [coord0, coord0*(1+hh), coord0*(1-hh)] ) - nb = 
np.array( [box0, box0*(1+hh), box0*(1-hh)] ) - nt = np.array( [type0, type0, type0] ) - for dd in range(3) : - tmpc = np.copy (coord0) - tmpb = np.copy (box0) - tmpc = np.reshape(tmpc, [-1, 3]) - tmpc [:,dd] *= (1+hh) - tmpc = np.reshape(tmpc, [-1]) - tmpb = np.reshape(tmpb, [-1, 3]) - tmpb [dd,:] *= (1+hh) - tmpb = np.reshape(tmpb, [-1]) - nc = np.append (nc, [tmpc], axis = 0) - nb = np.append (nb, [tmpb], axis = 0) - nt = np.append (nt, [type0], axis = 0) - tmpc = np.copy (coord0) - tmpb = np.copy (box0) - tmpc = np.reshape(tmpc, [-1, 3]) - tmpc [:,dd] *= (1-hh) - tmpc = np.reshape(tmpc, [-1]) - tmpb = np.reshape(tmpb, [-1, 3]) - tmpb [dd,:] *= (1-hh) - tmpb = np.reshape(tmpb, [-1]) - nc = np.append (nc, [tmpc], axis = 0) - nb = np.append (nb, [tmpb], axis = 0) - nt = np.append (nt, [type0], axis = 0) - return nc, nb, nt - - def get_natoms (self) : - ntype1 = np.sum (self.type_test0) - tmp = np.array([self.natoms, self.natoms, self.natoms - ntype1, ntype1]) - return tmp.astype(np.int32) - - def get_h (self) : - return self.hh - -class Model (object) : - def __init__ (self, - sess, - data, - comp = 0) : - self.sess = sess - self.natoms = data.get_natoms() - self.comp = comp - self.sel_a = [12,24] - self.sel_r = [12,24] - self.rcut_a = -1 - self.rcut_r = 3.45 - self.axis_rule = [0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0] - self.nnei_a = np.cumsum(self.sel_a)[-1] - self.nnei_r = np.cumsum(self.sel_r)[-1] - self.nnei = self.nnei_a + self.nnei_r - self.ndescrpt_a = self.nnei_a * 4 - self.ndescrpt_r = self.nnei_r * 1 - self.ndescrpt = self.ndescrpt_a + self.ndescrpt_r - davg = np.zeros (self.ndescrpt) - dstd = np.ones (self.ndescrpt) - self.t_avg = tf.constant(davg.astype(np.float64)) - self.t_std = tf.constant(dstd.astype(np.float64)) - self.default_mesh = np.zeros (6, dtype = np.int32) - self.default_mesh[3] = 2 - self.default_mesh[4] = 2 - self.default_mesh[5] = 2 - - def net (self, - inputs, - name, - reuse = False) : - with tf.variable_scope(name, reuse=reuse): - net_w = 
tf.get_variable ('net_w', - [self.ndescrpt], - tf.float64, - tf.constant_initializer (self.net_w_i)) - dot_v = tf.matmul (tf.reshape (inputs, [-1, self.ndescrpt]), - tf.reshape (net_w, [self.ndescrpt, 1])) - return tf.reshape (dot_v, [-1]) - - def comp_ef (self, - dcoord, - dbox, - dtype, - tnatoms, - name, - reuse = None) : - descrpt, descrpt_deriv, rij, nlist, axis \ - = op_module.descrpt (dcoord, - dtype, - tnatoms, - dbox, - tf.constant(self.default_mesh), - self.t_avg, - self.t_std, - rcut_a = self.rcut_a, - rcut_r = self.rcut_r, - sel_a = self.sel_a, - sel_r = self.sel_r, - axis_rule = self.axis_rule) - inputs_reshape = tf.reshape (descrpt, [-1, self.ndescrpt]) - atom_ener = self.net (inputs_reshape, name, reuse = reuse) - atom_ener_reshape = tf.reshape(atom_ener, [-1, self.natoms[0]]) - energy = tf.reduce_sum (atom_ener_reshape, axis = 1) - net_deriv_ = tf.gradients (atom_ener, inputs_reshape) - net_deriv = net_deriv_[0] - net_deriv_reshape = tf.reshape (net_deriv, [-1, self.natoms[0] * self.ndescrpt]) - - force = op_module.prod_force (net_deriv_reshape, - descrpt_deriv, - nlist, - axis, - tnatoms, - n_a_sel = self.nnei_a, - n_r_sel = self.nnei_r) - virial, atom_vir = op_module.prod_virial (net_deriv_reshape, - descrpt_deriv, - rij, - nlist, - axis, - tnatoms, - n_a_sel = self.nnei_a, - n_r_sel = self.nnei_r) - return energy, force, virial - - def comp_fl (self, - dcoord, - dbox, - dtype, - tnatoms, - name, - reuse = None) : - energy, force, virial = self.comp_ef (dcoord, dbox, dtype, tnatoms, name, reuse) - with tf.variable_scope(name, reuse=True): - net_w = tf.get_variable ('net_w', [self.ndescrpt], tf.float64, tf.constant_initializer (self.net_w_i)) - f_mag = tf.reduce_sum (tf.nn.tanh(force)) - f_mag_dw = tf.gradients (f_mag, net_w) - assert (len(f_mag_dw) == 1), "length of dw is wrong" - return f_mag, f_mag_dw[0] - - def comp_vl (self, - dcoord, - dbox, - dtype, - tnatoms, - name, - reuse = None) : - energy, force, virial = self.comp_ef (dcoord, dbox, 
dtype, tnatoms, name, reuse) - with tf.variable_scope(name, reuse=True): - net_w = tf.get_variable ('net_w', [self.ndescrpt], tf.float64, tf.constant_initializer (self.net_w_i)) - v_mag = tf.reduce_sum (virial) - v_mag_dw = tf.gradients (v_mag, net_w) - assert (len(v_mag_dw) == 1), "length of dw is wrong" - return v_mag, v_mag_dw[0] - - def make_place (self) : - self.coord = tf.placeholder(tf.float64, [None, self.natoms[0] * 3], name='t_coord') - self.box = tf.placeholder(tf.float64, [None, 9], name='t_box') - self.type = tf.placeholder(tf.int32, [None, self.natoms[0]], name = "t_type") - self.tnatoms = tf.placeholder(tf.int32, [None], name = "t_natoms") - - def make_feed_dict (self, - data ) : - dcoord, dbox, dtype = data.get_test () - return {self.coord: dcoord, - self.box: dbox, - self.type: dtype, - self.tnatoms: self.natoms, - } - - def make_feed_dict0 (self, - data ) : - dcoord, dbox, dtype = data.get_test0 () - return {self.coord: dcoord, - self.box: dbox, - self.type: dtype, - self.tnatoms: self.natoms, - } - - def test_force (self, - data) : - self.make_place () - feed_dict_test = self.make_feed_dict (data) - feed_dict_test0 = self.make_feed_dict0 (data) - - self.net_w_i = 1 * np.ones (self.ndescrpt) - t_energy, t_force, t_virial = self.comp_ef (self.coord, self.box, self.type, self.tnatoms, name = "test_0") - self.sess.run (tf.global_variables_initializer()) - energy = self.sess.run (t_energy, feed_dict = feed_dict_test) - force = self.sess.run (t_force , feed_dict = feed_dict_test) - # virial = self.sess.run (t_virial , feed_dict = feed_dict_test) - - hh2 = data.get_h() * 2. 
- ndof = (len(energy) - 1) // 2 - absolut_e = [] - relativ_e = [] - for ii in range (ndof) : - idx0 = ii * 2 + 1 - idx1 = ii * 2 + 2 - # +hh -hh - num_force = - (energy[idx0] - energy[idx1]) / hh2 - ana_force = force[0][ii] - diff = np.abs(num_force - ana_force) - absolut_e.append (diff) - relativ_e.append (diff / np.abs(ana_force)) - print ("component %6u \t value %12.5e \t diff: %10.2e \t relat: %10.2e" % (ii, ana_force, diff, np.abs(diff/ana_force))) - - print ("max absolute %e" % np.max(absolut_e)) - print ("max relative %e" % np.max(relativ_e)) - - def comp_vol (self, - box) : - return np.linalg.det (np.reshape(box, (3,3))) - - def test_virial (self, - data ) : - hh = 1e-6 - - self.make_place () - dcoord, dbox, dtype = data.get_test_box (hh) - feed_dict_box = {self.coord: dcoord, - self.box: dbox, - self.type: dtype, - self.tnatoms: self.natoms, - } - - self.net_w_i = 1 * np.ones (self.ndescrpt) - - t_energy, t_force, t_virial = self.comp_ef (self.coord, self.box, self.type, self.tnatoms, name = "test_0") - self.sess.run (tf.global_variables_initializer()) - virial = self.sess.run (t_virial , feed_dict = feed_dict_box) - energy = self.sess.run (t_energy , feed_dict = feed_dict_box) - - print ("printing virial") - ana_vir3 = (virial[0][0] + virial[0][4] + virial[0][8])/3. 
/ self.comp_vol(dbox[0]) - num_vir3 = -(energy[1] - energy[2]) / (self.comp_vol(dbox[1]) - self.comp_vol(dbox[2])) - print ( "all-dir: ana %14.5e num %14.5e diff %.2e" % (ana_vir3, num_vir3, np.abs(ana_vir3 - num_vir3)) ) - vir_idx = [0, 4, 8] - ana_v = [] - num_v = [] - for dd in range (3) : - ana_v.append (virial[0][vir_idx[dd]] / self.comp_vol(dbox[0])) - idx = 2 * (dd+1) + 1 - num_v.append ( -(energy[idx] - energy[idx+1]) / (self.comp_vol(dbox[idx]) - self.comp_vol(dbox[idx+1])) ) - for dd in range (3) : - print ( "dir %d: ana %14.5e num %14.5e diff %.2e" % (dd, ana_v[dd], num_v[dd], np.abs(ana_v[dd] - num_v[dd])) ) - - def test_dw (self, - data) : - self.make_place () - feed_dict_test0 = self.make_feed_dict0 (data) - - w0 = np.ones (self.ndescrpt) - self.net_w_i = np.copy(w0) - - t_ll, t_dw = self.comp_fl (self.coord, self.box, self.type, self.tnatoms, name = "test_0") - self.sess.run (tf.global_variables_initializer()) - ll_0 = self.sess.run (t_ll, feed_dict = feed_dict_test0) - dw_0 = self.sess.run (t_dw, feed_dict = feed_dict_test0) - - hh = 1e-4 - absolut_e = [] - relativ_e = [] - for ii in range (self.ndescrpt) : - self.net_w_i = np.copy (w0) - self.net_w_i[ii] += hh - t_ll, t_dw = self.comp_fl (self.coord, self.box, self.type, self.tnatoms, name = "test_" + str(ii*2+1)) - self.sess.run (tf.global_variables_initializer()) - ll_1 = self.sess.run (t_ll, feed_dict = feed_dict_test0) - self.net_w_i[ii] -= 2. * hh - t_ll, t_dw = self.comp_fl (self.coord, self.box, self.type, self.tnatoms, name = "test_" + str(ii*2+2)) - self.sess.run (tf.global_variables_initializer()) - ll_2 = self.sess.run (t_ll, feed_dict = feed_dict_test0) - num_v = (ll_1 - ll_2) / (2. 
* hh) - ana_v = dw_0[ii] - diff = np.abs (num_v - ana_v) - if (np.abs(ana_v) < 1e-10) : - diff_r = diff - else : - diff_r = diff / np.abs(ana_v) - print ("component %6u \t value %12.5e n_v %.12e \t diff: %10.2e \t relat: %10.2e" % (ii, ana_v, num_v, diff, diff_r)) - absolut_e.append (diff) - relativ_e.append (diff_r) - - print ("max absolute %e" % np.max(absolut_e)) - print ("max relative %e" % np.max(relativ_e)) - - def test_virial_dw (self, - data) : - self.make_place () - feed_dict_test0 = self.make_feed_dict0 (data) - - w0 = np.ones (self.ndescrpt) - self.net_w_i = np.copy(w0) - - t_ll, t_dw = self.comp_vl (self.coord, self.box, self.type, self.tnatoms, name = "test_0") - self.sess.run (tf.global_variables_initializer()) - ll_0 = self.sess.run (t_ll, feed_dict = feed_dict_test0) - dw_0 = self.sess.run (t_dw, feed_dict = feed_dict_test0) - - hh = 1e-4 - absolut_e = [] - relativ_e = [] - for ii in range (self.ndescrpt) : - self.net_w_i = np.copy (w0) - self.net_w_i[ii] += hh - t_ll, t_dw = self.comp_vl (self.coord, self.box, self.type, self.tnatoms, name = "test_" + str(ii*2+1)) - self.sess.run (tf.global_variables_initializer()) - ll_1 = self.sess.run (t_ll, feed_dict = feed_dict_test0) - self.net_w_i[ii] -= 2. * hh - t_ll, t_dw = self.comp_vl (self.coord, self.box, self.type, self.tnatoms, name = "test_" + str(ii*2+2)) - self.sess.run (tf.global_variables_initializer()) - ll_2 = self.sess.run (t_ll, feed_dict = feed_dict_test0) - num_v = (ll_1 - ll_2) / (2. 
* hh) - ana_v = dw_0[ii] - diff = np.abs (num_v - ana_v) - if (np.abs(ana_v) < 1e-10) : - diff_r = diff - else : - diff_r = diff / np.abs(ana_v) - print ("component %6u \t value %12.5e n_v %12.5e \t diff: %10.2e \t relat: %10.2e" % (ii, ana_v, num_v, diff, diff_r)) - absolut_e.append (diff) - relativ_e.append (diff_r) - - print ("max absolute %e" % np.max(absolut_e)) - print ("max relative %e" % np.max(relativ_e)) - -def _main () : - data = DataSets (set_prefix = "set") - tf.reset_default_graph() - - with tf.Session() as sess: - md = Model (sess, data) - md.test_force (data) - # md.test_virial (data) - # md.test_dw (data) - # md.test_virial_dw (data) - -if __name__ == '__main__': - _main() - diff --git a/source/train/TestNorot.py b/source/train/TestNorot.py deleted file mode 100644 index c45ccb05f7..0000000000 --- a/source/train/TestNorot.py +++ /dev/null @@ -1,466 +0,0 @@ -#!/usr/bin/env python3 - -import os -import sys -import time -import numpy as np -import glob -import tensorflow as tf - -from tensorflow.python.framework import ops - -# load force module -module_path = os.path.dirname(os.path.realpath(__file__)) + "/" -assert (os.path.isfile (module_path + "libop_abi.so" )), "op module does not exist" -op_module = tf.load_op_library(module_path + "libop_abi.so") - -# load grad of force module -sys.path.append (module_path ) -import _prod_force_norot_grad -import _prod_virial_norot_grad - -class DataSets (object): - def __init__ (self, - set_prefix = "set", - hh = 1e-6, - seed = None) : - self.dirs = glob.glob (set_prefix + ".*") - self.dirs.sort() - self.test_dir = self.dirs[-1] - self.set_count = 0 - self.hh = hh - self.load_test_set (self.test_dir) - - def get_numb_set (self) : - return len (self.train_dirs) - - def stats_energy (self) : - eners = [] - for ii in self.dirs: - ei = np.load (ii + "/energy.npy") - eners.append (np.average(ei)) - return np.average (eners) - - def load_test_set (self, - set_name) : - start_time = time.time() - coord_test = np.load 
(set_name + "/coord.npy") - box_test = np.load (set_name + "/box.npy") - # dirty workaround, type in type.raw should be sorted - type_test = np.loadtxt (set_name + "/../type.raw") - - self.coord_test0 = np.array([coord_test[0]]) - self.box_test0 = np.array([box_test[0]]) - self.type_test0 = np.array([type_test]) - - self.coord_test = [coord_test[0]] - self.box_test = [box_test[0]] - self.type_test = np.array([type_test]) - - coord0 = np.copy (self.coord_test[0]) - - self.natoms = self.type_test[0].shape[0] - for ii in range(self.natoms * 3) : - p_coord = np.copy (coord0) - n_coord = np.copy (coord0) - p_coord[ii] += self.hh - n_coord[ii] -= self.hh - self.coord_test.append (p_coord) - self.coord_test.append (n_coord) - self.box_test.append (box_test[0]) - self.box_test.append (box_test[0]) - - self.coord_test = np.array(self.coord_test) - self.box_test = np.array(self.box_test) - self.type_test = np.tile (self.type_test, (2 * self.natoms * 3 + 1, 1)) - - end_time = time.time() - - def get_test (self) : - return self.coord_test, self.box_test, self.type_test - - def get_test0 (self) : - return self.coord_test0, self.box_test0, self.type_test0 - - def get_test_box (self, - hh) : - coord0_, box0_, type0_ = self.get_test0() - coord0 = coord0_[0] - box0 = box0_[0] - type0 = type0_[0] - nc = np.array( [coord0, coord0*(1+hh), coord0*(1-hh)] ) - nb = np.array( [box0, box0*(1+hh), box0*(1-hh)] ) - nt = np.array( [type0, type0, type0] ) - for dd in range(3) : - tmpc = np.copy (coord0) - tmpb = np.copy (box0) - tmpc = np.reshape(tmpc, [-1, 3]) - tmpc [:,dd] *= (1+hh) - tmpc = np.reshape(tmpc, [-1]) - tmpb = np.reshape(tmpb, [-1, 3]) - tmpb [dd,:] *= (1+hh) - tmpb = np.reshape(tmpb, [-1]) - nc = np.append (nc, [tmpc], axis = 0) - nb = np.append (nb, [tmpb], axis = 0) - nt = np.append (nt, [type0], axis = 0) - tmpc = np.copy (coord0) - tmpb = np.copy (box0) - tmpc = np.reshape(tmpc, [-1, 3]) - tmpc [:,dd] *= (1-hh) - tmpc = np.reshape(tmpc, [-1]) - tmpb = np.reshape(tmpb, [-1, 
3]) - tmpb [dd,:] *= (1-hh) - tmpb = np.reshape(tmpb, [-1]) - nc = np.append (nc, [tmpc], axis = 0) - nb = np.append (nb, [tmpb], axis = 0) - nt = np.append (nt, [type0], axis = 0) - return nc, nb, nt - - def get_natoms (self) : - ntype1 = np.sum (self.type_test0) - tmp = np.array([self.natoms, self.natoms, self.natoms - ntype1, ntype1]) - return tmp.astype(np.int32) - - def get_h (self) : - return self.hh - -class Model (object) : - def __init__ (self, - sess, - data, - comp = 0) : - self.sess = sess - self.natoms = data.get_natoms() - self.comp = comp - self.sel_a = [12, 24] - self.sel_r = [0,0] - self.rcut_a = -1 - self.rcut_r_smth = 2.45 - self.rcut_r = 3.45 - self.axis_rule = [0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0] - self.nnei_a = np.cumsum(self.sel_a)[-1] - self.nnei_r = np.cumsum(self.sel_r)[-1] - self.nnei = self.nnei_a + self.nnei_r - self.ndescrpt_a = self.nnei_a * 4 - self.ndescrpt_r = self.nnei_r * 1 - self.ndescrpt = self.ndescrpt_a + self.ndescrpt_r - davg = np.zeros (self.ndescrpt) - dstd = np.ones (self.ndescrpt) - self.t_avg = tf.constant(davg.astype(np.float64)) - self.t_std = tf.constant(dstd.astype(np.float64)) - self.default_mesh = np.zeros (6, dtype = np.int32) - self.default_mesh[3] = 2 - self.default_mesh[4] = 2 - self.default_mesh[5] = 2 - - def net (self, - inputs, - name, - reuse = False) : - with tf.variable_scope(name, reuse=reuse): - net_w = tf.get_variable ('net_w', - [self.ndescrpt], - tf.float64, - tf.constant_initializer (self.net_w_i)) - dot_v = tf.matmul (tf.reshape (inputs, [-1, self.ndescrpt]), - tf.reshape (net_w, [self.ndescrpt, 1])) - return tf.reshape (dot_v, [-1]) - - def comp_descrpt (self, - dcoord, - dbox, - dtype, - tnatoms, - name) : - descrpt, descrpt_deriv, rij, nlist, \ - = op_module.descrpt_norot (dcoord, - dtype, - tnatoms, - dbox, - tf.constant(self.default_mesh), - self.t_avg, - self.t_std, - rcut_a = self.rcut_a, - rcut_r = self.rcut_r, - rcut_r_smth = self.rcut_r_smth, - sel_a = self.sel_a, - sel_r = self.sel_r) 
- return descrpt, descrpt_deriv - - def comp_ef (self, - dcoord, - dbox, - dtype, - tnatoms, - name, - reuse = None) : - descrpt, descrpt_deriv, rij, nlist, \ - = op_module.descrpt_norot (dcoord, - dtype, - tnatoms, - dbox, - tf.constant(self.default_mesh), - self.t_avg, - self.t_std, - rcut_a = self.rcut_a, - rcut_r = self.rcut_r, - rcut_r_smth = self.rcut_r_smth, - sel_a = self.sel_a, - sel_r = self.sel_r) - inputs_reshape = tf.reshape (descrpt, [-1, self.ndescrpt]) - atom_ener = self.net (inputs_reshape, name, reuse = reuse) - atom_ener_reshape = tf.reshape(atom_ener, [-1, self.natoms[0]]) - energy = tf.reduce_sum (atom_ener_reshape, axis = 1) - net_deriv_ = tf.gradients (atom_ener, inputs_reshape) - net_deriv = net_deriv_[0] - net_deriv_reshape = tf.reshape (net_deriv, [-1, self.natoms[0] * self.ndescrpt]) - - force = op_module.prod_force_norot (net_deriv_reshape, - descrpt_deriv, - nlist, - tnatoms, - n_a_sel = self.nnei_a, - n_r_sel = self.nnei_r) - virial, atom_vir = op_module.prod_virial_norot (net_deriv_reshape, - descrpt_deriv, - rij, - nlist, - tnatoms, - n_a_sel = self.nnei_a, - n_r_sel = self.nnei_r) - return energy, force, virial - - def comp_fl (self, - dcoord, - dbox, - dtype, - tnatoms, - name, - reuse = None) : - energy, force, virial = self.comp_ef (dcoord, dbox, dtype, tnatoms, name, reuse) - with tf.variable_scope(name, reuse=True): - net_w = tf.get_variable ('net_w', [self.ndescrpt], tf.float64, tf.constant_initializer (self.net_w_i)) - f_mag = tf.reduce_sum (tf.nn.tanh(force)) - f_mag_dw = tf.gradients (f_mag, net_w) - assert (len(f_mag_dw) == 1), "length of dw is wrong" - return f_mag, f_mag_dw[0] - - def comp_vl (self, - dcoord, - dbox, - dtype, - tnatoms, - name, - reuse = None) : - energy, force, virial = self.comp_ef (dcoord, dbox, dtype, tnatoms, name, reuse) - with tf.variable_scope(name, reuse=True): - net_w = tf.get_variable ('net_w', [self.ndescrpt], tf.float64, tf.constant_initializer (self.net_w_i)) - v_mag = tf.reduce_sum 
(virial) - v_mag_dw = tf.gradients (v_mag, net_w) - assert (len(v_mag_dw) == 1), "length of dw is wrong" - return v_mag, v_mag_dw[0] - - def make_place (self) : - self.coord = tf.placeholder(tf.float64, [None, self.natoms[0] * 3], name='t_coord') - self.box = tf.placeholder(tf.float64, [None, 9], name='t_box') - self.type = tf.placeholder(tf.int32, [None, self.natoms[0]], name = "t_type") - self.tnatoms = tf.placeholder(tf.int32, [None], name = "t_natoms") - - def make_feed_dict (self, - data ) : - dcoord, dbox, dtype = data.get_test () - return {self.coord: dcoord, - self.box: dbox, - self.type: dtype, - self.tnatoms: self.natoms, - } - - def make_feed_dict0 (self, - data ) : - dcoord, dbox, dtype = data.get_test0 () - return {self.coord: dcoord, - self.box: dbox, - self.type: dtype, - self.tnatoms: self.natoms, - } - - def test_descrpt (self, - data) : - self.make_place () - feed_dict_test = self.make_feed_dict (data) - feed_dict_test0 = self.make_feed_dict0 (data) - - self.net_w_i = 1 * np.ones (self.ndescrpt) - t_descrpt, t_descrpt_deriv = self.comp_descrpt (self.coord, self.box, self.type, self.tnatoms, name = "test_0") - - self.sess.run (tf.global_variables_initializer()) - [mydescrpt, mydescrpt_deriv] = self.sess.run ([t_descrpt, t_descrpt_deriv], feed_dict = feed_dict_test) - - print (mydescrpt) - print (mydescrpt.shape) - print (mydescrpt_deriv) - print (mydescrpt_deriv.shape) - - - def test_force (self, - data) : - self.make_place () - feed_dict_test = self.make_feed_dict (data) - feed_dict_test0 = self.make_feed_dict0 (data) - - self.net_w_i = 1 * np.ones (self.ndescrpt) - t_energy, t_force, t_virial = self.comp_ef (self.coord, self.box, self.type, self.tnatoms, name = "test_0") - self.sess.run (tf.global_variables_initializer()) - energy = self.sess.run (t_energy, feed_dict = feed_dict_test) - force = self.sess.run (t_force , feed_dict = feed_dict_test) - # virial = self.sess.run (t_virial , feed_dict = feed_dict_test) - - hh2 = data.get_h() * 2. 
- ndof = (len(energy) - 1) // 2 - absolut_e = [] - relativ_e = [] - for ii in range (ndof) : - idx0 = ii * 2 + 1 - idx1 = ii * 2 + 2 - # +hh -hh - num_force = - (energy[idx0] - energy[idx1]) / hh2 - ana_force = force[0][ii] - diff = np.abs(num_force - ana_force) - absolut_e.append (diff) - relativ_e.append (diff / np.abs(ana_force)) - print ("component %6u \t value %12.5e numerical %12.5e \t diff: %10.2e \t relat: %10.2e" % (ii, ana_force, num_force, diff, np.abs(diff/ana_force))) - - print ("max absolute %e" % np.max(absolut_e)) - print ("max relative %e" % np.max(relativ_e)) - - def comp_vol (self, - box) : - return np.linalg.det (np.reshape(box, (3,3))) - - def test_virial (self, - data ) : - hh = 1e-6 - - self.make_place () - dcoord, dbox, dtype = data.get_test_box (hh) - feed_dict_box = {self.coord: dcoord, - self.box: dbox, - self.type: dtype, - self.tnatoms: self.natoms, - } - - self.net_w_i = 1 * np.ones (self.ndescrpt) - - t_energy, t_force, t_virial = self.comp_ef (self.coord, self.box, self.type, self.tnatoms, name = "test_0") - self.sess.run (tf.global_variables_initializer()) - virial = self.sess.run (t_virial , feed_dict = feed_dict_box) - energy = self.sess.run (t_energy , feed_dict = feed_dict_box) - - print ("printing virial") - ana_vir3 = (virial[0][0] + virial[0][4] + virial[0][8])/3. 
/ self.comp_vol(dbox[0]) - num_vir3 = -(energy[1] - energy[2]) / (self.comp_vol(dbox[1]) - self.comp_vol(dbox[2])) - print ( "all-dir: ana %14.5e num %14.5e diff %.2e" % (ana_vir3, num_vir3, np.abs(ana_vir3 - num_vir3)) ) - vir_idx = [0, 4, 8] - ana_v = [] - num_v = [] - for dd in range (3) : - ana_v.append (virial[0][vir_idx[dd]] / self.comp_vol(dbox[0])) - idx = 2 * (dd+1) + 1 - num_v.append ( -(energy[idx] - energy[idx+1]) / (self.comp_vol(dbox[idx]) - self.comp_vol(dbox[idx+1])) ) - for dd in range (3) : - print ( "dir %d: ana %14.5e num %14.5e diff %.2e" % (dd, ana_v[dd], num_v[dd], np.abs(ana_v[dd] - num_v[dd])) ) - - def test_dw (self, - data) : - self.make_place () - feed_dict_test0 = self.make_feed_dict0 (data) - - w0 = np.ones (self.ndescrpt) - self.net_w_i = np.copy(w0) - - t_ll, t_dw = self.comp_fl (self.coord, self.box, self.type, self.tnatoms, name = "test_0") - self.sess.run (tf.global_variables_initializer()) - ll_0 = self.sess.run (t_ll, feed_dict = feed_dict_test0) - dw_0 = self.sess.run (t_dw, feed_dict = feed_dict_test0) - - hh = 1e-4 - absolut_e = [] - relativ_e = [] - for ii in range (self.ndescrpt) : - self.net_w_i = np.copy (w0) - self.net_w_i[ii] += hh - t_ll, t_dw = self.comp_fl (self.coord, self.box, self.type, self.tnatoms, name = "test_" + str(ii*2+1)) - self.sess.run (tf.global_variables_initializer()) - ll_1 = self.sess.run (t_ll, feed_dict = feed_dict_test0) - self.net_w_i[ii] -= 2. * hh - t_ll, t_dw = self.comp_fl (self.coord, self.box, self.type, self.tnatoms, name = "test_" + str(ii*2+2)) - self.sess.run (tf.global_variables_initializer()) - ll_2 = self.sess.run (t_ll, feed_dict = feed_dict_test0) - num_v = (ll_1 - ll_2) / (2. 
* hh) - ana_v = dw_0[ii] - diff = np.abs (num_v - ana_v) - if (np.abs(ana_v) < 1e-10) : - diff_r = diff - else : - diff_r = diff / np.abs(ana_v) - print ("component %6u \t value %12.5e n_v %.12e \t diff: %10.2e \t relat: %10.2e" % (ii, ana_v, num_v, diff, diff_r)) - absolut_e.append (diff) - relativ_e.append (diff_r) - - print ("max absolute %e" % np.max(absolut_e)) - print ("max relative %e" % np.max(relativ_e)) - - def test_virial_dw (self, - data) : - self.make_place () - feed_dict_test0 = self.make_feed_dict0 (data) - - w0 = np.ones (self.ndescrpt) - self.net_w_i = np.copy(w0) - - t_ll, t_dw = self.comp_vl (self.coord, self.box, self.type, self.tnatoms, name = "test_0") - self.sess.run (tf.global_variables_initializer()) - ll_0 = self.sess.run (t_ll, feed_dict = feed_dict_test0) - dw_0 = self.sess.run (t_dw, feed_dict = feed_dict_test0) - - hh = 1e-4 - absolut_e = [] - relativ_e = [] - for ii in range (self.ndescrpt) : - self.net_w_i = np.copy (w0) - self.net_w_i[ii] += hh - t_ll, t_dw = self.comp_vl (self.coord, self.box, self.type, self.tnatoms, name = "test_" + str(ii*2+1)) - self.sess.run (tf.global_variables_initializer()) - ll_1 = self.sess.run (t_ll, feed_dict = feed_dict_test0) - self.net_w_i[ii] -= 2. * hh - t_ll, t_dw = self.comp_vl (self.coord, self.box, self.type, self.tnatoms, name = "test_" + str(ii*2+2)) - self.sess.run (tf.global_variables_initializer()) - ll_2 = self.sess.run (t_ll, feed_dict = feed_dict_test0) - num_v = (ll_1 - ll_2) / (2. 
* hh) - ana_v = dw_0[ii] - diff = np.abs (num_v - ana_v) - if (np.abs(ana_v) < 1e-10) : - diff_r = diff - else : - diff_r = diff / np.abs(ana_v) - print ("component %6u \t value %12.5e n_v %12.5e \t diff: %10.2e \t relat: %10.2e" % (ii, ana_v, num_v, diff, diff_r)) - absolut_e.append (diff) - relativ_e.append (diff_r) - - print ("max absolute %e" % np.max(absolut_e)) - print ("max relative %e" % np.max(relativ_e)) - -def _main () : - data = DataSets (set_prefix = "set") - tf.reset_default_graph() - - with tf.Session() as sess: - md = Model (sess, data) - md.test_force (data) - # md.test_virial (data) - # md.test_dw (data) - # md.test_virial_dw (data) - -if __name__ == '__main__': - _main() - diff --git a/source/train/Trainer.py b/source/train/Trainer.py new file mode 100644 index 0000000000..19efc3c998 --- /dev/null +++ b/source/train/Trainer.py @@ -0,0 +1,467 @@ +#!/usr/bin/env python3 +import os +import sys +import time +import shutil +import warnings +import numpy as np +import tensorflow as tf +from deepmd.RunOptions import global_tf_float_precision +from deepmd.RunOptions import global_np_float_precision +from deepmd.RunOptions import global_ener_float_precision +from deepmd.RunOptions import global_cvt_2_tf_float +from deepmd.RunOptions import global_cvt_2_ener_float +from Fitting import EnerFitting +from DescrptLocFrame import DescrptLocFrame +from DescrptSeA import DescrptSeA +from DescrptSeR import DescrptSeR +from Model import Model +from Loss import LossStd +from LearningRate import LearningRateExp + +from tensorflow.python.framework import ops +from tensorflow.python.client import timeline + +# load force module +module_path = os.path.dirname(os.path.realpath(__file__)) + "/" +assert (os.path.isfile (module_path + "libop_abi.so" )), "op module does not exist" +op_module = tf.load_op_library(module_path + "libop_abi.so") + +# load grad of force module +sys.path.append (module_path ) +import deepmd._prod_force_grad +import deepmd._prod_virial_grad +import 
deepmd._prod_force_se_a_grad +import deepmd._prod_virial_se_a_grad +import deepmd._prod_force_se_r_grad +import deepmd._prod_virial_se_r_grad +import deepmd._soft_min_force_grad +import deepmd._soft_min_virial_grad +from deepmd.RunOptions import RunOptions +from deepmd.TabInter import TabInter + +from deepmd.common import j_must_have, j_must_have_d, j_have + +def _is_subdir(path, directory): + path = os.path.realpath(path) + directory = os.path.realpath(directory) + if path == directory: + return False + relative = os.path.relpath(path, directory) + os.sep + return not relative.startswith(os.pardir + os.sep) + + +class NNPTrainer (object): + def __init__(self, + jdata, + run_opt): + self.run_opt = run_opt + self._init_param(jdata) + + def _init_param(self, jdata): + # model config + model_param = j_must_have(jdata, 'model') + descrpt_param = j_must_have(model_param, 'descriptor') + fitting_param = j_must_have(model_param, 'fitting_net') + + # descriptor + descrpt_type = j_must_have(descrpt_param, 'type') + if descrpt_type == 'loc_frame': + self.descrpt = DescrptLocFrame(descrpt_param) + elif descrpt_type == 'se_a' : + self.descrpt = DescrptSeA(descrpt_param) + elif descrpt_type == 'se_r' : + self.descrpt = DescrptSeR(descrpt_param) + else : + raise RuntimeError('unknow model type ' + model_type) + + # fitting net + try: + fitting_type = fitting_param['type'] + except: + fitting_type = 'ener' + if fitting_type == 'ener': + self.fitting = EnerFitting(fitting_param, self.descrpt) + else : + raise RuntimeError('unknow fitting type ' + fitting_type) + + # init model + self.model = Model(model_param, self.descrpt, self.fitting) + + # learning rate + lr_param = j_must_have(jdata, 'learning_rate') + try: + lr_type = lr_param['type'] + except: + lr_type = 'exp' + if lr_type == 'exp': + self.lr = LearningRateExp(lr_param) + else : + raise RuntimeError('unknow learning_rate type ' + lr_type) + + # loss + loss_param = j_must_have(jdata, 'loss') + try: + loss_type = 
loss_param['type'] + except: + loss_type = 'std' + if loss_type == 'std': + self.loss = LossStd(loss_param, self.lr.start_lr()) + else : + raise RuntimeError('unknow loss type ' + loss_type) + + # training + training_param = j_must_have(jdata, 'training') + + self.numb_test = j_must_have (training_param, 'numb_test') + self.useBN = False + + self.disp_file = "lcurve.out" + if j_have (training_param, "disp_file") : self.disp_file = training_param["disp_file"] + self.disp_freq = j_must_have (training_param, 'disp_freq') + self.save_freq = j_must_have (training_param, 'save_freq') + self.save_ckpt = j_must_have (training_param, 'save_ckpt') + + self.display_in_training = j_must_have (training_param, 'disp_training') + self.timing_in_training = j_must_have (training_param, 'time_training') + self.profiling = False + if j_have (training_param, 'profiling') : + self.profiling = training_param['profiling'] + if self.profiling : + self.profiling_file = j_must_have (training_param, 'profiling_file') + + self.sys_weights = None + if j_have(training_param, 'sys_weights') : + self.sys_weights = training_param['sys_weights'] + + + def _message (self, msg) : + self.run_opt.message(msg) + + def build (self, + data) : + self.ntypes = self.model.get_ntypes() + assert (self.ntypes == data.get_ntypes()), "ntypes should match that found in data" + + self.batch_size = data.get_batch_size() + + self.numb_fparam = data.numb_fparam() + if self.numb_fparam > 0 : + self._message("training with %d frame parameter(s)" % self.numb_fparam) + elif self.numb_fparam < 0 : + self._message("training without frame parameter") + else : + raise RuntimeError("number of frame parameter == 0") + + self.type_map = data.get_type_map() + + davg, dstd, bias_e = self.model.data_stat(data) + + worker_device = "/job:%s/task:%d/%s" % (self.run_opt.my_job_name, + self.run_opt.my_task_index, + self.run_opt.my_device) + + with tf.device(tf.train.replica_device_setter(worker_device = worker_device, + cluster = 
self.run_opt.cluster_spec)): + self._build_lr() + self._build_network(davg, dstd, bias_e) + self._build_training() + + + def _build_lr(self): + self._extra_train_ops = [] + self.global_step = tf.train.get_or_create_global_step() + self.learning_rate = self.lr.build(self.global_step) + self._message("built lr") + + def _build_network(self, davg, dstd, bias_atom_e): + + self.t_prop_c = tf.placeholder(tf.float32, [4], name='t_prop_c') + self.t_energy = tf.placeholder(global_ener_float_precision, [None], name='t_energy') + self.t_force = tf.placeholder(global_tf_float_precision, [None], name='t_force') + self.t_virial = tf.placeholder(global_tf_float_precision, [None], name='t_virial') + self.t_atom_ener = tf.placeholder(global_tf_float_precision, [None], name='t_atom_ener') + self.t_coord = tf.placeholder(global_tf_float_precision, [None], name='i_coord') + self.t_type = tf.placeholder(tf.int32, [None], name='i_type') + self.t_natoms = tf.placeholder(tf.int32, [self.ntypes+2], name='i_natoms') + self.t_box = tf.placeholder(global_tf_float_precision, [None, 9], name='i_box') + self.t_mesh = tf.placeholder(tf.int32, [None], name='i_mesh') + self.is_training = tf.placeholder(tf.bool) + if self.numb_fparam > 0 : + self.t_fparam = tf.placeholder(global_tf_float_precision, [None], name='i_fparam') + else : + self.t_fparam = None + + self.energy, self.force, self.virial, self.atom_ener, self.atom_virial\ + = self.model.build (self.t_coord, + self.t_type, + self.t_natoms, + self.t_box, + self.t_mesh, + self.t_fparam, + davg = davg, + dstd = dstd, + bias_atom_e = bias_atom_e, + suffix = "", + reuse = False) + + self.l2_l, self.l2_el, self.l2_fl, self.l2_vl, self.l2_ael \ + = self.loss.build (self.learning_rate, + self.t_natoms, \ + self.t_prop_c, \ + self.t_energy, self.energy, \ + self.t_force, self.force, \ + self.t_virial, self.virial, \ + self.t_atom_ener, self.atom_ener, \ + suffix = "test") + + self._message("built network") + + def _build_training(self): + 
trainable_variables = tf.trainable_variables() + optimizer = tf.train.AdamOptimizer(learning_rate = self.learning_rate) + if self.run_opt.is_distrib : + optimizer = tf.train.SyncReplicasOptimizer( + optimizer, + replicas_to_aggregate = self.run_opt.cluster_spec.num_tasks("worker"), + total_num_replicas = self.run_opt.cluster_spec.num_tasks("worker"), + name = "sync_replicas") + self.sync_replicas_hook = optimizer.make_session_run_hook(self.run_opt.is_chief) + grads = tf.gradients(self.l2_l, trainable_variables) + apply_op = optimizer.apply_gradients (zip (grads, trainable_variables), + global_step=self.global_step, + name='train_step') + train_ops = [apply_op] + self._extra_train_ops + self.train_op = tf.group(*train_ops) + self._message("built training") + + def _init_sess_serial(self) : + self.sess = tf.Session( + config=tf.ConfigProto(intra_op_parallelism_threads=self.run_opt.num_intra_threads, + inter_op_parallelism_threads=self.run_opt.num_inter_threads + )) + self.saver = tf.train.Saver() + saver = self.saver + if self.run_opt.init_mode == 'init_from_scratch' : + self._message("initialize model from scratch") + init_op = tf.global_variables_initializer() + self.sess.run(init_op) + fp = open(self.disp_file, "w") + fp.close () + elif self.run_opt.init_mode == 'init_from_model' : + self._message("initialize from model %s" % self.run_opt.init_model) + init_op = tf.global_variables_initializer() + self.sess.run(init_op) + saver.restore (self.sess, self.run_opt.init_model) + self.sess.run(self.global_step.assign(0)) + fp = open(self.disp_file, "w") + fp.close () + elif self.run_opt.init_mode == 'restart' : + self._message("restart from model %s" % self.run_opt.restart) + init_op = tf.global_variables_initializer() + self.sess.run(init_op) + saver.restore (self.sess, self.run_opt.restart) + else : + raise RuntimeError ("unkown init mode") + + def _init_sess_distrib(self): + ckpt_dir = os.path.join(os.getcwd(), self.save_ckpt) + assert(_is_subdir(ckpt_dir, 
os.getcwd())), "the checkpoint dir must be a subdir of the current dir" + if self.run_opt.init_mode == 'init_from_scratch' : + self._message("initialize model from scratch") + if self.run_opt.is_chief : + if os.path.exists(ckpt_dir): + shutil.rmtree(ckpt_dir) + if not os.path.exists(ckpt_dir) : + os.makedirs(ckpt_dir) + fp = open(self.disp_file, "w") + fp.close () + elif self.run_opt.init_mode == 'init_from_model' : + raise RuntimeError("distributed training does not support %s" % self.run_opt.init_mode) + elif self.run_opt.init_mode == 'restart' : + self._message("restart from model %s" % ckpt_dir) + if self.run_opt.is_chief : + assert(os.path.isdir(ckpt_dir)), "the checkpoint dir %s should exists" % ckpt_dir + else : + raise RuntimeError ("unkown init mode") + + saver = tf.train.Saver(max_to_keep = 1) + self.saver = None + # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5) + # config = tf.ConfigProto(allow_soft_placement=True, + # gpu_options = gpu_options, + # intra_op_parallelism_threads=self.run_opt.num_intra_threads, + # inter_op_parallelism_threads=self.run_opt.num_inter_threads) + config = tf.ConfigProto(intra_op_parallelism_threads=self.run_opt.num_intra_threads, + inter_op_parallelism_threads=self.run_opt.num_inter_threads) + # The stop_hook handles stopping after running given steps + # stop_hook = tf.train.StopAtStepHook(last_step = stop_batch) + # hooks = [self.sync_replicas_hook, stop_hook] + hooks = [self.sync_replicas_hook] + scaffold = tf.train.Scaffold(saver=saver) + # Use monitor session for distributed computation + self.sess = tf.train.MonitoredTrainingSession(master = self.run_opt.server.target, + is_chief = self.run_opt.is_chief, + config = config, + hooks = hooks, + scaffold = scaffold, + checkpoint_dir = ckpt_dir) + # , + # save_checkpoint_steps = self.save_freq) + + def train (self, + data, + stop_batch) : + if self.run_opt.is_distrib : + self._init_sess_distrib() + else : + self._init_sess_serial() + + self.print_head() + 
fp = None + if self.run_opt.is_chief : + fp = open(self.disp_file, "a") + + cur_batch = self.sess.run(self.global_step) + self.cur_batch = cur_batch + self.run_opt.message("start training at lr %.2e (== %.2e), final lr will be %.2e" % + (self.sess.run(self.learning_rate), + self.lr.value(cur_batch), + self.lr.value(stop_batch)) + ) + + prf_options = None + prf_run_metadata = None + if self.profiling : + prf_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) + prf_run_metadata = tf.RunMetadata() + + train_time = 0 + while cur_batch < stop_batch : + batch_prop_c, \ + batch_energy, batch_force, batch_virial, batch_atom_ener, \ + batch_coord, batch_box, batch_type, batch_fparam, \ + natoms_vec, \ + default_mesh \ + = data.get_batch (sys_weights = self.sys_weights) + cur_batch_size = batch_energy.shape[0] + feed_dict_batch = {self.t_prop_c: batch_prop_c, + self.t_energy: batch_energy, + self.t_force: np.reshape(batch_force, [-1]), + self.t_virial: np.reshape(batch_virial, [-1]), + self.t_atom_ener: np.reshape(batch_atom_ener, [-1]), + self.t_coord: np.reshape(batch_coord, [-1]), + self.t_box: batch_box, + self.t_type: np.reshape(batch_type, [-1]), + self.t_natoms: natoms_vec, + self.t_mesh: default_mesh, + self.is_training: True} + if self.numb_fparam > 0 : + feed_dict_batch[self.t_fparam] = np.reshape(batch_fparam, [-1]) + if self.display_in_training and cur_batch == 0 : + self.test_on_the_fly(fp, data, feed_dict_batch) + if self.timing_in_training : tic = time.time() + self.sess.run([self.train_op], feed_dict = feed_dict_batch, options=prf_options, run_metadata=prf_run_metadata) + if self.timing_in_training : toc = time.time() + if self.timing_in_training : train_time += toc - tic + cur_batch = self.sess.run(self.global_step) + self.cur_batch = cur_batch + + if self.display_in_training and (cur_batch % self.disp_freq == 0) : + tic = time.time() + self.test_on_the_fly(fp, data, feed_dict_batch) + toc = time.time() + test_time = toc - tic + if 
self.timing_in_training : + self._message("batch %7d training time %.2f s, testing time %.2f s" + % (cur_batch, train_time, test_time)) + train_time = 0 + if self.save_freq > 0 and cur_batch % self.save_freq == 0 and self.run_opt.is_chief : + if self.saver is not None : + self.saver.save (self.sess, os.getcwd() + "/" + self.save_ckpt) + self._message("saved checkpoint %s" % self.save_ckpt) + if self.run_opt.is_chief: + fp.close () + if self.profiling and self.run_opt.is_chief : + fetched_timeline = timeline.Timeline(prf_run_metadata.step_stats) + chrome_trace = fetched_timeline.generate_chrome_trace_format() + with open(self.profiling_file, 'w') as f: + f.write(chrome_trace) + + def get_global_step (self) : + return self.sess.run(self.global_step) + + def print_head (self) : + if self.run_opt.is_chief: + fp = open(self.disp_file, "a") + print_str = "# %5s" % 'batch' + prop_fmt = ' %9s %9s' + print_str += prop_fmt % ('l2_tst', 'l2_trn') + if self.loss.has_e : + print_str += prop_fmt % ('l2_e_tst', 'l2_e_trn') + if self.loss.has_ae : + print_str += prop_fmt % ('l2_ae_tst', 'l2_ae_trn') + if self.loss.has_f : + print_str += prop_fmt % ('l2_f_tst', 'l2_f_trn') + if self.loss.has_v : + print_str += prop_fmt % ('l2_v_tst', 'l2_v_trn') + print_str += ' %8s\n' % 'lr' + fp.write(print_str) + fp.close () + + def test_on_the_fly (self, + fp, + data, + feed_dict_batch) : + test_prop_c, \ + test_energy, test_force, test_virial, test_atom_ener, \ + test_coord, test_box, test_type, test_fparam, \ + natoms_vec, \ + default_mesh \ + = data.get_test () + feed_dict_test = {self.t_prop_c: test_prop_c, + self.t_energy: test_energy [:self.numb_test], + self.t_force: np.reshape(test_force [:self.numb_test, :], [-1]), + self.t_virial: np.reshape(test_virial [:self.numb_test, :], [-1]), + self.t_atom_ener: np.reshape(test_atom_ener[:self.numb_test, :], [-1]), + self.t_coord: np.reshape(test_coord [:self.numb_test, :], [-1]), + self.t_box: test_box [:self.numb_test, :], + self.t_type: 
np.reshape(test_type [:self.numb_test, :], [-1]), + self.t_natoms: natoms_vec, + self.t_mesh: default_mesh, + self.is_training: False} + if self.numb_fparam > 0 : + feed_dict_test[self.t_fparam] = np.reshape(test_fparam [:self.numb_test, :], [-1]) + error_test, error_e_test, error_f_test, error_v_test, error_ae_test \ + = self.sess.run([self.l2_l, \ + self.l2_el, \ + self.l2_fl, \ + self.l2_vl, \ + self.l2_ael], + feed_dict=feed_dict_test) + error_train, error_e_train, error_f_train, error_v_train, error_ae_train \ + = self.sess.run([self.l2_l, \ + self.l2_el, \ + self.l2_fl, \ + self.l2_vl, \ + self.l2_ael], + feed_dict=feed_dict_batch) + cur_batch = self.cur_batch + current_lr = self.sess.run(self.learning_rate) + if self.run_opt.is_chief: + print_str = "%7d" % cur_batch + prop_fmt = " %9.2e %9.2e" + print_str += prop_fmt % (np.sqrt(error_test), np.sqrt(error_train)) + if self.loss.has_e : + print_str += prop_fmt % (np.sqrt(error_e_test) / natoms_vec[0], np.sqrt(error_e_train) / natoms_vec[0]) + if self.loss.has_ae : + print_str += prop_fmt % (np.sqrt(error_ae_test), np.sqrt(error_ae_train)) + if self.loss.has_f : + print_str += prop_fmt % (np.sqrt(error_f_test), np.sqrt(error_f_train)) + if self.loss.has_v : + print_str += prop_fmt % (np.sqrt(error_v_test) / natoms_vec[0], np.sqrt(error_v_train) / natoms_vec[0]) + print_str += " %8.1e\n" % current_lr + fp.write(print_str) + fp.flush () + + diff --git a/source/train/__init__.py b/source/train/__init__.py new file mode 100644 index 0000000000..4fe1d7b3f7 --- /dev/null +++ b/source/train/__init__.py @@ -0,0 +1 @@ +from .DeepPot import DeepPot diff --git a/source/train/__main__.py b/source/train/__main__.py new file mode 100644 index 0000000000..94f248be40 --- /dev/null +++ b/source/train/__main__.py @@ -0,0 +1,74 @@ +import argparse + +from .train import train +from .freeze import freeze +from .config import config +from .test import test + +def _main () : + parser = argparse.ArgumentParser( + 
description="deepmd-kit") + subparsers = parser.add_subparsers(title='Valid subcommands', dest='command') + + parser_cfig = subparsers.add_parser('config', help='fast configuration of parameter file for smooth model') + parser_cfig.add_argument("-o", "--output", type=str, default = "input.json", + help="the output json file") + + default_num_inter_threads = 0 + parser_train = subparsers.add_parser('train', help='train a model') + parser_train.add_argument('INPUT', + help='the input json database ') + parser_train.add_argument('-t','--inter-threads', type = int, default = default_num_inter_threads, + help= + 'With default value %d. ' % default_num_inter_threads + + 'Setting the "inter_op_parallelism_threads" key for the tensorflow, ' + + 'the "intra_op_parallelism_threads" will be set by the env variable OMP_NUM_THREADS') + parser_train.add_argument('--init-model', type = str, + help= + 'Initialize the model by the provided checkpoint.') + parser_train.add_argument('--restart', type = str, + help= + 'Restart the training from the provided checkpoint.') + + default_frozen_nodes = "o_energy,o_force,o_virial,o_atom_energy,o_atom_virial,model_attr/rcut,model_attr/ntypes,model_attr/dfparam,model_attr/tmap" + parser_frz = subparsers.add_parser('freeze', help='freeze the model') + parser_frz.add_argument("-d", "--folder", type=str, default = ".", + help="path to checkpoint folder") + parser_frz.add_argument("-o", "--output", type=str, default = "frozen_model.pb", + help="name of graph, will output to the checkpoint folder") + parser_frz.add_argument("-n", "--nodes", type=str, default = default_frozen_nodes, + help="the frozen nodes, defaults is " + default_frozen_nodes) + + parser_tst = subparsers.add_parser('test', help='test the model') + parser_tst.add_argument("-m", "--model", default="frozen_model.pb", type=str, + help="Frozen model file to import") + parser_tst.add_argument("-s", "--system", default=".", type=str, + help="The system dir") + 
parser_tst.add_argument("-S", "--set-prefix", default="set", type=str, + help="The set prefix") + parser_tst.add_argument("-n", "--numb-test", default=100, type=int, + help="The number of data for test") + parser_tst.add_argument("-r", "--rand-seed", type=int, + help="The random seed") + parser_tst.add_argument("--shuffle-test", action = 'store_true', + help="Shuffle test data") + parser_tst.add_argument("-d", "--detail-file", type=str, + help="The file containing details of energy force and virial accuracy") + + args = parser.parse_args() + + if args.command is None : + parser.print_help() + exit + if args.command == 'train' : + train(args) + elif args.command == 'freeze' : + freeze(args) + elif args.command == 'config' : + config(args) + elif args.command == 'test' : + test(args) + else : + raise RuntimeError('unknown command ' + args.command) + +_main() diff --git a/source/train/common.py b/source/train/common.py new file mode 100644 index 0000000000..db53d882a5 --- /dev/null +++ b/source/train/common.py @@ -0,0 +1,20 @@ +def j_must_have (jdata, key) : + if not key in jdata.keys() : + raise RuntimeError ("json database must provide key " + key ) + else : + return jdata[key] + +def j_must_have_d (jdata, key, deprecated_key) : + if not key in jdata.keys() : + # raise RuntimeError ("json database must provide key " + key ) + for ii in deprecated_key : + if ii in jdata.keys() : + warnings.warn("the key \"%s\" is deprecated, please use \"%s\" instead" % (ii,key)) + return jdata[ii] + raise RuntimeError ("json database must provide key " + key ) + else : + return jdata[key] + +def j_have (jdata, key) : + return key in jdata.keys() + diff --git a/source/train/print_old_model.py b/source/train/print_old_model.py new file mode 100644 index 0000000000..90ec52de1a --- /dev/null +++ b/source/train/print_old_model.py @@ -0,0 +1,89 @@ +import dpdata,os,sys,json +import numpy as np +import tensorflow as tf +from common import Data + +# hash: 
b721960c9d5c61ee161f9e929c7d76f77673bc10 + +lib_path = os.path.dirname(os.path.realpath(__file__)) + ".." +sys.path.append (lib_path) + +from deepmd.RunOptions import RunOptions +from deepmd.DataSystem import DataSystem +from deepmd.Model import NNPModel +from deepmd.Model import LearingRate +from deepmd.common import j_must_have, j_must_have_d, j_have + +def gen_data() : + tmpdata = Data(rand_pert = 0.1, seed = 1) + sys = dpdata.LabeledSystem() + sys.data['coords'] = tmpdata.coord + sys.data['atom_types'] = tmpdata.atype + sys.data['cells'] = tmpdata.cell + nframes = tmpdata.nframes + natoms = tmpdata.natoms + print(sys.data['coords']) + sys.data['coords'] = sys.data['coords'].reshape([nframes,natoms,3]) + sys.data['cells'] = sys.data['cells'].reshape([nframes,3,3]) + sys.data['energies'] = np.zeros([nframes,1]) + sys.data['forces'] = np.zeros([nframes,natoms,3]) + sys.data['virials'] = [] + sys.to_deepmd_npy('system', prec=np.float64) + np.save('system/set.000/fparam.npy', tmpdata.fparam) + +def compute_efv(jfile): + fp = open (jfile, 'r') + jdata = json.load (fp) + run_opt = RunOptions(None) + systems = j_must_have(jdata, 'systems') + set_pfx = j_must_have(jdata, 'set_prefix') + batch_size = j_must_have(jdata, 'batch_size') + test_size = j_must_have(jdata, 'numb_test') + batch_size = 1 + test_size = 1 + stop_batch = j_must_have(jdata, 'stop_batch') + rcut = j_must_have (jdata, 'rcut') + + data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt) + + tot_numb_batches = sum(data.get_nbatches()) + lr = LearingRate (jdata, tot_numb_batches) + + model = NNPModel (jdata, run_opt = run_opt) + model.build (data, lr) + + test_prop_c, \ + test_energy, test_force, test_virial, test_atom_ener, \ + test_coord, test_box, test_type, test_fparam, \ + natoms_vec, \ + default_mesh \ + = data.get_test () + + feed_dict_test = {model.t_prop_c: test_prop_c, + model.t_energy: test_energy [:model.numb_test], + model.t_force: np.reshape(test_force [:model.numb_test, :], 
[-1]), + model.t_virial: np.reshape(test_virial [:model.numb_test, :], [-1]), + model.t_atom_ener: np.reshape(test_atom_ener[:model.numb_test, :], [-1]), + model.t_coord: np.reshape(test_coord [:model.numb_test, :], [-1]), + model.t_box: test_box [:model.numb_test, :], + model.t_type: np.reshape(test_type [:model.numb_test, :], [-1]), + model.t_natoms: natoms_vec, + model.t_mesh: default_mesh, + model.t_fparam: np.reshape(test_fparam [:model.numb_test, :], [-1]), + model.is_training: False} + + sess = tf.Session() + sess.run(tf.global_variables_initializer()) + [e, f, v] = sess.run([model.energy, model.force, model.virial], + feed_dict = feed_dict_test) + return e,f,v + +def _main() : + gen_data() + e,f,v = compute_efv('water_smth.json') + np.savetxt('e.out', e, delimiter=',') + np.savetxt('f.out', f, delimiter=',') + np.savetxt('v.out', v, delimiter=',') + + +_main() diff --git a/source/train/test.py b/source/train/test.py index 34fe00a5a1..b8d5c85191 100755 --- a/source/train/test.py +++ b/source/train/test.py @@ -7,123 +7,38 @@ import numpy as np import tensorflow as tf -lib_path = os.path.dirname(os.path.realpath(__file__)) + "/../lib/" -sys.path.append (lib_path) - -from deepmd.Data import DataSets - +from Data import DataSets +from DeepPot import DeepPot from tensorflow.python.framework import ops -# load force module -module_path = os.path.dirname(os.path.realpath(__file__)) + "/../lib/" -assert (os.path.isfile (module_path + "deepmd/libop_abi.so" )), "force module does not exist" -op_module = tf.load_op_library(module_path + "deepmd/libop_abi.so") - -# load grad of force module -sys.path.append (module_path ) -import deepmd._prod_force_grad -import deepmd._prod_virial_grad - -def load_graph(frozen_graph_filename, - prefix = 'load'): - # We load the protobuf file from the disk and parse it to retrieve the - # unserialized graph_def - with tf.gfile.GFile(frozen_graph_filename, "rb") as f: - graph_def = tf.GraphDef() - graph_def.ParseFromString(f.read()) - - # 
Then, we can use again a convenient built-in function to import a graph_def into the - # current default Graph - with tf.Graph().as_default() as graph: - tf.import_graph_def( - graph_def, - input_map=None, - return_elements=None, - name=prefix, - producer_op_list=None - ) - return graph - -def rep_int (s): - try: - int(s) - return True - except ValueError: - return False - -def analyze_ntype (graph) : - names = [] - for op in graph.get_operations(): - f1 = op.name.split('/')[1] - if ('layer' in f1) and (not 'gradients'in f1) and (not 'final' in f1) : - f1_fs = f1.split ('_') - assert len(f1_fs) == 4 and rep_int (f1_fs[-1]), "unexpected field of " + f1_fs - names.append (int(f1_fs[-1])) - s_name = sorted(set(names)) - assert len(s_name)-1 == s_name[-1], "the type is not an seq, unexpected" - return len(s_name) - def l2err (diff) : return np.sqrt(np.average (diff*diff)) -def test (sess, data, numb_test = None, detail_file = None) : - graph = sess.graph - ntypes = analyze_ntype (graph) - - natoms_vec = data.get_natoms_vec (ntypes) - natoms_vec = natoms_vec.astype(np.int32) - - test_prop_c, test_energy, test_force, test_virial, test_ae, test_coord, test_box, test_type = data.get_test () - if numb_test > test_coord.shape[0] : - print ("# numb_test %d larger than size of dataset %d, is set to %d" - % (numb_test, test_coord.shape[0], test_coord.shape[0]) ) - numb_test = test_coord.shape[0] - - ncell = np.ones (3, dtype=np.int32) - avg_box = np.average (test_box, axis = 0) - cell_size = 3 - avg_box = np.reshape (avg_box, [3,3]) - for ii in range (3) : - ncell[ii] = int ( np.linalg.norm(avg_box[ii]) / cell_size ) - if (ncell[ii] < 2) : ncell[ii] = 2 - default_mesh = np.zeros (6, dtype = np.int32) - default_mesh[3] = ncell[0] - default_mesh[4] = ncell[1] - default_mesh[5] = ncell[2] - - t_coord = graph.get_tensor_by_name ('load/t_coord:0') - t_type = graph.get_tensor_by_name ('load/t_type:0') - t_natoms = graph.get_tensor_by_name ('load/t_natoms:0') - t_box = 
graph.get_tensor_by_name ('load/t_box:0') - t_mesh = graph.get_tensor_by_name ('load/t_mesh:0') - - t_energy = graph.get_tensor_by_name ('load/energy_test:0') - t_force = graph.get_tensor_by_name ('load/force_test:0') - t_virial = graph.get_tensor_by_name ('load/virial_test:0') - - energy = [] - force = [] - virial = [] - for ii in range(numb_test) : - feed_dict_test = {t_coord: np.reshape(test_coord [ii:ii+1, :], [-1]), - t_box: test_box [ii:ii+1, :], - t_type: np.reshape(test_type [ii:ii+1, :], [-1]), - t_natoms: natoms_vec, - t_mesh: default_mesh} - tmp_energy, tmp_force, tmp_virial = sess.run ([t_energy, t_force, t_virial], feed_dict = feed_dict_test) - energy.append(tmp_energy) - force .append(tmp_force) - virial.append(tmp_virial) +def test (args) : + if args.rand_seed is not None : + np.random.seed(args.rand_seed % (2**32)) - energy = np.reshape (energy, [numb_test]) - force = np.reshape (force , [numb_test, -1]) - virial = np.reshape (virial, [numb_test, -1]) + data = DataSets (args.system, args.set_prefix, shuffle_test = args.shuffle_test) + test_prop_c, test_energy, test_force, test_virial, test_ae, test_coord, test_box, test_type, test_fparam = data.get_test () + numb_test = args.numb_test + natoms = len(test_type[0]) + nframes = test_box.shape[0] + dp = DeepPot(args.model) + coord = test_coord[:numb_test].reshape([numb_test, -1]) + box = test_box[:numb_test] + atype = test_type[0] + energy, force, virial, ae, av = dp.eval(coord, box, atype, fparam = test_fparam, atomic = True) + energy = energy.reshape([nframes,1]) + force = force.reshape([nframes,-1]) + virial = virial.reshape([nframes,9]) + ae = ae.reshape([nframes,-1]) + av = av.reshape([nframes,-1]) l2e = (l2err (energy - test_energy[:numb_test])) l2f = (l2err (force - test_force [:numb_test])) l2v = (l2err (virial - test_virial[:numb_test])) - l2ea= l2e/natoms_vec[0] - l2va= l2v/natoms_vec[0] + l2ea= l2e/natoms + l2va= l2v/natoms # print ("# energies: %s" % energy) print ("# number of test data : 
%d " % numb_test) @@ -133,6 +48,7 @@ def test (sess, data, numb_test = None, detail_file = None) : print ("Virial L2err : %e eV" % l2v) print ("Virial L2err/Natoms : %e eV" % l2va) + detail_file = args.detail_file if detail_file is not None : pe = np.concatenate((np.reshape(test_energy[:numb_test], [-1,1]), np.reshape(energy, [-1,1])), @@ -150,33 +66,3 @@ def test (sess, data, numb_test = None, detail_file = None) : np.savetxt(detail_file+".v.out", pv, header = 'data_vxx data_vxy data_vxz data_vyx data_vyy data_vyz data_vzx data_vzy data_vzz pred_vxx pred_vxy pred_vxz pred_vyx pred_vyy pred_vyz pred_vzx pred_vzy pred_vzz') -def _main () : - parser = argparse.ArgumentParser() - parser.add_argument("-m", "--model", default="frozen_model.pb", type=str, - help="Frozen model file to import") - parser.add_argument("-s", "--system", default=".", type=str, - help="The system dir") - parser.add_argument("-S", "--set-prefix", default="set", type=str, - help="The set prefix") - parser.add_argument("-n", "--numb-test", default=100, type=int, - help="The number of data for test") - parser.add_argument("-r", "--rand-seed", type=int, - help="The random seed") - parser.add_argument("-d", "--detail-file", type=str, - help="The file containing details of energy force and virial accuracy") - args = parser.parse_args() - - if args.rand_seed is not None : - np.random.seed(args.rand_seed % (2**32)) - - graph = load_graph(args.model) - data = DataSets (args.system, args.set_prefix) - - with tf.Session(graph = graph) as sess: - test (sess, data, args.numb_test, args.detail_file) - - # for op in graph.get_operations(): - # print (op.name) - -if __name__ == '__main__': - _main() diff --git a/source/train/train.py b/source/train/train.py index cdd22ce1da..a7ccb987fe 100755 --- a/source/train/train.py +++ b/source/train/train.py @@ -13,8 +13,7 @@ from deepmd.RunOptions import RunOptions from deepmd.DataSystem import DataSystem -from deepmd.Model import NNPModel -from deepmd.Model import 
LearingRate +from deepmd.Trainer import NNPTrainer def create_done_queue(cluster_spec, task_index): with tf.device("/job:ps/task:%d" % (task_index)): @@ -50,25 +49,7 @@ def j_must_have (jdata, key) : else : return jdata[key] -def _main () : - default_num_inter_threads = 0 - parser = argparse.ArgumentParser( - description="*** Train a model. ***") - parser.add_argument('INPUT', - help='the input json database ') - parser.add_argument('-t','--inter-threads', type = int, default = default_num_inter_threads, - help= - 'With default value %d. ' % default_num_inter_threads + - 'Setting the "inter_op_parallelism_threads" key for the tensorflow, ' + - 'the "intra_op_parallelism_threads" will be set by the env variable OMP_NUM_THREADS') - parser.add_argument('--init-model', type = str, - help= - 'Initialize the model by the provided checkpoint.') - parser.add_argument('--restart', type = str, - help= - 'Restart the training from the provided checkpoint.') - args = parser.parse_args() - +def train (args) : # load json database fp = open (args.INPUT, 'r') jdata = json.load (fp) @@ -98,25 +79,25 @@ def _main () : _do_work(jdata, run_opt) def _do_work(jdata, run_opt): + # init the model + model = NNPTrainer (jdata, run_opt = run_opt) + rcut = model.model.get_rcut() # init params and run options - systems = j_must_have(jdata, 'systems') - set_pfx = j_must_have(jdata, 'set_prefix') + assert('training' in jdata) + systems = j_must_have(jdata['training'], 'systems') + set_pfx = j_must_have(jdata['training'], 'set_prefix') numb_sys = len(systems) seed = None - if 'seed' in jdata.keys() : seed = jdata['seed'] - seed = seed % (2**32) + if 'seed' in jdata['training'].keys() : seed = jdata['training']['seed'] + if seed is not None: + seed = seed % (2**32) np.random.seed (seed) - batch_size = j_must_have(jdata, 'batch_size') - test_size = j_must_have(jdata, 'numb_test') - stop_batch = j_must_have(jdata, 'stop_batch') - rcut = j_must_have (jdata, 'rcut') + batch_size = 
j_must_have(jdata['training'], 'batch_size') + test_size = j_must_have(jdata['training'], 'numb_test') + stop_batch = j_must_have(jdata['training'], 'stop_batch') data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt) - tot_numb_batches = sum(data.get_nbatches()) - lr = LearingRate (jdata, tot_numb_batches) - # init the model - model = NNPModel (jdata, run_opt = run_opt) # build the model with stats from the first system - model.build (data, lr) + model.build (data) # train the model with the provided systems in a cyclic way start_time = time.time() cur_batch = 0 @@ -124,6 +105,3 @@ def _do_work(jdata, run_opt): end_time = time.time() run_opt.message("finished training\nwall time: %.3f s" % (end_time-start_time)) -if __name__ == '__main__': - _main() -