From 4648764c2a92d58a56e4ca33e4711d824c100a4e Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 18 Jun 2021 04:25:59 -0400 Subject: [PATCH 1/8] only add inputs_zero node if atom_ener exists (#766) --- deepmd/fit/ener.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/deepmd/fit/ener.py b/deepmd/fit/ener.py index 2708aa7621..03145076cb 100644 --- a/deepmd/fit/ener.py +++ b/deepmd/fit/ener.py @@ -357,7 +357,9 @@ def build (self, initializer = tf.constant_initializer(self.aparam_inv_std)) inputs = tf.cast(tf.reshape(inputs, [-1, self.dim_descrpt * natoms[0]]), self.fitting_precision) - inputs_zero = tf.zeros_like(inputs, dtype=GLOBAL_TF_FLOAT_PRECISION) + if len(self.atom_ener): + # only for atom_ener + inputs_zero = tf.zeros_like(inputs, dtype=GLOBAL_TF_FLOAT_PRECISION) if bias_atom_e is not None : From 0c7f490de24c21bcf57089336cbb5c584cf5b6f8 Mon Sep 17 00:00:00 2001 From: tuoping <80671886+tuoping@users.noreply.github.com> Date: Sat, 19 Jun 2021 17:39:48 +0800 Subject: [PATCH 2/8] add type-embedding developer doc (#762) * add type-embedding developer doc * add link to development/type-embedding.md in train-se-e2-a-tebd.md * changed the link in doc/development/type-embedding.md to relative. 
Co-authored-by: tuoping --- doc/development/index.md | 1 + doc/development/type-embedding.md | 67 +++++++++++++++++++++++++++++++ doc/train-se-e2-a-tebd.md | 6 ++- 3 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 doc/development/type-embedding.md diff --git a/doc/development/index.md b/doc/development/index.md index 6f456a06e6..79fab0d980 100644 --- a/doc/development/index.md +++ b/doc/development/index.md @@ -3,3 +3,4 @@ - [Python API](../api.rst) - [C++ API](../API_CC/api_cc.rst) - [Coding Conventions](coding-conventions.rst) +- [Atom Type Embedding](type-embedding.md) diff --git a/doc/development/type-embedding.md b/doc/development/type-embedding.md new file mode 100644 index 0000000000..17c8a63ba5 --- /dev/null +++ b/doc/development/type-embedding.md @@ -0,0 +1,67 @@ +# Atom Type Embedding +## Overview +Here is an overview of the deepmd-kit algorithm. Given a specific centric atom, we can obtain the matrix describing its local environment, named as `R`. It is consist of the distance between centric atom and its neighbors, as well as a direction vector. We can embed each distance into a vector of M1 dimension by a `embedding net`, so the environment matrix `R` can be embed into matrix `G`. We can thus extract a descriptor vector (of M1*M2 dim) of the centric atom from the `G` by some matrix multiplication, and put the descriptor into `fitting net` to get predicted energy `E`. The vanilla version of deepmd-kit build `embedding net` and `fitting net` relying on the atom type, resulting in O(N) memory usage. After applying atom type embedding, in deepmd-kit v2.0, we can share one `embedding net` and one `fitting net` in total, which decline training complexity largely. + +## Preliminary +In the following chart, you can find the meaning of symbols used to clarify the atom type embedding algorithm. 
+ +Symbol| Meaning +---| :---: +i| Type of centric atom +j| Type of neighbor atom +s_ij| Distance between centric atom and neighbor atom +G_ij(·)| Origin embedding net, take s_ij as input and output embedding vector of M1 dim +G(·) | Shared embedding net +Multi(·) | Matrix multiplication and flattening, output the descriptor vector of M1*M2 dim +F_i(·) | Origin fitting net, take the descriptor vector as input and output energy +F(·) | Shared fitting net +A(·) | Atom type embedding net, input is atom type, output is type embedding vector of dim `nchanl` + +So, we can formulate the training process as follows. +Vanilla deepmd-kit algorithm: +``` +Energy = F_i( Multi( G_ij( s_ij ) ) ) +``` +Deepmd-kit applying atom type embedding: +``` +Energy = F( [ Multi( G_ij( [s_ij, A(i), A(j)] ) ), A(j)] ) +``` +or +``` +Energy = F( [ Multi( G_ij( [s_ij, A(j)] ) ), A(j)] ) +``` +The difference between two variants above is whether using the information of centric atom when generating the descriptor. Users can choose by modifying the `type_one_side` hyper-parameter in the input json file. + +## How to use +A detailed introduction can be found at [`se_e2_a_tebd`](../train-se-e2-a-tebd.md). Looking for a fast start up, you can simply add a `type_embedding` section in the input json file as displayed in the following, and the algorithm will adopt atom type embedding algorithm automatically. +An example of `type_embedding` is like +```json= + "type_embedding":{ + "neuron":[2, 4, 8], + "resnet_dt":false, + "seed":1 + } +``` + + +## Code Modification +Atom type embedding can be applied to varied `embedding net` and `fitting net`, as a result we build a class `TypeEmbedNet` to support this free combination. In the following, we will go through the execution process of the code to explain our code modification. + +### trainer (train/trainer.py) +In trainer.py, it will parse the parameter from the input json file. 
If a `type_embedding` section is detected, it will build a `TypeEmbedNet`, which will be later input in the `model`. `model` will be built in the function `_build_network`. +### model (model/ener.py) +When building the operation graph of the `model` in `model.build`. If a `TypeEmbedNet` is detected, it will build the operation graph of `type embed net`, `embedding net` and `fitting net` by order. The building process of `type embed net` can be found in `TypeEmbedNet.build`, which output the type embedding vector of each atom type (of [ntypes * nchanl] dimension). We then save the type embedding vector into `input_dict`, so that they can be fetched later in `embedding net` and `fitting net`. +### embedding net (descriptor/se*.py) +In `embedding net`, we shall take local environment `R` as input and output matrix `G`. Functions called in this process by order is +``` +build -> _pass_filter -> _filter -> _filter_lower +``` +* `_pass_filter`: It will first detect whether an atom type embedding exists, if so, it will apply atom type embedding algorithm and doesn't divide the input by type. +* `_filter`: It will call `_filter_lower` function to obtain the result of matrix multiplication (`G^T·R` ), do further multiplication involved in Multi(·), and finally output the result of descriptor vector of M1*M2 dim. +* `_filter_lower`: The main function handling input modification. If type embedding exists, it will call `_concat_type_embedding` function to concat the first column of input `R` (the column of s_ij) with the atom type embedding information. It will decide whether using the atom type embedding vector of centric atom according to the value of `type_one_side` (if set **True**, then we only use the vector of the neighbor atom). The modified input will be put into the `fitting net` to get `G` for further matrix multiplication stage. + +### fitting net (fit/ener.py) +In `fitting net`, it take the descriptor vector as input, whose dimension is [natoms, (M1*M2)]. 
Because we need to involve information of centric atom in this step, we need to generate a matrix named as `atype_embed` (of dim [natoms, nchanl]), in which each row is the type embedding vector of the specific centric atom. The input is sorted by type of centric atom, we also know the number of a particular atom type (stored in `natoms[2+i]`), thus we get the type vector of centric atom. In the build phrase of fitting net, it will check whether type embedding exist in `input_dict` and fetch them. After that calling `embed_atom_type` function to lookup embedding vector for type vector of centric atom to obtain `atype_embed`, and concat input with it ([input, atype_embed]). The modified input go through `fitting net` to get predicted energy. + + +**P.S.: You can't apply compression method while using atom type embedding** diff --git a/doc/train-se-e2-a-tebd.md b/doc/train-se-e2-a-tebd.md index e895adc858..2179b8b598 100644 --- a/doc/train-se-e2-a-tebd.md +++ b/doc/train-se-e2-a-tebd.md @@ -35,8 +35,10 @@ The construction of type embedding net is given by `type_embedding`. An example * `seed` gives the random seed that is used to generate random numbers when initializing the model parameters. - A complete training input script of this example can be find in the directory. ```bash $deepmd_source_dir/examples/water/se_e2_a_tebd/input.json -``` \ No newline at end of file +``` +See [here](development/type-embedding.md) for further explanation of `type embedding`. 
+ +**P.S.: You can't apply compression method while using atom type embedding** From f326a86dba80287c8dc3f12db39ea640f86e3eef Mon Sep 17 00:00:00 2001 From: Denghui Lu Date: Sat, 19 Jun 2021 17:40:33 +0800 Subject: [PATCH 3/8] add model compression support for models with exclude_types feature (#754) --- deepmd/descriptor/se_a.py | 2 +- deepmd/utils/tabulate.py | 99 ++++++++++-------- source/tests/test_model_compression.py | 134 +++++++++++++++++++++++++ 3 files changed, 194 insertions(+), 41 deletions(-) diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py index 10ea49abd9..a95f0ca39a 100644 --- a/deepmd/descriptor/se_a.py +++ b/deepmd/descriptor/se_a.py @@ -268,7 +268,7 @@ def enable_compression(self, self.compress = True self.model_file = model_file self.table_config = [table_extrapolate, table_stride_1, table_stride_2, check_frequency] - self.table = DeepTabulate(self.model_file, self.type_one_side) + self.table = DeepTabulate(self.model_file, self.type_one_side, self.exclude_types) self.lower, self.upper \ = self.table.build(min_nbor_dist, table_extrapolate, diff --git a/deepmd/utils/tabulate.py b/deepmd/utils/tabulate.py index 757189c16d..944428bf48 100644 --- a/deepmd/utils/tabulate.py +++ b/deepmd/utils/tabulate.py @@ -19,7 +19,8 @@ class DeepTabulate(): """ def __init__(self, model_file : str, - type_one_side : bool = False) -> None: + type_one_side : bool = False, + exclude_types : List[List[int]] = []) -> None: """ Constructor @@ -29,10 +30,15 @@ def __init__(self, The frozen model type_one_side Try to build N_types tables. 
Otherwise, building N_types^2 tables + exclude_types : list[list[int]] + The Excluded types """ self.model_file = model_file self.type_one_side = type_one_side + self.exclude_types = exclude_types + if self.type_one_side and len(self.exclude_types) != 0: + raise RuntimeError('"type_one_side" is not compatible with "exclude_types"') self.graph, self.graph_def = self._load_graph() self.sess = tf.Session(graph = self.graph) @@ -55,19 +61,26 @@ def __init__(self, self.rcut_smth = self.descrpt.get_attr('rcut_r_smth') self.filter_variable_nodes = self._load_matrix_node() - self.layer_size = int(len(self.filter_variable_nodes) / (self.ntypes * self.ntypes * 2)) + for tt in self.exclude_types: + if (tt[0] not in range(self.ntypes)) or (tt[1] not in range(self.ntypes)): + raise RuntimeError("exclude types" + str(tt) + " must within the number of atomic types " + str(self.ntypes) + "!") + if (self.ntypes * self.ntypes - len(self.exclude_types) == 0): + raise RuntimeError("empty embedding-net are not supported in model compression!") + + self.layer_size = len(self.filter_variable_nodes) // ((self.ntypes * self.ntypes - len(self.exclude_types)) * 2) self.table_size = self.ntypes * self.ntypes if type_one_side : - self.layer_size = int(len(self.filter_variable_nodes) / (self.ntypes * 2)) + self.layer_size = len(self.filter_variable_nodes) // (self.ntypes * 2) self.table_size = self.ntypes # self.value_type = self.filter_variable_nodes["filter_type_0/matrix_1_0"].dtype #"filter_type_0/matrix_1_0" must exit~ # get trained variables self.bias = self._get_bias() self.matrix = self._get_matrix() - self.data_type = type(self.matrix["layer_1"][0][0][0]) - assert self.matrix["layer_1"][0].size > 0, "no matrix exist in matrix array!" 
- self.last_layer_size = self.matrix["layer_" + str(self.layer_size)][0].shape[1] + for item in self.matrix["layer_" + str(self.layer_size)]: + if len(item) != 0: + self.data_type = type(item[0][0]) + self.last_layer_size = item.shape[1] # define tables self.data = {} @@ -91,7 +104,7 @@ def build(self, The uniform stride of the first table stride1 The uniform stride of the second table - + Returns ---------- lower @@ -106,27 +119,27 @@ def build(self, xx = np.append(xx, np.array([extrapolate * upper], dtype = self.data_type)) self.nspline = int((upper - lower) / stride0 + (extrapolate * upper - upper) / stride1) for ii in range(self.table_size): - vv, dd, d2 = self._make_data(xx, ii) - if self.type_one_side: - net = "filter_-1_net_" + str(int(ii)) - else: - net = "filter_" + str(int(ii / self.ntypes)) + "_net_" + str(int(ii % self.ntypes)) - self.data[net] = np.zeros([self.nspline, 6 * self.last_layer_size], dtype = self.data_type) - # for jj in tqdm(range(self.nspline), desc = 'DEEPMD INFO |-> deepmd.utils.tabulate\t\t\t' + net + ', tabulating'): - for jj in range(self.nspline): - for kk in range(self.last_layer_size): - if jj < int((upper - lower) / stride0): - tt = stride0 - else: - tt = stride1 - hh = vv[jj + 1][kk] - vv[jj][kk] - self.data[net][jj][kk * 6 + 0] = vv[jj][kk] - self.data[net][jj][kk * 6 + 1] = dd[jj][kk] - self.data[net][jj][kk * 6 + 2] = 0.5 * d2[jj][kk] - self.data[net][jj][kk * 6 + 3] = (1 / (2 * tt * tt * tt)) * (20 * hh - (8 * dd[jj + 1][kk] + 12 * dd[jj][kk]) * tt - (3 * d2[jj][kk] - d2[jj + 1][kk]) * tt * tt) - self.data[net][jj][kk * 6 + 4] = (1 / (2 * tt * tt * tt * tt)) * (-30 * hh + (14 * dd[jj + 1][kk] + 16 * dd[jj][kk]) * tt + (3 * d2[jj][kk] - 2 * d2[jj + 1][kk]) * tt * tt) - self.data[net][jj][kk * 6 + 5] = (1 / (2 * tt * tt * tt * tt * tt)) * (12 * hh - 6 * (dd[jj + 1][kk] + dd[jj][kk]) * tt + (d2[jj + 1][kk] - d2[jj][kk]) * tt * tt) - self.data[net] + if self.type_one_side or (ii // self.ntypes, int(ii % self.ntypes)) not in 
self.exclude_types: + vv, dd, d2 = self._make_data(xx, ii) + if self.type_one_side: + net = "filter_-1_net_" + str(ii) + else: + net = "filter_" + str(ii // self.ntypes) + "_net_" + str(int(ii % self.ntypes)) + self.data[net] = np.zeros([self.nspline, 6 * self.last_layer_size], dtype = self.data_type) + # for jj in tqdm(range(self.nspline), desc = 'DEEPMD INFO |-> deepmd.utils.tabulate\t\t\t' + net + ', tabulating'): + for jj in range(self.nspline): + for kk in range(self.last_layer_size): + if jj < int((upper - lower) / stride0): + tt = stride0 + else: + tt = stride1 + hh = vv[jj + 1][kk] - vv[jj][kk] + self.data[net][jj][kk * 6 + 0] = vv[jj][kk] + self.data[net][jj][kk * 6 + 1] = dd[jj][kk] + self.data[net][jj][kk * 6 + 2] = 0.5 * d2[jj][kk] + self.data[net][jj][kk * 6 + 3] = (1 / (2 * tt * tt * tt)) * (20 * hh - (8 * dd[jj + 1][kk] + 12 * dd[jj][kk]) * tt - (3 * d2[jj][kk] - d2[jj + 1][kk]) * tt * tt) + self.data[net][jj][kk * 6 + 4] = (1 / (2 * tt * tt * tt * tt)) * (-30 * hh + (14 * dd[jj + 1][kk] + 16 * dd[jj][kk]) * tt + (3 * d2[jj][kk] - 2 * d2[jj + 1][kk]) * tt * tt) + self.data[net][jj][kk * 6 + 5] = (1 / (2 * tt * tt * tt * tt * tt)) * (12 * hh - 6 * (dd[jj + 1][kk] + dd[jj][kk]) * tt + (d2[jj + 1][kk] - d2[jj][kk]) * tt * tt) return lower, upper def _load_graph(self): @@ -165,14 +178,17 @@ def _get_bias(self): bias["layer_" + str(layer)] = [] if self.type_one_side: for ii in range(0, self.ntypes): - tensor_value = np.frombuffer (self.filter_variable_nodes["filter_type_all/bias_" + str(layer) + "_" + str(int(ii))].tensor_content) - tensor_shape = tf.TensorShape(self.filter_variable_nodes["filter_type_all/bias_" + str(layer) + "_" + str(int(ii))].tensor_shape).as_list() + tensor_value = np.frombuffer (self.filter_variable_nodes["filter_type_all/bias_" + str(layer) + "_" + str(ii)].tensor_content) + tensor_shape = tf.TensorShape(self.filter_variable_nodes["filter_type_all/bias_" + str(layer) + "_" + str(ii)].tensor_shape).as_list() bias["layer_" + 
str(layer)].append(np.reshape(tensor_value, tensor_shape)) else: for ii in range(0, self.ntypes * self.ntypes): - tensor_value = np.frombuffer(self.filter_variable_nodes["filter_type_" + str(int(ii / self.ntypes)) + "/bias_" + str(layer) + "_" + str(int(ii % self.ntypes))].tensor_content) - tensor_shape = tf.TensorShape(self.filter_variable_nodes["filter_type_" + str(int(ii / self.ntypes)) + "/bias_" + str(layer) + "_" + str(int(ii % self.ntypes))].tensor_shape).as_list() - bias["layer_" + str(layer)].append(np.reshape(tensor_value, tensor_shape)) + if (ii // self.ntypes, int(ii % self.ntypes)) not in self.exclude_types: + tensor_value = np.frombuffer(self.filter_variable_nodes["filter_type_" + str(ii // self.ntypes) + "/bias_" + str(layer) + "_" + str(int(ii % self.ntypes))].tensor_content) + tensor_shape = tf.TensorShape(self.filter_variable_nodes["filter_type_" + str(ii // self.ntypes) + "/bias_" + str(layer) + "_" + str(int(ii % self.ntypes))].tensor_shape).as_list() + bias["layer_" + str(layer)].append(np.reshape(tensor_value, tensor_shape)) + else: + bias["layer_" + str(layer)].append(np.array([])) return bias def _get_matrix(self): @@ -181,14 +197,17 @@ def _get_matrix(self): matrix["layer_" + str(layer)] = [] if self.type_one_side: for ii in range(0, self.ntypes): - tensor_value = np.frombuffer (self.filter_variable_nodes["filter_type_all/matrix_" + str(layer) + "_" + str(int(ii))].tensor_content) - tensor_shape = tf.TensorShape(self.filter_variable_nodes["filter_type_all/matrix_" + str(layer) + "_" + str(int(ii))].tensor_shape).as_list() + tensor_value = np.frombuffer (self.filter_variable_nodes["filter_type_all/matrix_" + str(layer) + "_" + str(ii)].tensor_content) + tensor_shape = tf.TensorShape(self.filter_variable_nodes["filter_type_all/matrix_" + str(layer) + "_" + str(ii)].tensor_shape).as_list() matrix["layer_" + str(layer)].append(np.reshape(tensor_value, tensor_shape)) else: for ii in range(0, self.ntypes * self.ntypes): - tensor_value = 
np.frombuffer(self.filter_variable_nodes["filter_type_" + str(int(ii / self.ntypes)) + "/matrix_" + str(layer) + "_" + str(int(ii % self.ntypes))].tensor_content) - tensor_shape = tf.TensorShape(self.filter_variable_nodes["filter_type_" + str(int(ii / self.ntypes)) + "/matrix_" + str(layer) + "_" + str(int(ii % self.ntypes))].tensor_shape).as_list() - matrix["layer_" + str(layer)].append(np.reshape(tensor_value, tensor_shape)) + if (ii // self.ntypes, int(ii % self.ntypes)) not in self.exclude_types: + tensor_value = np.frombuffer(self.filter_variable_nodes["filter_type_" + str(ii // self.ntypes) + "/matrix_" + str(layer) + "_" + str(int(ii % self.ntypes))].tensor_content) + tensor_shape = tf.TensorShape(self.filter_variable_nodes["filter_type_" + str(ii // self.ntypes) + "/matrix_" + str(layer) + "_" + str(int(ii % self.ntypes))].tensor_shape).as_list() + matrix["layer_" + str(layer)].append(np.reshape(tensor_value, tensor_shape)) + else: + matrix["layer_" + str(layer)].append(np.array([])) return matrix # one-by-one executions @@ -221,8 +240,8 @@ def _layer_1(self, x, w, b): def _save_data(self): for ii in range(self.ntypes * self.ntypes): - net = "filter_" + str(int(ii / self.ntypes)) + "_net_" + str(int(ii % self.ntypes)) - np.savetxt('data_' + str(int(ii)), self.data[net]) + net = "filter_" + str(ii // self.ntypes) + "_net_" + str(int(ii % self.ntypes)) + np.savetxt('data_' + str(ii), self.data[net]) def _get_env_mat_range(self, min_nbor_dist): diff --git a/source/tests/test_model_compression.py b/source/tests/test_model_compression.py index 91a073c4c2..d67e209c42 100644 --- a/source/tests/test_model_compression.py +++ b/source/tests/test_model_compression.py @@ -384,3 +384,137 @@ def test_ase(self): for ii in range(nframes): self.assertAlmostEqual(ee0.reshape([-1])[ii], ee1.reshape([-1])[ii], places = default_places) +class TestDeepPotAPBCExcludeTypes(unittest.TestCase) : + def setUp(self): + self.data_file = str(tests_path / os.path.join("model_compression", 
"data")) + self.frozen_model = str(tests_path / "dp-original.pb") + self.compressed_model = str(tests_path / "dp-compressed.pb") + self.INPUT = str(tests_path / "input.json") + jdata = j_loader(str(tests_path / os.path.join("model_compression", "input.json"))) + jdata["training"]["training_data"]["systems"] = self.data_file + jdata["training"]["validation_data"]["systems"] = self.data_file + jdata["model"]["descriptor"]["exclude_types"] = [[0, 1]] + with open(self.INPUT, "w") as fp: + json.dump(jdata, fp, indent=4) + + ret = os.system("dp train " + self.INPUT) + assert(ret == 0), "DP train error!" + ret = os.system("dp freeze -o " + self.frozen_model) + assert(ret == 0), "DP freeze error!" + ret = os.system("dp compress " + self.INPUT + " -i " + self.frozen_model + " -o " + self.compressed_model) + assert(ret == 0), "DP model compression error!" + + self.dp_original = DeepPot(self.frozen_model) + self.dp_compressed = DeepPot(self.compressed_model) + self.coords = np.array([12.83, 2.56, 2.18, + 12.09, 2.87, 2.74, + 00.25, 3.32, 1.68, + 3.36, 3.00, 1.81, + 3.51, 2.51, 2.60, + 4.27, 3.22, 1.56]) + self.atype = [0, 1, 1, 0, 1, 1] + self.box = np.array([13., 0., 0., 0., 13., 0., 0., 0., 13.]) + + def tearDown(self): + _file_delete(self.INPUT) + _file_delete(self.frozen_model) + _file_delete(self.compressed_model) + _file_delete("out.json") + _file_delete("compress.json") + _file_delete("checkpoint") + _file_delete("lcurve.out") + _file_delete("model.ckpt.meta") + _file_delete("model.ckpt.index") + _file_delete("model.ckpt.data-00000-of-00001") + + def test_attrs(self): + self.assertEqual(self.dp_original.get_ntypes(), 2) + self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places = default_places) + self.assertEqual(self.dp_original.get_type_map(), ['O', 'H']) + self.assertEqual(self.dp_original.get_dim_fparam(), 0) + self.assertEqual(self.dp_original.get_dim_aparam(), 0) + + self.assertEqual(self.dp_compressed.get_ntypes(), 2) + 
self.assertAlmostEqual(self.dp_compressed.get_rcut(), 6.0, places = default_places) + self.assertEqual(self.dp_compressed.get_type_map(), ['O', 'H']) + self.assertEqual(self.dp_compressed.get_dim_fparam(), 0) + self.assertEqual(self.dp_compressed.get_dim_aparam(), 0) + + def test_1frame(self): + ee0, ff0, vv0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = False) + ee1, ff1, vv1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = False) + # check shape of the returns + nframes = 1 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes,1)) + self.assertEqual(ff0.shape, (nframes,natoms,3)) + self.assertEqual(vv0.shape, (nframes,9)) + self.assertEqual(ee1.shape, (nframes,1)) + self.assertEqual(ff1.shape, (nframes,natoms,3)) + self.assertEqual(vv1.shape, (nframes,9)) + # check values + for ii in range(ff0.size): + self.assertAlmostEqual(ff0.reshape([-1])[ii], ff1.reshape([-1])[ii], places = default_places) + for ii in range(nframes): + self.assertAlmostEqual(ee0.reshape([-1])[ii], ee1.reshape([-1])[ii], places = default_places) + for ii in range(nframes, 9): + self.assertAlmostEqual(vv0.reshape([-1])[ii], vv1.reshape([-1])[ii], places = default_places) + + def test_1frame_atm(self): + ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = True) + ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = True) + # check shape of the returns + nframes = 1 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes,1)) + self.assertEqual(ff0.shape, (nframes,natoms,3)) + self.assertEqual(vv0.shape, (nframes,9)) + self.assertEqual(ae0.shape, (nframes,natoms,1)) + self.assertEqual(av0.shape, (nframes,natoms,9)) + self.assertEqual(ee1.shape, (nframes,1)) + self.assertEqual(ff1.shape, (nframes,natoms,3)) + self.assertEqual(vv1.shape, (nframes,9)) + self.assertEqual(ae1.shape, (nframes,natoms,1)) + self.assertEqual(av1.shape, (nframes,natoms,9)) + # 
check values + for ii in range(ff0.size): + self.assertAlmostEqual(ff0.reshape([-1])[ii], ff1.reshape([-1])[ii], places = default_places) + for ii in range(ae0.size): + self.assertAlmostEqual(ae0.reshape([-1])[ii], ae1.reshape([-1])[ii], places = default_places) + for ii in range(av0.size): + self.assertAlmostEqual(av0.reshape([-1])[ii], av1.reshape([-1])[ii], places = default_places) + for ii in range(nframes): + self.assertAlmostEqual(ee0.reshape([-1])[ii], ee1.reshape([-1])[ii], places = default_places) + for ii in range(nframes, 9): + self.assertAlmostEqual(vv0.reshape([-1])[ii], vv1.reshape([-1])[ii], places = default_places) + + def test_2frame_atm(self): + coords2 = np.concatenate((self.coords, self.coords)) + box2 = np.concatenate((self.box, self.box)) + ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(coords2, box2, self.atype, atomic = True) + ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(coords2, box2, self.atype, atomic = True) + # check shape of the returns + nframes = 2 + natoms = len(self.atype) + self.assertEqual(ee0.shape, (nframes,1)) + self.assertEqual(ff0.shape, (nframes,natoms,3)) + self.assertEqual(vv0.shape, (nframes,9)) + self.assertEqual(ae0.shape, (nframes,natoms,1)) + self.assertEqual(av0.shape, (nframes,natoms,9)) + self.assertEqual(ee1.shape, (nframes,1)) + self.assertEqual(ff1.shape, (nframes,natoms,3)) + self.assertEqual(vv1.shape, (nframes,9)) + self.assertEqual(ae1.shape, (nframes,natoms,1)) + self.assertEqual(av1.shape, (nframes,natoms,9)) + + # check values + for ii in range(ff0.size): + self.assertAlmostEqual(ff0.reshape([-1])[ii], ff1.reshape([-1])[ii], places = default_places) + for ii in range(ae0.size): + self.assertAlmostEqual(ae0.reshape([-1])[ii], ae1.reshape([-1])[ii], places = default_places) + for ii in range(av0.size): + self.assertAlmostEqual(av0.reshape([-1])[ii], av1.reshape([-1])[ii], places = default_places) + for ii in range(nframes): + self.assertAlmostEqual(ee0.reshape([-1])[ii], ee1.reshape([-1])[ii], 
places = default_places) + for ii in range(nframes, 9): + self.assertAlmostEqual(vv0.reshape([-1])[ii], vv1.reshape([-1])[ii], places = default_places) \ No newline at end of file From 44d49f346dbab84723db0b086e9bac9c5792abd7 Mon Sep 17 00:00:00 2001 From: Denghui Lu Date: Sun, 20 Jun 2021 22:15:01 +0800 Subject: [PATCH 4/8] Add a more detailed introduction for model compression (#772) --- deepmd/entrypoints/compress.py | 16 +++++----- deepmd/entrypoints/main.py | 35 ++++++++++++--------- doc/getting-started.md | 47 ++++++++++++++++++++++------ source/tests/test_argument_parser.py | 8 ++--- 4 files changed, 70 insertions(+), 36 deletions(-) diff --git a/deepmd/entrypoints/compress.py b/deepmd/entrypoints/compress.py index 2193f0c1f9..1222b1e51b 100644 --- a/deepmd/entrypoints/compress.py +++ b/deepmd/entrypoints/compress.py @@ -23,7 +23,7 @@ def compress( input: str, output: str, extrapolate: int, - stride: float, + step: float, frequency: str, checkpoint_folder: str, mpi_log: str, @@ -34,9 +34,9 @@ def compress( """Compress model. The table is composed of fifth-order polynomial coefficients and is assembled from - two sub-tables. The first table takes the stride(parameter) as it's uniform stride, - while the second table takes 10 * stride as it's uniform stride. The range of the - first table is automatically detected by deepmd-kit, while the second table ranges + two sub-tables. The first table takes the step parameter as the domain's uniform step size, + while the second table takes 10 * step as it's uniform step size. The range of the + first table is automatically detected by the code, while the second table ranges from the first table's upper boundary(upper) to the extrapolate(parameter) * upper. 
Parameters @@ -49,8 +49,8 @@ def compress( compressed model filename extrapolate : int scale of model extrapolation - stride : float - uniform stride of tabulation's first table + step : float + uniform step size of the tabulation's first table frequency : str frequency of tabulation overflow check checkpoint_folder : str @@ -71,8 +71,8 @@ def compress( jdata["model"]["compress"]["model_file"] = input jdata["model"]["compress"]["table_config"] = [ extrapolate, - stride, - 10 * stride, + step, + 10 * step, int(frequency), ] # be careful here, if one want to refine the model diff --git a/deepmd/entrypoints/main.py b/deepmd/entrypoints/main.py index e0c1d8d4af..b245053053 100644 --- a/deepmd/entrypoints/main.py +++ b/deepmd/entrypoints/main.py @@ -242,8 +242,8 @@ def parse_args(args: Optional[List[str]] = None): # * compress model ***************************************************************** # Compress a model, which including tabulating the embedding-net. # The table is composed of fifth-order polynomial coefficients and is assembled - # from two sub-tables. The first table takes the stride(parameter) as it's uniform - # stride, while the second table takes 10 * stride as it\s uniform stride + # from two sub-tables. The first table takes the step(parameter) as it's uniform + # step, while the second table takes 10 * step as it\s uniform step #  The range of the first table is automatically detected by deepmd-kit, while the # second table ranges from the first table's upper boundary(upper) to the # extrapolate(parameter) * upper. 
@@ -263,36 +263,43 @@ def parse_args(args: Optional[List[str]] = None): "--input", default="frozen_model.pb", type=str, - help="The original frozen model, which will be compressed by the deepmd-kit", + help="The original frozen model, which will be compressed by the code", ) parser_compress.add_argument( "-o", "--output", - default="frozen_model_compress.pb", + default="frozen_model_compressed.pb", type=str, help="The compressed model", ) + parser_compress.add_argument( + "-s", + "--step", + default=0.01, + type=float, + help="Model compression uses fifth-order polynomials to interpolate the embedding-net. " + "It introduces two tables with different step size to store the parameters of the polynomials. " + "The first table covers the range of the training data, while the second table is an extrapolation of the training data. " + "The domain of each table is uniformly divided by a given step size. " + "And the step(parameter) denotes the step size of the first table and the second table will " + "use 10 * step as it's step size to save the memory. " + "Usually the value ranges from 0.1 to 0.001. " + "Smaller step means higher accuracy and bigger model size", + ) parser_compress.add_argument( "-e", "--extrapolate", default=5, type=int, - help="The scale of model extrapolation", - ) - parser_compress.add_argument( - "-s", - "--stride", - default=0.01, - type=float, - help="The uniform stride of tabulation's first table, the second table will " - "use 10 * stride as it's uniform stride", + help="The domain range of the first table is automatically detected by the code: [d_low, d_up]. 
" + "While the second table ranges from the first table's upper boundary(d_up) to the extrapolate(parameter) * d_up: [d_up, extrapolate * d_up]", ) parser_compress.add_argument( "-f", "--frequency", default=-1, type=int, - help="The frequency of tabulation overflow check(If the input environment " + help="The frequency of tabulation overflow check(Whether the input environment " "matrix overflow the first or second table range). " "By default do not check the overflow", ) diff --git a/doc/getting-started.md b/doc/getting-started.md index a355fc9836..6a10a49eee 100644 --- a/doc/getting-started.md +++ b/doc/getting-started.md @@ -244,23 +244,50 @@ positional arguments: optional arguments: -h, --help show this help message and exit + -v {DEBUG,3,INFO,2,WARNING,1,ERROR,0}, --log-level {DEBUG,3,INFO,2,WARNING,1,ERROR,0} + set verbosity level by string or number, 0=ERROR, + 1=WARNING, 2=INFO and 3=DEBUG (default: INFO) + -l LOG_PATH, --log-path LOG_PATH + set log file to log messages to disk, if not + specified, the logs will only be output to console + (default: None) + -m {master,collect,workers}, --mpi-log {master,collect,workers} + Set the manner of logging when running with MPI. + 'master' logs only on main process, 'collect' + broadcasts logs from workers to master and 'workers' + means each process will output its own log (default: + master) -i INPUT, --input INPUT The original frozen model, which will be compressed by - the deepmd-kit + the code (default: frozen_model.pb) -o OUTPUT, --output OUTPUT - The compressed model + The compressed model (default: + frozen_model_compressed.pb) + -s STEP, --step STEP Model compression uses fifth-order polynomials to + interpolate the embedding-net. It introduces two + tables with different step size to store the + parameters of the polynomials. The first table covers + the range of the training data, while the second table + is an extrapolation of the training data. 
The domain + of each table is uniformly divided by a given step + size. And the step(parameter) denotes the step size of + the first table and the second table will use 10 * + step as it's step size to save the memory. Usually the + value ranges from 0.1 to 0.001. Smaller step means + higher accuracy and bigger model size (default: 0.01) -e EXTRAPOLATE, --extrapolate EXTRAPOLATE - The scale of model extrapolation - -s STRIDE, --stride STRIDE - The uniform stride of tabulation's first table, the - second table will use 10 * stride as it's uniform - stride + The domain range of the first table is automatically + detected by the code: [d_low, d_up]. While the second + table ranges from the first table's upper + boundary(d_up) to the extrapolate(parameter) * d_up: + [d_up, extrapolate * d_up] (default: 5) -f FREQUENCY, --frequency FREQUENCY - The frequency of tabulation overflow check(If the + The frequency of tabulation overflow check(Whether the input environment matrix overflow the first or second table range). By default do not check the overflow - -d FOLDER, --folder FOLDER - path to checkpoint folder + (default: -1) + -c CHECKPOINT_FOLDER, --checkpoint-folder CHECKPOINT_FOLDER + path to checkpoint folder (default: .) 
``` **Parameter explanation** diff --git a/source/tests/test_argument_parser.py b/source/tests/test_argument_parser.py index 1c85728e40..f9f28fb81b 100644 --- a/source/tests/test_argument_parser.py +++ b/source/tests/test_argument_parser.py @@ -272,10 +272,10 @@ def test_parser_compress(self): ARGS = { "INPUT": dict(type=str, value="INFILE"), "--output": dict(type=str, value="OUTFILE"), - "--extrapolate": dict(type=int, value=10), - "--stride": dict(type=float, value=0.1), - "--frequency": dict(type=int, value=1), - "--checkpoint-folder": dict(type=str, value="FOLDER"), + "--extrapolate": dict(type=int, value=5), + "--step": dict(type=float, value=0.1), + "--frequency": dict(type=int, value=-1), + "--checkpoint-folder": dict(type=str, value="."), } self.run_test(command="compress", mapping=ARGS) From b15944d82dac0466f1bb7245fb762a1eefd24cfa Mon Sep 17 00:00:00 2001 From: Yixiao Chen <19890787+y1xiaoc@users.noreply.github.com> Date: Tue, 22 Jun 2021 01:59:07 -0400 Subject: [PATCH 5/8] allow c++ tests to run without internet (#785) * fix bug when trying to find gtest in cmake * link librt explicitly * add script to test cc without installing tf --- source/api_cc/tests/CMakeLists.txt | 8 ++++---- source/install/test_cc_local.sh | 33 ++++++++++++++++++++++++++++++ source/lib/tests/CMakeLists.txt | 2 +- 3 files changed, 38 insertions(+), 5 deletions(-) create mode 100755 source/install/test_cc_local.sh diff --git a/source/api_cc/tests/CMakeLists.txt b/source/api_cc/tests/CMakeLists.txt index 111c7646bb..6768ff2ee6 100644 --- a/source/api_cc/tests/CMakeLists.txt +++ b/source/api_cc/tests/CMakeLists.txt @@ -95,17 +95,17 @@ else() endif() if (USE_CUDA_TOOLKIT) - target_link_libraries(runUnitTests gtest gtest_main ${libname} ${apiname} ${opname} pthread ${TensorFlow_LIBRARY} deepmd_op_cuda coverage_config) + target_link_libraries(runUnitTests gtest gtest_main ${libname} ${apiname} ${opname} pthread ${TensorFlow_LIBRARY} rt deepmd_op_cuda coverage_config) 
elseif(USE_ROCM_TOOLKIT) - target_link_libraries(runUnitTests gtest gtest_main ${libname} ${apiname} ${opname} pthread ${TensorFlow_LIBRARY} deepmd_op_rocm coverage_config) + target_link_libraries(runUnitTests gtest gtest_main ${libname} ${apiname} ${opname} pthread ${TensorFlow_LIBRARY} rt deepmd_op_rocm coverage_config) else() - target_link_libraries(runUnitTests gtest gtest_main ${libname} ${apiname} ${opname} pthread ${TensorFlow_LIBRARY} coverage_config) + target_link_libraries(runUnitTests gtest gtest_main ${libname} ${apiname} ${opname} pthread ${TensorFlow_LIBRARY} rt coverage_config) endif() add_test( runUnitTests runUnitTests ) find_package(GTest) -if(NOT GTEST_LIBRARY) +if(NOT GTEST_LIBRARIES) configure_file(../../cmake/googletest.cmake.in googletest-download/CMakeLists.txt) execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . RESULT_VARIABLE result diff --git a/source/install/test_cc_local.sh b/source/install/test_cc_local.sh new file mode 100755 index 0000000000..1f28d7efa8 --- /dev/null +++ b/source/install/test_cc_local.sh @@ -0,0 +1,33 @@ +set -e + +#------------------ + +SCRIPT_PATH=$(dirname $(realpath -s $0)) +NPROC=$(nproc --all) + +#------------------ + +BUILD_TMP_DIR=${SCRIPT_PATH}/../build_tests +mkdir -p ${BUILD_TMP_DIR} +cd ${BUILD_TMP_DIR} +cmake ../lib/tests +make -j${NPROC} + +#------------------ +${BUILD_TMP_DIR}/runUnitTests + + +#------------------ + +echo "try to find tensorflow in ${tensorflow_root}" +BUILD_TMP_DIR=${SCRIPT_PATH}/../build_cc_tests +INSTALL_PREFIX=${SCRIPT_PATH}/../../dp +mkdir -p ${BUILD_TMP_DIR} +mkdir -p ${INSTALL_PREFIX} +cd ${BUILD_TMP_DIR} +cmake -DINSTALL_TENSORFLOW=FALSE -DTENSORFLOW_ROOT=${tensorflow_root} ../api_cc/tests +make -j${NPROC} + +#------------------ +cd ${SCRIPT_PATH}/../api_cc/tests +${BUILD_TMP_DIR}/runUnitTests diff --git a/source/lib/tests/CMakeLists.txt b/source/lib/tests/CMakeLists.txt index b12734af9b..b5a0460c54 100644 --- a/source/lib/tests/CMakeLists.txt +++ 
b/source/lib/tests/CMakeLists.txt @@ -87,7 +87,7 @@ add_test( runUnitTests runUnitTests ) # ) find_package(GTest) -if(NOT GTEST_LIBRARY) +if(NOT GTEST_LIBRARIES) configure_file(../../cmake/googletest.cmake.in googletest-download/CMakeLists.txt) execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . RESULT_VARIABLE result From 45550344a73bbea16febc969459562f401412543 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 22 Jun 2021 11:33:51 -0400 Subject: [PATCH 6/8] support converting models generated in v1.3 to 2.0 compatibility (#725) * add v1.3 compatibility * remove TestModelMajorCompatability as compatibility was added By the way: Compatability should be compatibility * Also remove TestModelMinorCompatability * Update test_deeppot_a.py * Revert "Update test_deeppot_a.py" This reverts commit a03b5ee62107cfdd4f6fb621db4565c8531c4cd6. * Revert "Also remove TestModelMinorCompatability" This reverts commit 11fdd5c67f7b467bdaa4fb04d6280841caa90ffb. * Revert "remove TestModelMajorCompatability as compatibility was added" This reverts commit 40dd8073b268bf2a557cb4de4e50ae6fcb25f85b. 
* revert allowing 0.0 model * convert from model 1.3 to 2.0 * fix .gitignore --- deepmd/entrypoints/__init__.py | 4 +- deepmd/entrypoints/convert.py | 13 ++++++ deepmd/entrypoints/main.py | 33 +++++++++++++- deepmd/utils/convert.py | 59 ++++++++++++++++++++++++++ source/api_cc/include/common.h | 4 ++ source/api_cc/src/DeepPot.cc | 5 +++ source/api_cc/src/common.cc | 2 +- source/op/prod_env_mat_multi_device.cc | 53 ++++++++++++++++++++++- 8 files changed, 169 insertions(+), 4 deletions(-) create mode 100644 deepmd/entrypoints/convert.py create mode 100644 deepmd/utils/convert.py diff --git a/deepmd/entrypoints/__init__.py b/deepmd/entrypoints/__init__.py index 3beceace3a..4a02b995f3 100644 --- a/deepmd/entrypoints/__init__.py +++ b/deepmd/entrypoints/__init__.py @@ -8,6 +8,7 @@ from .train import train from .transfer import transfer from ..infer.model_devi import make_model_devi +from .convert import convert __all__ = [ "config", @@ -18,5 +19,6 @@ "transfer", "compress", "doc_train_input", - "make_model_devi" + "make_model_devi", + "convert", ] diff --git a/deepmd/entrypoints/convert.py b/deepmd/entrypoints/convert.py new file mode 100644 index 0000000000..4bf514fe51 --- /dev/null +++ b/deepmd/entrypoints/convert.py @@ -0,0 +1,13 @@ +from deepmd.utils.convert import convert_13_to_20 + +def convert( + *, + FROM: str, + input_model: str, + output_model: str, + **kwargs, +): + if FROM == '1.3': + convert_13_to_20(input_model, output_model) + else: + raise RuntimeError('unsupported model version ' + FROM) diff --git a/deepmd/entrypoints/main.py b/deepmd/entrypoints/main.py index b245053053..04dc245271 100644 --- a/deepmd/entrypoints/main.py +++ b/deepmd/entrypoints/main.py @@ -3,7 +3,7 @@ import argparse import logging from pathlib import Path -from typing import List, Optional +from typing import Dict, List, Optional from deepmd.entrypoints import ( compress, @@ -14,6 +14,7 @@ train, transfer, make_model_devi, + convert, ) from deepmd.loggers import set_log_handles @@ 
-359,6 +360,34 @@ def parse_args(args: Optional[List[str]] = None): help="The trajectory frequency of the system" ) + # * convert models + # supported: 1.3->2.0 + parser_transform = subparsers.add_parser( + 'convert-from', + parents=[parser_log], + help='convert lower model version to supported version', + ) + parser_transform.add_argument( + 'FROM', + type = str, + choices = ['1.3'], + help="The original model compatibility", + ) + parser_transform.add_argument( + '-i', + "--input-model", + default = "frozen_model.pb", + type=str, + help = "the input model", + ) + parser_transform.add_argument( + "-o", + "--output-model", + default = "convert_out.pb", + type=str, + help='the output model', + ) + parsed_args = parser.parse_args(args=args) if parsed_args.command is None: parser.print_help() @@ -402,6 +431,8 @@ def main(): doc_train_input() elif args.command == "model-devi": make_model_devi(**dict_args) + elif args.command == "convert-from": + convert(**dict_args) elif args.command is None: pass else: diff --git a/deepmd/utils/convert.py b/deepmd/utils/convert.py new file mode 100644 index 0000000000..0d9c39df88 --- /dev/null +++ b/deepmd/utils/convert.py @@ -0,0 +1,59 @@ +import os +from deepmd.env import tf +from google.protobuf import text_format +from tensorflow.python.platform import gfile + +def convert_13_to_20(input_model: str, output_model: str): + convert_pb_to_pbtxt(input_model, 'frozen_model.pbtxt') + convert_dp13_to_dp20('frozen_model.pbtxt') + convert_pbtxt_to_pb('frozen_model.pbtxt', output_model) + if os.path.isfile('frozen_model.pbtxt'): + os.remove('frozen_model.pbtxt') + print("the converted output model (2.0 support) is saved in %s" % output_model) + +def convert_pb_to_pbtxt(pbfile: str, pbtxtfile: str): + with gfile.FastGFile(pbfile, 'rb') as f: + graph_def = tf.GraphDef() + graph_def.ParseFromString(f.read()) + tf.import_graph_def(graph_def, name='') + tf.train.write_graph(graph_def, './', pbtxtfile, as_text=True) + +def 
convert_pbtxt_to_pb(pbtxtfile: str, pbfile: str): + with tf.gfile.FastGFile(pbtxtfile, 'r') as f: + graph_def = tf.GraphDef() + file_content = f.read() + # Merges the human-readable string in `file_content` into `graph_def`. + text_format.Merge(file_content, graph_def) + tf.train.write_graph(graph_def, './', pbfile, as_text=False) + +def convert_dp13_to_dp20(fname: str): + with open(fname) as fp: + file_content = fp.read() + file_content += """ +node { + name: "model_attr/model_version" + op: "Const" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "1.0" + } + } + } +} +""" + file_content = file_content\ + .replace('DescrptSeA', 'ProdEnvMatA')\ + .replace('DescrptSeR', 'ProdEnvMatR') + with open(fname, 'w') as fp: + fp.write(file_content) diff --git a/source/api_cc/include/common.h b/source/api_cc/include/common.h index d59878693e..75fd61a6f7 100644 --- a/source/api_cc/include/common.h +++ b/source/api_cc/include/common.h @@ -87,6 +87,10 @@ void get_env_nthreads(int & num_intra_nthreads, int & num_inter_nthreads); +struct +tf_exception: public std::exception { +}; + /** * @brief Check TensorFlow status. Exit if not OK. * @param[in] status TensorFlow status. diff --git a/source/api_cc/src/DeepPot.cc b/source/api_cc/src/DeepPot.cc index 9400b47691..c862bb84fd 100644 --- a/source/api_cc/src/DeepPot.cc +++ b/source/api_cc/src/DeepPot.cc @@ -254,7 +254,12 @@ init (const std::string & model, const int & gpu_rank, const std::string & file_ if (dfparam < 0) dfparam = 0; if (daparam < 0) daparam = 0; model_type = get_scalar("model_attr/model_type"); + try{ model_version = get_scalar("model_attr/model_version"); + } catch (deepmd::tf_exception& e){ + // no model version defined in old models + model_version = "0.0"; + } if(! 
model_compatable(model_version)){ throw std::runtime_error( "incompatable model: version " + model_version diff --git a/source/api_cc/src/common.cc b/source/api_cc/src/common.cc index 74c317529e..579216cb2c 100644 --- a/source/api_cc/src/common.cc +++ b/source/api_cc/src/common.cc @@ -201,7 +201,7 @@ deepmd:: check_status(const tensorflow::Status& status) { if (!status.ok()) { std::cout << status.ToString() << std::endl; - exit(1); + throw deepmd::tf_exception(); } } diff --git a/source/op/prod_env_mat_multi_device.cc b/source/op/prod_env_mat_multi_device.cc index e4e12cac2b..6320f1f501 100644 --- a/source/op/prod_env_mat_multi_device.cc +++ b/source/op/prod_env_mat_multi_device.cc @@ -25,6 +25,26 @@ REGISTER_OP("ProdEnvMatA") .Output("nlist: int32"); // only sel_a and rcut_r uesd. +// an alias of ProdEnvMatA -- Compatible with v1.3 +REGISTER_OP("DescrptSeA") + .Attr("T: {float, double}") + .Input("coord: T") + .Input("type: int32") + .Input("natoms: int32") + .Input("box : T") + .Input("mesh : int32") + .Input("davg: T") + .Input("dstd: T") + .Attr("rcut_a: float") + .Attr("rcut_r: float") + .Attr("rcut_r_smth: float") + .Attr("sel_a: list(int)") + .Attr("sel_r: list(int)") + .Output("descrpt: T") + .Output("descrpt_deriv: T") + .Output("rij: T") + .Output("nlist: int32"); + REGISTER_OP("ProdEnvMatR") .Attr("T: {float, double}") .Input("coord: T") @@ -42,6 +62,23 @@ REGISTER_OP("ProdEnvMatR") .Output("rij: T") .Output("nlist: int32"); +// an alias of ProdEnvMatR -- Compatible with v1.3 +REGISTER_OP("DescrptSeR") + .Attr("T: {float, double}") + .Input("coord: T") + .Input("type: int32") + .Input("natoms: int32") + .Input("box: T") + .Input("mesh: int32") + .Input("davg: T") + .Input("dstd: T") + .Attr("rcut: float") + .Attr("rcut_smth: float") + .Attr("sel: list(int)") + .Output("descrpt: T") + .Output("descrpt_deriv: T") + .Output("rij: T") + .Output("nlist: int32"); template static int @@ -1364,17 +1401,25 @@ _prepare_coord_nlist_gpu_rocm( // Register the CPU 
kernels. +// Compatible with v1.3 #define REGISTER_CPU(T) \ REGISTER_KERNEL_BUILDER( \ Name("ProdEnvMatA").Device(DEVICE_CPU).TypeConstraint("T"), \ ProdEnvMatAOp); \ REGISTER_KERNEL_BUILDER( \ Name("ProdEnvMatR").Device(DEVICE_CPU).TypeConstraint("T"), \ - ProdEnvMatROp); + ProdEnvMatROp); \ +REGISTER_KERNEL_BUILDER( \ + Name("DescrptSeA").Device(DEVICE_CPU).TypeConstraint("T"), \ + ProdEnvMatAOp); \ +REGISTER_KERNEL_BUILDER( \ + Name("DescrptSeR").Device(DEVICE_CPU).TypeConstraint("T"), \ + ProdEnvMatROp); REGISTER_CPU(float); REGISTER_CPU(double); // Register the GPU kernels. +// Compatible with v1.3 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define REGISTER_GPU(T) \ REGISTER_KERNEL_BUILDER( \ @@ -1382,6 +1427,12 @@ REGISTER_KERNEL_BUILDER( ProdEnvMatAOp); \ REGISTER_KERNEL_BUILDER( \ Name("ProdEnvMatR").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms").HostMemory("box"), \ + ProdEnvMatROp); \ +REGISTER_KERNEL_BUILDER( \ + Name("DescrptSeA").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms").HostMemory("box"), \ + ProdEnvMatAOp); \ +REGISTER_KERNEL_BUILDER( \ + Name("DescrptSeR").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms").HostMemory("box"), \ ProdEnvMatROp); REGISTER_GPU(float); REGISTER_GPU(double); From 7d145c5a0329c683d0151dc6bef50154a9fea86c Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 26 Jun 2021 06:05:24 -0400 Subject: [PATCH 7/8] add documents for conda (#798) * add documentation for conda fix #650, #740, #750 * add docs * Update install.md Co-authored-by: Han Wang --- doc/install.md | 46 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/doc/install.md b/doc/install.md index e2d7859097..cd0c944161 100644 --- a/doc/install.md +++ b/doc/install.md @@ -3,6 +3,7 @@ - [Easy installation methods](#easy-installation-methods) - [Install from source code](#install-from-source-code) - [Install i-PI](#install-i-pi) +- [Building conda 
packages](#building-conda-packages) ## Easy installation methods @@ -18,17 +19,33 @@ After your easy installation, DeePMD-kit (`dp`) and LAMMPS (`lmp`) will be avail ### Install off-line packages Both CPU and GPU version offline packages are avaiable in [the Releases page](https://github.com/deepmodeling/deepmd-kit/releases). +Some packages are splited into two files due to size limit of GitHub. One may merge them into one after downloading: +```bash +cat deepmd-kit-2.0.0-cuda11.1_gpu-Linux-x86_64.sh.0 deepmd-kit-2.0.0-cuda11.1_gpu-Linux-x86_64.sh.1 > deepmd-kit-2.0.0-cuda11.1_gpu-Linux-x86_64.sh +``` + ### Install with conda DeePMD-kit is avaiable with [conda](https://github.com/conda/conda). Install [Anaconda](https://www.anaconda.com/distribution/#download-section) or [Miniconda](https://docs.conda.io/en/latest/miniconda.html) first. -To install the CPU version: +One may create an environment that contains the CPU version of DeePMD-kit and LAMMPS: +```bash +conda create -n deepmd deepmd-kit=*=*cpu lammps-dp=*=*cpu -c https://conda.deepmodeling.org +``` + +Or one may want to create a GPU environment containing [CUDA Toolkit](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#binary-compatibility__table-toolkit-driver): ```bash -conda install deepmd-kit=*=*cpu lammps-dp=*=*cpu -c deepmodeling +conda create -n deepmd deepmd-kit=*=*gpu lammps-dp=*=*gpu cudatoolkit=11.1 -c https://conda.deepmodeling.org -c nvidia ``` +One could change the CUDA Toolkit version from `11.1` to `10.1` or `10.0`. 
-To install the GPU version containing [CUDA 10.1](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#binary-compatibility__table-toolkit-driver): +One may speficy the DeePMD-kit version such as `2.0.0` using ```bash -conda install deepmd-kit=*=*gpu lammps-dp=*=*gpu -c deepmodeling +conda create -n deepmd deepmd-kit=2.0.0=*cpu lammps-dp=2.0.0=*cpu -c https://conda.deepmodeling.org +``` + +One may enable the environment using +```bash +conda activate deepmd ``` ### Install with docker @@ -249,3 +266,24 @@ Test with Pytest: pip install pytest pytest --pyargs ipi.tests ``` + +## Building conda packages + +One may want to keep both convenience and personalization of the DeePMD-kit. To achieve this goal, one can consider builing conda packages. We provide building scripts in [deepmd-kit-recipes organization](https://github.com/deepmd-kit-recipes/). These building tools are driven by [conda-build](https://github.com/conda/conda-build) and [conda-smithy](https://github.com/conda-forge/conda-smithy). + +For example, if one wants to turn on `MPIIO` package in LAMMPS, go to [`lammps-dp-feedstock`](https://github.com/deepmd-kit-recipes/lammps-dp-feedstock/) repository and modify `recipe/build.sh`. `-D PKG_MPIIO=OFF` should be changed to `-D PKG_MPIIO=ON`. Then go to the main directory and executing + +```sh +./build-locally.py +``` + +This requires the Docker has been installed. 
After the building, the packages will be generated in `build_artifacts/linux-64` and `build_artifacts/noarch`, and then one can install then execuating +```sh +conda create -n deepmd lammps-dp -c file:///path/to/build_artifacts -c https://conda.deepmodeling.org -c nvidia +``` + +One may also upload packages to one's Anaconda channel, so they can be installed on other machines: + +```sh +anaconda upload /path/to/build_artifacts/linux-64/*.tar.bz2 /path/to/build_artifacts/noarch/*.tar.bz2 +``` From b145ca336e63a6d2628d8cdd3970ac395e577b98 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 26 Jun 2021 06:10:48 -0400 Subject: [PATCH 8/8] throw a message if tf runtime is incompatible (#797) * throw a message if tf runtime is incompatible fix #557 and #796. * still raise if tf version is correct * detect TF_CXX11_ABI_FLAG * format codes * fix lint * move messages into the function * fix lint * fix lints --- deepmd/env.py | 82 +++++++++++++++++++++++++------ source/cmake/Findtensorflow.cmake | 19 ++++++- source/cmake/tf_version.cpp | 10 ++++ source/config/run_config.ini | 2 + 4 files changed, 98 insertions(+), 15 deletions(-) create mode 100644 source/cmake/tf_version.cpp diff --git a/deepmd/env.py b/deepmd/env.py index 8c6937b7f7..5f5c344031 100644 --- a/deepmd/env.py +++ b/deepmd/env.py @@ -1,13 +1,14 @@ """Module that sets tensorflow working environment and exports inportant constants.""" -import os -from pathlib import Path import logging +import os import platform -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Any -import numpy as np -from imp import reload from configparser import ConfigParser +from imp import reload +from pathlib import Path +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple + +import numpy as np if TYPE_CHECKING: from types import ModuleType @@ -37,6 +38,7 @@ SHARED_LIB_MODULE = "op" + def set_env_if_empty(key: str, value: str, verbose: bool = True): """Set environment variable only if it is empty. 
@@ -74,7 +76,8 @@ def set_mkl(): """ if "mkl_rt" in np.__config__.get_info("blas_mkl_info").get("libraries", []): set_env_if_empty("KMP_BLOCKTIME", "0") - set_env_if_empty("KMP_AFFINITY", "granularity=fine,verbose,compact,1,0") + set_env_if_empty( + "KMP_AFFINITY", "granularity=fine,verbose,compact,1,0") reload(np) @@ -118,8 +121,10 @@ def get_tf_session_config() -> Any: intra_op_parallelism_threads=intra, inter_op_parallelism_threads=inter ) + default_tf_session_config = get_tf_session_config() + def get_module(module_name: str) -> "ModuleType": """Load force module. @@ -149,14 +154,59 @@ def get_module(module_name: str) -> "ModuleType": if not module_file.is_file(): raise FileNotFoundError(f"module {module_name} does not exist") else: - module = tf.load_op_library(str(module_file)) + try: + module = tf.load_op_library(str(module_file)) + except tf.errors.NotFoundError as e: + # check CXX11_ABI_FLAG is compatiblity + # see https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html + # ABI should be the same + if 'CXX11_ABI_FLAG' in tf.__dict__: + tf_cxx11_abi_flag = tf.CXX11_ABI_FLAG + else: + tf_cxx11_abi_flag = tf.sysconfig.CXX11_ABI_FLAG + if TF_CXX11_ABI_FLAG != tf_cxx11_abi_flag: + raise RuntimeError( + "This deepmd-kit package was compiled with " + "CXX11_ABI_FLAG=%d, but TensorFlow runtime was compiled " + "with CXX11_ABI_FLAG=%d. These two library ABIs are " + "incompatible and thus an error is raised when loading %s." + "You need to rebuild deepmd-kit against this TensorFlow " + "runtime." % ( + TF_CXX11_ABI_FLAG, + tf_cxx11_abi_flag, + module_name, + )) from e + + # different versions may cause incompatibility + # see #406, #447, #557, #774, and #796 for example + # throw a message if versions are different + if TF_VERSION != tf.version.VERSION: + raise RuntimeError( + "The version of TensorFlow used to compile this " + "deepmd-kit package is %s, but the version of TensorFlow " + "runtime you are using is %s. 
These two versions are " + "incompatible and thus an error is raised when loading %s. " + "You need to install TensorFlow %s, or rebuild deepmd-kit " + "against TensorFlow %s.\nIf you are using a wheel from " + "pypi, you may consider to install deepmd-kit execuating " + "`pip install deepmd-kit --no-binary deepmd-kit` " + "instead." % ( + TF_VERSION, + tf.version.VERSION, + module_name, + TF_VERSION, + tf.version.VERSION, + )) from e + raise RuntimeError( + "This deepmd-kit package is inconsitent with TensorFlow" + "Runtime, thus an error is raised when loading %s." + "You need to rebuild deepmd-kit against this TensorFlow" + "runtime." % ( + module_name, + )) from e return module -op_module = get_module("libop_abi") -op_grads_module = get_module("libop_grads") - - def _get_package_constants( config_file: Path = Path(__file__).parent / "pkg_config/run_config.ini", ) -> Dict[str, str]: @@ -165,7 +215,7 @@ def _get_package_constants( Parameters ---------- config_file : str, optional - path to CONFIG file, by default "config/run_config.ini" + path to CONFIG file, by default "pkg_config/run_config.ini" Returns ------- @@ -176,8 +226,14 @@ def _get_package_constants( config.read(config_file) return dict(config.items("CONFIG")) + GLOBAL_CONFIG = _get_package_constants() MODEL_VERSION = GLOBAL_CONFIG["model_version"] +TF_VERSION = GLOBAL_CONFIG["tf_version"] +TF_CXX11_ABI_FLAG = int(GLOBAL_CONFIG["tf_cxx11_abi_flag"]) + +op_module = get_module("libop_abi") +op_grads_module = get_module("libop_grads") if GLOBAL_CONFIG["precision"] == "-DHIGH_PREC": GLOBAL_TF_FLOAT_PRECISION = tf.float64 @@ -221,5 +277,3 @@ def global_cvt_2_ener_float(xx: tf.Tensor) -> tf.Tensor: output tensor cast to `GLOBAL_ENER_FLOAT_PRECISION` """ return tf.cast(xx, GLOBAL_ENER_FLOAT_PRECISION) - - diff --git a/source/cmake/Findtensorflow.cmake b/source/cmake/Findtensorflow.cmake index 8901c698b9..91ed0809a3 100644 --- a/source/cmake/Findtensorflow.cmake +++ b/source/cmake/Findtensorflow.cmake @@ 
-137,10 +137,27 @@ else (BUILD_CPP_IF) endif () endif (BUILD_CPP_IF) +# detect TensorFlow version +try_run( + TENSORFLOW_VERSION_RUN_RESULT_VAR TENSORFLOW_VERSION_COMPILE_RESULT_VAR + ${CMAKE_CURRENT_BINARY_DIR}/tf_version + "${CMAKE_CURRENT_LIST_DIR}/tf_version.cpp" + LINK_LIBRARIES ${TensorFlowFramework_LIBRARY} + CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${TensorFlow_INCLUDE_DIRS}" + RUN_OUTPUT_VARIABLE TENSORFLOW_VERSION + COMPILE_OUTPUT_VARIABLE TENSORFLOW_VERSION_COMPILE_OUTPUT_VAR +) +if (NOT ${TENSORFLOW_VERSION_COMPILE_RESULT_VAR}) + message(FATAL_ERROR "Failed to compile: \n ${TENSORFLOW_VERSION_COMPILE_OUTPUT_VAR}" ) +endif() +if (NOT ${TENSORFLOW_VERSION_RUN_RESULT_VAR} EQUAL "0") + message(FATAL_ERROR "Failed to run, return code: ${TENSORFLOW_VERSION}" ) +endif() + # print message if (NOT TensorFlow_FIND_QUIETLY) message(STATUS "Found TensorFlow: ${TensorFlow_INCLUDE_DIRS}, ${TensorFlow_LIBRARY}, ${TensorFlowFramework_LIBRARY} " - " in ${TensorFlow_search_PATHS}") + " in ${TensorFlow_search_PATHS} (found version \"${TENSORFLOW_VERSION}\")") endif () unset(TensorFlow_search_PATHS) diff --git a/source/cmake/tf_version.cpp b/source/cmake/tf_version.cpp new file mode 100644 index 0000000000..9d129aefb8 --- /dev/null +++ b/source/cmake/tf_version.cpp @@ -0,0 +1,10 @@ +#include +#include "tensorflow/core/public/version.h" + +int main(int argc, char * argv[]) +{ + // See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h + // TF_VERSION_STRING has been avaiable since TensorFlow v0.6 + std::cout << TF_VERSION_STRING; + return 0; +} diff --git a/source/config/run_config.ini b/source/config/run_config.ini index 3f2e8cc86a..bb04319e47 100644 --- a/source/config/run_config.ini +++ b/source/config/run_config.ini @@ -6,5 +6,7 @@ GIT_DATE = @GIT_DATE@ GIT_BRANCH = @GIT_BRANCH@ TF_INCLUDE_DIR = @TensorFlow_INCLUDE_DIRS@ TF_LIBS = @TensorFlow_LIBRARY@ +TF_VERSION = @TENSORFLOW_VERSION@ +TF_CXX11_ABI_FLAG = @OP_CXX_ABI@ PRECISION = 
@PREC_DEF@ MODEL_VERSION=@MODEL_VERSION@