deepmodeling · amcadmus · Feb 10, 2021 · Feb 2, 2021 · Feb 2, 2021 · Feb 3, 2021
diff --git a/README.md b/README.md
@@ -81,7 +81,8 @@ The typical procedure of using DeePMD-kit includes 5 steps
 3. [Analyze training with Tensorboard](doc/tensorboard.md)
 4. [Freeze the model](doc/use-deepmd-kit.md#freeze-a-model)
 5. [Test the model](doc/use-deepmd-kit.md#test-a-model)
-6. [Inference the model in python](doc/use-deepmd-kit.md#model-inference) or using the model in other molecular simulation packages like [LAMMPS](doc/use-deepmd-kit.md#run-md-with-lammps), [i-PI](doc/use-deepmd-kit.md#run-path-integral-md-with-i-pi) or [ASE](doc/use-deepmd-kit.md#use-deep-potential-with-ase).
+6. [Compress the model](doc/use-deepmd-kit.md#compress-a-model)
+7. [Run inference with the model in Python](doc/use-deepmd-kit.md#model-inference) or use the model in other molecular simulation packages like [LAMMPS](doc/use-deepmd-kit.md#run-md-with-lammps), [i-PI](doc/use-deepmd-kit.md#run-path-integral-md-with-i-pi) or [ASE](doc/use-deepmd-kit.md#use-deep-potential-with-ase).
 
 A quick-start on using DeePMD-kit can be found [here](doc/use-deepmd-kit.md).
 

diff --git a/deepmd/common.py b/deepmd/common.py
@@ -20,7 +20,7 @@
 import yaml
 
 from deepmd.env import op_module, tf
-from deepmd.RunOptions import global_tf_float_precision
+from deepmd.RunOptions import global_tf_float_precision, global_np_float_precision
 
 if TYPE_CHECKING:
     _DICT_VAL = TypeVar("_DICT_VAL")
@@ -451,3 +451,15 @@ def dec(obj: "_OBJ") -> "_OBJ":
         return obj
 
     return dec
+
+def get_np_precision(precision):
+    """Return the numpy float type selected by a precision name.
+
+    `precision` is one of "default" (-> global_np_float_precision), "float16",
+    "float32" or "float64"; any other value raises RuntimeError.
+    """
+    if precision == "default":
+        return global_np_float_precision
+    if precision in ("float16", "float32", "float64"):
+        return getattr(np, precision)
+    raise RuntimeError("%s is not a valid precision" % (precision,))
diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py
@@ -1,14 +1,16 @@
+import math
 import numpy as np
 from typing import Tuple, List
 
 from deepmd.env import tf
-from deepmd.common import get_activation_func, get_precision, ACTIVATION_FN_DICT, PRECISION_DICT, docstring_parameter
+from deepmd.common import get_activation_func, get_precision, ACTIVATION_FN_DICT, PRECISION_DICT, docstring_parameter, get_np_precision
 from deepmd.utils.argcheck import list_to_doc
 from deepmd.RunOptions import global_tf_float_precision
 from deepmd.RunOptions import global_np_float_precision
 from deepmd.env import op_module
 from deepmd.env import default_tf_session_config
 from deepmd.utils.network import embedding_net
+from deepmd.utils.tabulate import DeepTabulate
 
 
 class DescrptSeA ():
@@ -71,6 +73,7 @@ def __init__ (self,
         self.trainable = trainable
         self.filter_activation_fn = get_activation_func(activation_function)
         self.filter_precision = get_precision(precision)
+        self.filter_np_precision = get_np_precision(precision)
         self.exclude_types = set()
         for tt in exclude_types:
             assert(len(tt) == 2)
@@ -96,7 +99,7 @@ def __init__ (self,
         self.useBN = False
         self.dstd = None
         self.davg = None
-
+        self.compress = False
         self.place_holders = {}
         avg_zero = np.zeros([self.ntypes,self.ndescrpt]).astype(global_np_float_precision)
         std_ones = np.ones ([self.ntypes,self.ndescrpt]).astype(global_np_float_precision)
@@ -226,6 +229,41 @@ def compute_input_stats (self,
             self.davg = np.array(all_davg)
         self.dstd = np.array(all_dstd)
 
+    def enable_compression(self,
+                           min_nbor_dist : float,
+                           model_file : str = 'frozon_model.pb',
+                           table_extrapolate : float = 5,
+                           table_stride_1 : float = 0.01,
+                           table_stride_2 : float = 0.1,
+                           check_frequency : int = -1
+    ) -> None:
+        """
+        Switch the descriptor to compressed mode and build its lookup table
+        from the frozen model with DeepTabulate.
+
+        Parameters
+        ----------
+        min_nbor_dist
+                The nearest distance between atoms
+        model_file
+                The original frozen model, which will be compressed by the program.
+                NOTE(review): the default is spelled 'frozon_model.pb'; confirm it is intended.
+        table_extrapolate
+                The scale of model extrapolation
+        table_stride_1
+                The uniform stride of the first table
+        table_stride_2
+                The uniform stride of the second table
+        check_frequency
+                The overflow check frequency; this method only records it in table_config
+        """
+        self.compress = True
+        self.model_file = model_file
+        # table_config order: [extrapolate, stride_1, stride_2, check_frequency]
+        build_args = [table_extrapolate, table_stride_1, table_stride_2]
+        self.table_config = build_args + [check_frequency]
+        self.table = DeepTabulate(model_file, self.type_one_side)
+        self.lower, self.upper = self.table.build(min_nbor_dist, *build_args)
 
     def build (self, 
                coord_ : tf.Tensor, 
@@ -336,7 +374,6 @@ def build (self,
         # only used when tensorboard was set as true
         tf.summary.histogram('embedding_net_output', self.dout)
         return self.dout
-
 
     def get_rot_mat(self) -> tf.Tensor:
         """
@@ -516,28 +553,38 @@ def _filter(self,
             # with (natom x nei_type_i) x 1
             xyz_scatter = tf.reshape(tf.slice(inputs_reshape, [0,0],[-1,1]),[-1,1])
             # with (natom x nei_type_i) x out_size
-            if (type_input, type_i) not in self.exclude_types:
-                xyz_scatter = embedding_net(xyz_scatter, 
-                                            self.filter_neuron, 
-                                            self.filter_precision, 
-                                            activation_fn = activation_fn, 
-                                            resnet_dt = self.filter_resnet_dt,
-                                            name_suffix = "_"+str(type_i),
-                                            stddev = stddev,
-                                            bavg = bavg,
-                                            seed = seed,
-                                            trainable = trainable)
+            if self.compress and (type_input, type_i) not in self.exclude_types:
+              info = [self.lower, self.upper, self.upper * self.table_config[0], self.table_config[1], self.table_config[2], self.table_config[3]]
+              if self.type_one_side:
+                net = 'filter_-1_net_' + str(type_i)
+              else:
+                net = 'filter_' + str(type_input) + '_net_' + str(type_i)
+              if type_i == 0:
+                xyz_scatter_1  = op_module.tabulate_fusion(self.table.data[net].astype(self.filter_np_precision), info, xyz_scatter, tf.reshape(inputs_i, [-1, shape_i[1]//4, 4]), last_layer_size = outputs_size[-1])
+              else:
+                xyz_scatter_1 += op_module.tabulate_fusion(self.table.data[net].astype(self.filter_np_precision), info, xyz_scatter, tf.reshape(inputs_i, [-1, shape_i[1]//4, 4]), last_layer_size = outputs_size[-1])
             else:
-              w = tf.zeros((outputs_size[0], outputs_size[-1]), dtype=global_tf_float_precision)
-              xyz_scatter = tf.matmul(xyz_scatter, w)
-            # natom x nei_type_i x out_size
-            xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1]//4, outputs_size[-1]))
-
-            # xyz_scatter_total.append(xyz_scatter)
-            if type_i == 0 :
-                xyz_scatter_1 = tf.matmul(tf.reshape(inputs_i, [-1, shape_i[1]//4, 4]), xyz_scatter, transpose_a = True)
-            else :
-                xyz_scatter_1 += tf.matmul(tf.reshape(inputs_i, [-1, shape_i[1]//4, 4]), xyz_scatter, transpose_a = True)
+              if (type_input, type_i) not in self.exclude_types:
+                  xyz_scatter = embedding_net(xyz_scatter, 
+                                              self.filter_neuron, 
+                                              self.filter_precision, 
+                                              activation_fn = activation_fn, 
+                                              resnet_dt = self.filter_resnet_dt,
+                                              name_suffix = "_"+str(type_i),
+                                              stddev = stddev,
+                                              bavg = bavg,
+                                              seed = seed,
+                                              trainable = trainable)
+              else:
+                w = tf.zeros((outputs_size[0], outputs_size[-1]), dtype=global_tf_float_precision)
+                xyz_scatter = tf.matmul(xyz_scatter, w)
+              # natom x nei_type_i x out_size
+              xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1]//4, outputs_size[-1]))  
+              # xyz_scatter_total.append(xyz_scatter)
+              if type_i == 0 :
+                  xyz_scatter_1 = tf.matmul(tf.reshape(inputs_i, [-1, shape_i[1]//4, 4]), xyz_scatter, transpose_a = True)
+              else :
+                  xyz_scatter_1 += tf.matmul(tf.reshape(inputs_i, [-1, shape_i[1]//4, 4]), xyz_scatter, transpose_a = True)
           # natom x nei x outputs_size
           # xyz_scatter = tf.concat(xyz_scatter_total, axis=1)
           # natom x nei x 4

diff --git a/deepmd/utils/neighbor_stat.py b/deepmd/utils/neighbor_stat.py
@@ -0,0 +1,92 @@
+import math
+import numpy as np
+from tqdm import tqdm
+from deepmd.env import tf
+from typing import Tuple, List
+from deepmd.env import op_module
+from deepmd.env import default_tf_session_config
+from deepmd.RunOptions import global_np_float_precision
+from deepmd.utils.data_system import DeepmdDataSystem
+
+class NeighborStat():
+    """
+    Class for getting training data information.
+    It loads the sets of a DeepmdDataSystem and measures, through the neighbor_stat op, the nearest nbor distance between atoms and the max nbor size of atoms.
+    """
+    def __init__(self,
+                 ntypes : int,
+                 rcut: float) -> None:
+        """
+        Constructor. Builds the neighbor_stat op in its own graph and opens a session on that graph.
+
+        Parameters
+        ----------
+        ntypes
+                The num of atom types
+        rcut
+                The cut-off radius
+        """
+        self.rcut = rcut
+        self.ntypes = ntypes
+        self.place_holders = {}
+        # the placeholders and the op are created in a dedicated graph, not the default one
+        sub_graph = tf.Graph()
+        with sub_graph.as_default():
+            for ii in ['coord', 'box']:
+                self.place_holders[ii] = tf.placeholder(global_np_float_precision, [None, None], name='t_'+ii)
+            self.place_holders['type'] = tf.placeholder(tf.int32, [None, None], name='t_type')
+            self.place_holders['natoms_vec'] = tf.placeholder(tf.int32, [self.ntypes+2], name='t_natoms')
+            self.place_holders['default_mesh'] = tf.placeholder(tf.int32, [None], name='t_mesh')
+            self._max_nbor_size, self._min_nbor_dist \
+                = op_module.neighbor_stat(self.place_holders['coord'],
+                                         self.place_holders['type'],
+                                         self.place_holders['natoms_vec'],
+                                         self.place_holders['box'],
+                                         self.place_holders['default_mesh'],
+                                         rcut = self.rcut)
+        self.sub_sess = tf.Session(graph = sub_graph, config=default_tf_session_config)
+
+    def get_stat(self,
+                 data : DeepmdDataSystem) -> Tuple[float, List[int]]:
+        """
+        Get the statistics of the training data: nearest nbor distance between atoms and max nbor size of atoms. Both are also stored on the instance and printed.
+
+        Parameters
+        ----------
+        data
+                Class for manipulating many data systems. It is implemented with the help of DeepmdData.
+
+        Returns
+        -------
+        min_nbor_dist
+                The nearest distance between neighbor atoms
+        max_nbor_size
+                A list with ntypes integers, denotes the actual achieved max sel
+        """
+        # the running minimum starts at 100.0, so a larger nearest distance is reported as 100.0
+        self.min_nbor_dist = 100.0
+        self.max_nbor_size = [0] * self.ntypes
+
+        for ii in tqdm(range(len(data.system_dirs)), desc = '# DEEPMD: getting data info'):
+            system, natoms = data.data_systems[ii], data.natoms[ii]
+            # per-system feeds are the same for every frame, so convert them once
+            natoms_vec = np.array(data.natoms_vec[ii])
+            default_mesh = np.array(data.default_mesh[ii])
+            for set_dir in system.dirs:
+                data_set = system._load_set(set_dir)
+                coords, types, boxes = (np.array(data_set[key]) for key in ('coord', 'type', 'box'))
+                for kk in range(types.shape[0]):
+                    mn, dt = self.sub_sess.run([self._max_nbor_size, self._min_nbor_dist], feed_dict = {
+                        self.place_holders['coord']: coords[kk].reshape([-1, natoms * 3]),
+                        self.place_holders['type']: types[kk].reshape([-1, natoms]),
+                        self.place_holders['natoms_vec']: natoms_vec,
+                        self.place_holders['box']: boxes[kk].reshape([-1, 9]),
+                        self.place_holders['default_mesh']: default_mesh,
+                    })
+                    self.min_nbor_dist = min(self.min_nbor_dist, np.min(dt))
+                    for ww in range(self.ntypes):
+                        self.max_nbor_size[ww] = max(self.max_nbor_size[ww], np.max(mn[:, ww]))
+
+        print('# DEEPMD: training data with min nbor dist: ' + str(self.min_nbor_dist))
+        print('# DEEPMD: training data with max nbor size: ' + str(self.max_nbor_size))
+        return self.min_nbor_dist, self.max_nbor_size