Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
6018a96
Merge pull request #5 from denghuilu/api-denghui
denghuilu Feb 2, 2021
b5166a8
model compression
denghuilu Feb 2, 2021
c9525a2
Update deepmd/utils/tabulate.py
denghuilu Feb 3, 2021
0e10156
add package tqdm into setup.py
denghuilu Feb 3, 2021
a2efd34
add the reference of global_np_float_precision into common.py
denghuilu Feb 3, 2021
20ccc0e
optimize the performance of gpu implementations of custome ops
denghuilu Feb 3, 2021
ed571e1
through compression error when resnet_dt is set true for descriptor
denghuilu Feb 3, 2021
043a2f8
update cmake support for Anpere architecture devices
denghuilu Feb 3, 2021
b32e57d
optimize the code structure of model compression
denghuilu Feb 4, 2021
49afc73
change the class name from DataInfo to EnvMatStat
denghuilu Feb 5, 2021
5885eed
optimize the interface of EnvMatStat class
denghuilu Feb 5, 2021
8588554
use standard tensorflow op style for custome ops
denghuilu Feb 5, 2021
feb0669
optimize code structure for method EnvMatStat
denghuilu Feb 6, 2021
0a6bba2
Update CustomeOperation.h
denghuilu Feb 6, 2021
e22fdef
optimize code structure of model compression
denghuilu Feb 7, 2021
131daaa
Merge branch api of https://github.com/deepmodeling/deepmd-kit into a…
denghuilu Feb 7, 2021
95e63e0
Merge branch 'api' of https://github.com/deepmodeling/deepmd-kit into…
denghuilu Feb 7, 2021
7231fb0
optimize code structure of model compression
denghuilu Feb 8, 2021
7dacd88
move the table precision control into descriptor
denghuilu Feb 8, 2021
478603b
Update use-deepmd-kit.md
denghuilu Feb 8, 2021
675270d
add intros of model compression to the README
denghuilu Feb 8, 2021
7748be7
Merge pull request #8 from denghuilu/denghuilu-patch-1
denghuilu Feb 9, 2021
f6f25bb
add type hint and mute warning message
denghuilu Feb 10, 2021
44aa249
add type hint for class NrighborStat
denghuilu Feb 10, 2021
c84b3d1
Merge branch api of https://github.com/deepmodeling/deepmd-kit into a…
denghuilu Feb 10, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ The typical procedure of using DeePMD-kit includes 5 steps
3. [Analyze training with Tensorboard](doc/tensorboard.md)
4. [Freeze the model](doc/use-deepmd-kit.md#freeze-a-model)
5. [Test the model](doc/use-deepmd-kit.md#test-a-model)
6. [Inference the model in python](doc/use-deepmd-kit.md#model-inference) or using the model in other molecular simulation packages like [LAMMPS](doc/use-deepmd-kit.md#run-md-with-lammps), [i-PI](doc/use-deepmd-kit.md#run-path-integral-md-with-i-pi) or [ASE](doc/use-deepmd-kit.md#use-deep-potential-with-ase).
6. [Compress the model](doc/use-deepmd-kit.md#compress-a-model)
7. [Run inference with the model in Python](doc/use-deepmd-kit.md#model-inference) or use the model in other molecular simulation packages like [LAMMPS](doc/use-deepmd-kit.md#run-md-with-lammps), [i-PI](doc/use-deepmd-kit.md#run-path-integral-md-with-i-pi) or [ASE](doc/use-deepmd-kit.md#use-deep-potential-with-ase).

A quick-start on using DeePMD-kit can be found [here](doc/use-deepmd-kit.md).

Expand Down
14 changes: 13 additions & 1 deletion deepmd/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import yaml

from deepmd.env import op_module, tf
from deepmd.RunOptions import global_tf_float_precision
from deepmd.RunOptions import global_tf_float_precision, global_np_float_precision

if TYPE_CHECKING:
_DICT_VAL = TypeVar("_DICT_VAL")
Expand Down Expand Up @@ -451,3 +451,15 @@ def dec(obj: "_OBJ") -> "_OBJ":
return obj

return dec

def get_np_precision(precision: str):
    """
    Convert a precision name into the matching numpy floating-point type.

    Parameters
    ----------
    precision
            Name of the precision. Supported values are "default",
            "float16", "float32" and "float64". "default" selects
            `global_np_float_precision`.

    Returns
    -------
    The numpy float type that corresponds to `precision`.

    Raises
    ------
    RuntimeError
            If `precision` is not one of the supported names.
    """
    if precision == "default":
        return global_np_float_precision
    elif precision == "float16":
        return np.float16
    elif precision == "float32":
        return np.float32
    elif precision == "float64":
        return np.float64
    else:
        # Format with %s: the argument is a name (a string), and %d would
        # fail with TypeError before the RuntimeError could be raised.
        # The one-element tuple keeps the formatting safe for any value.
        raise RuntimeError("%s is not a valid precision" % (precision,))
95 changes: 71 additions & 24 deletions deepmd/descriptor/se_a.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
import math
import numpy as np
from typing import Tuple, List

from deepmd.env import tf
from deepmd.common import get_activation_func, get_precision, ACTIVATION_FN_DICT, PRECISION_DICT, docstring_parameter
from deepmd.common import get_activation_func, get_precision, ACTIVATION_FN_DICT, PRECISION_DICT, docstring_parameter, get_np_precision
from deepmd.utils.argcheck import list_to_doc
from deepmd.RunOptions import global_tf_float_precision
from deepmd.RunOptions import global_np_float_precision
from deepmd.env import op_module
from deepmd.env import default_tf_session_config
from deepmd.utils.network import embedding_net
from deepmd.utils.tabulate import DeepTabulate


class DescrptSeA ():
Expand Down Expand Up @@ -71,6 +73,7 @@ def __init__ (self,
self.trainable = trainable
self.filter_activation_fn = get_activation_func(activation_function)
self.filter_precision = get_precision(precision)
self.filter_np_precision = get_np_precision(precision)
self.exclude_types = set()
for tt in exclude_types:
assert(len(tt) == 2)
Expand All @@ -96,7 +99,7 @@ def __init__ (self,
self.useBN = False
self.dstd = None
self.davg = None

self.compress = False
self.place_holders = {}
avg_zero = np.zeros([self.ntypes,self.ndescrpt]).astype(global_np_float_precision)
std_ones = np.ones ([self.ntypes,self.ndescrpt]).astype(global_np_float_precision)
Expand Down Expand Up @@ -226,6 +229,41 @@ def compute_input_stats (self,
self.davg = np.array(all_davg)
self.dstd = np.array(all_dstd)

def enable_compression(self,
                       min_nbor_dist : float,
                       model_file : str = 'frozon_model.pb',
                       table_extrapolate : float = 5,
                       table_stride_1 : float = 0.01,
                       table_stride_2 : float = 0.1,
                       check_frequency : int = -1
) -> None:
    """
    Switch the descriptor into compressed mode and build its lookup table.

    The table settings are remembered in `self.table_config`, a
    `DeepTabulate` object is created from the given model file, and the two
    values returned by its `build` call are kept as `self.lower` and
    `self.upper`.

    Parameters
    ----------
    min_nbor_dist
            The nearest distance between atoms
    model_file
            The original frozen model, which will be compressed by the program
    table_extrapolate
            The scale of model extrapolation
    table_stride_1
            The uniform stride of the first table
    table_stride_2
            The uniform stride of the second table
    check_frequency
            The overflow check frequency
    """
    # Flag first, so the rest of the descriptor knows compression was requested.
    self.compress = True
    self.model_file = model_file
    # Order matters: consumers read these settings by position.
    self.table_config = [
        table_extrapolate,
        table_stride_1,
        table_stride_2,
        check_frequency,
    ]
    self.table = DeepTabulate(self.model_file, self.type_one_side)
    table_bounds = self.table.build(min_nbor_dist,
                                    table_extrapolate,
                                    table_stride_1,
                                    table_stride_2)
    self.lower, self.upper = table_bounds

def build (self,
coord_ : tf.Tensor,
Expand Down Expand Up @@ -336,7 +374,6 @@ def build (self,
# only used when tensorboard was set as true
tf.summary.histogram('embedding_net_output', self.dout)
return self.dout


def get_rot_mat(self) -> tf.Tensor:
"""
Expand Down Expand Up @@ -516,28 +553,38 @@ def _filter(self,
# with (natom x nei_type_i) x 1
xyz_scatter = tf.reshape(tf.slice(inputs_reshape, [0,0],[-1,1]),[-1,1])
# with (natom x nei_type_i) x out_size
if (type_input, type_i) not in self.exclude_types:
xyz_scatter = embedding_net(xyz_scatter,
self.filter_neuron,
self.filter_precision,
activation_fn = activation_fn,
resnet_dt = self.filter_resnet_dt,
name_suffix = "_"+str(type_i),
stddev = stddev,
bavg = bavg,
seed = seed,
trainable = trainable)
if self.compress and (type_input, type_i) not in self.exclude_types:
info = [self.lower, self.upper, self.upper * self.table_config[0], self.table_config[1], self.table_config[2], self.table_config[3]]
if self.type_one_side:
net = 'filter_-1_net_' + str(type_i)
else:
net = 'filter_' + str(type_input) + '_net_' + str(type_i)
if type_i == 0:
xyz_scatter_1 = op_module.tabulate_fusion(self.table.data[net].astype(self.filter_np_precision), info, xyz_scatter, tf.reshape(inputs_i, [-1, shape_i[1]//4, 4]), last_layer_size = outputs_size[-1])
else:
xyz_scatter_1 += op_module.tabulate_fusion(self.table.data[net].astype(self.filter_np_precision), info, xyz_scatter, tf.reshape(inputs_i, [-1, shape_i[1]//4, 4]), last_layer_size = outputs_size[-1])
else:
w = tf.zeros((outputs_size[0], outputs_size[-1]), dtype=global_tf_float_precision)
xyz_scatter = tf.matmul(xyz_scatter, w)
# natom x nei_type_i x out_size
xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1]//4, outputs_size[-1]))

# xyz_scatter_total.append(xyz_scatter)
if type_i == 0 :
xyz_scatter_1 = tf.matmul(tf.reshape(inputs_i, [-1, shape_i[1]//4, 4]), xyz_scatter, transpose_a = True)
else :
xyz_scatter_1 += tf.matmul(tf.reshape(inputs_i, [-1, shape_i[1]//4, 4]), xyz_scatter, transpose_a = True)
if (type_input, type_i) not in self.exclude_types:
xyz_scatter = embedding_net(xyz_scatter,
self.filter_neuron,
self.filter_precision,
activation_fn = activation_fn,
resnet_dt = self.filter_resnet_dt,
name_suffix = "_"+str(type_i),
stddev = stddev,
bavg = bavg,
seed = seed,
trainable = trainable)
else:
w = tf.zeros((outputs_size[0], outputs_size[-1]), dtype=global_tf_float_precision)
xyz_scatter = tf.matmul(xyz_scatter, w)
# natom x nei_type_i x out_size
xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1]//4, outputs_size[-1]))
# xyz_scatter_total.append(xyz_scatter)
if type_i == 0 :
xyz_scatter_1 = tf.matmul(tf.reshape(inputs_i, [-1, shape_i[1]//4, 4]), xyz_scatter, transpose_a = True)
else :
xyz_scatter_1 += tf.matmul(tf.reshape(inputs_i, [-1, shape_i[1]//4, 4]), xyz_scatter, transpose_a = True)
# natom x nei x outputs_size
# xyz_scatter = tf.concat(xyz_scatter_total, axis=1)
# natom x nei x 4
Expand Down
92 changes: 92 additions & 0 deletions deepmd/utils/neighbor_stat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import math
import numpy as np
from tqdm import tqdm
from deepmd.env import tf
from typing import Tuple, List
from deepmd.env import op_module
from deepmd.env import default_tf_session_config
from deepmd.RunOptions import global_np_float_precision
from deepmd.utils.data_system import DeepmdDataSystem

class NeighborStat():
    """
    Class for getting training data information.

    It builds a private TensorFlow graph around the `neighbor_stat` custom op
    and runs it frame by frame over the data systems, keeping track of the
    nearest neighbor distance between atoms and the max neighbor size of atoms.
    """
    def __init__(self,
                 ntypes : int,
                 rcut: float) -> None:
        """
        Constructor

        Parameters
        ----------
        ntypes
                The num of atom types
        rcut
                The cut-off radius
        """
        self.rcut = rcut
        self.ntypes = ntypes
        self.place_holders = {}
        # A dedicated graph keeps the statistics op separate from any other graph.
        sub_graph = tf.Graph()
        with sub_graph.as_default():
            for ii in ['coord', 'box']:
                self.place_holders[ii] = tf.placeholder(global_np_float_precision, [None, None], name='t_'+ii)
            self.place_holders['type'] = tf.placeholder(tf.int32, [None, None], name='t_type')
            self.place_holders['natoms_vec'] = tf.placeholder(tf.int32, [self.ntypes+2], name='t_natoms')
            self.place_holders['default_mesh'] = tf.placeholder(tf.int32, [None], name='t_mesh')
            self._max_nbor_size, self._min_nbor_dist \
                = op_module.neighbor_stat(self.place_holders['coord'],
                                          self.place_holders['type'],
                                          self.place_holders['natoms_vec'],
                                          self.place_holders['box'],
                                          self.place_holders['default_mesh'],
                                          rcut = self.rcut)
        self.sub_sess = tf.Session(graph = sub_graph, config=default_tf_session_config)

    def get_stat(self,
                 data : DeepmdDataSystem) -> Tuple[float, List[int]]:
        """
        get the data statistics of the training data, including nearest nbor distance between atoms, max nbor size of atoms

        Parameters
        ----------
        data
                Class for manipulating many data systems. It is implemented with the help of DeepmdData.

        Returns
        -------
        min_nbor_dist
                The nearest distance between neighbor atoms
        max_nbor_size
                A list with ntypes integers, denotes the actual achieved max sel
        """
        # Starting values: a large distance to be minimised, zero counts to be maximised.
        self.min_nbor_dist = 100.0
        self.max_nbor_size = [0] * self.ntypes

        for ii in tqdm(range(len(data.system_dirs)), desc = '# DEEPMD: getting data info'):
            # Per-system inputs do not change between sets or frames.
            natoms = data.natoms[ii]
            natoms_vec = np.array(data.natoms_vec[ii])
            default_mesh = np.array(data.default_mesh[ii])
            for jj in data.data_systems[ii].dirs:
                data_set = data.data_systems[ii]._load_set(jj)
                # Convert each array once per set instead of once per frame.
                coords = np.array(data_set['coord'])
                types = np.array(data_set['type'])
                boxes = np.array(data_set['box'])
                for kk in range(types.shape[0]):
                    mn, dt \
                        = self.sub_sess.run([self._max_nbor_size, self._min_nbor_dist],
                                            feed_dict = {
                                                self.place_holders['coord']: coords[kk].reshape([-1, natoms * 3]),
                                                self.place_holders['type']: types[kk].reshape([-1, natoms]),
                                                self.place_holders['natoms_vec']: natoms_vec,
                                                self.place_holders['box']: boxes[kk].reshape([-1, 9]),
                                                self.place_holders['default_mesh']: default_mesh,
                                            })
                    dt = np.min(dt)
                    if dt < self.min_nbor_dist:
                        self.min_nbor_dist = dt
                    # mn is indexed as [row, type]; keep the largest count seen per type.
                    for ww in range(self.ntypes):
                        var = np.max(mn[:, ww])
                        if var > self.max_nbor_size[ww]:
                            self.max_nbor_size[ww] = var

        print('# DEEPMD: training data with min nbor dist: ' + str(self.min_nbor_dist))
        print('# DEEPMD: training data with max nbor size: ' + str(self.max_nbor_size))
        return self.min_nbor_dist, self.max_nbor_size
Loading