From 63a6ece8f58c571daac2890911cb676d43ca6e28 Mon Sep 17 00:00:00 2001 From: denghuilu Date: Wed, 4 Aug 2021 17:03:39 +0800 Subject: [PATCH 01/12] remove dependences on training script and data from model compression --- deepmd/common.py | 38 ++++++++++++++ deepmd/entrypoints/compress.py | 28 ++++++----- deepmd/entrypoints/freeze.py | 2 + deepmd/entrypoints/main.py | 5 -- deepmd/entrypoints/train.py | 52 ++++++++++---------- deepmd/train/trainer.py | 68 ++++++++++++++++---------- deepmd/utils/argcheck.py | 4 +- deepmd/utils/errors.py | 3 ++ source/tests/test_model_compression.py | 8 +-- 9 files changed, 136 insertions(+), 72 deletions(-) diff --git a/deepmd/common.py b/deepmd/common.py index e4613aeabb..418a1b294a 100644 --- a/deepmd/common.py +++ b/deepmd/common.py @@ -21,6 +21,8 @@ from deepmd.env import op_module, tf from deepmd.env import GLOBAL_TF_FLOAT_PRECISION, GLOBAL_NP_FLOAT_PRECISION +from deepmd.utils.sess import run_sess +from deepmd.utils.errors import GraphWithoutJdataOrMinNborDistError if TYPE_CHECKING: _DICT_VAL = TypeVar("_DICT_VAL") @@ -483,3 +485,39 @@ def get_np_precision(precision: "_PRECISION") -> np.dtype: return np.float64 else: raise RuntimeError(f"{precision} is not a valid precision") + +def load_model_info(model_file: str) -> Tuple[dict, float]: + """Get numpy precision constant from string. + + Parameters + ---------- + model_file : str + The input frozen model, which will be compressed by the deepmd-kit. + + Returns + ------- + jdata + The training script saved in the frozen model + min_nbor_dist + The nearest distance between neighbor atoms saved in the frozen model + + Raises + ------ + GraphWithoutJdataOrMinNborDistError + If the training script or min_nbor_dist are within the frozen model + """ + graph_def = tf.GraphDef() + with open(model_file, "rb") as f: + graph_def.ParseFromString(f.read()) + with tf.Graph().as_default() as graph: + tf.import_graph_def(graph_def, name = "") + try: + jdata = graph.get_tensor_by_name('training_script:0') + min_nbor_dist = graph.get_tensor_by_name('min_nbor_dist:0') + except KeyError as e: + raise GraphWithoutJdataOrMinNborDistError() from e + with tf.Session(graph = graph) as sess: + run_sess(sess, [jdata, min_nbor_dist]) + jdata = json.loads(jdata.eval()) + min_nbor_dist = min_nbor_dist.eval() + return jdata, min_nbor_dist \ No newline at end of file diff --git a/deepmd/entrypoints/compress.py b/deepmd/entrypoints/compress.py index 6b85999426..bad6a06634 100644 --- a/deepmd/entrypoints/compress.py +++ b/deepmd/entrypoints/compress.py @@ -4,10 +4,11 @@ import logging from typing import Optional -from deepmd.common import j_loader +from deepmd.env import tf +from deepmd.common import j_loader, load_model_info, GLOBAL_TF_FLOAT_PRECISION from deepmd.utils.argcheck import normalize from deepmd.utils.compat import updata_deepmd_input -from deepmd.utils.errors import GraphTooLargeError +from deepmd.utils.errors import GraphTooLargeError, GraphWithoutJdataOrMinNborDistError from .freeze import freeze from .train import train @@ -20,7 +21,6 @@ def compress( *, - INPUT: str, input: str, output: str, extrapolate: int, @@ -42,8 +42,6 @@ def compress( Parameters ---------- - INPUT : str - input json/yaml control file input : str frozen model file to compress output : str @@ -63,21 +61,28 @@ def compress( log_level : int logging level """ - jdata = j_loader(INPUT) - if "model" not in jdata.keys(): - jdata = updata_deepmd_input(jdata, warning=True, dump="input_v2_compat.json") + try: + jdata, min_nbor_dist = load_model_info(input) + tf.constant(min_nbor_dist, + name = 'min_nbor_dist', + dtype = GLOBAL_TF_FLOAT_PRECISION) + except GraphWithoutJdataOrMinNborDistError as e: + raise RuntimeError( + "The input frozen model: %s has no training script or min_nbor_dist information," + "which is not supported by the model compression program." + "Please consider using the dp convert-from interface to upgrade the model" % input + ) from e jdata["model"]["compress"] = {} jdata["model"]["compress"]["type"] = 'se_e2_a' jdata["model"]["compress"]["compress"] = True jdata["model"]["compress"]["model_file"] = input + jdata["model"]["compress"]["min_nbor_dist"] = min_nbor_dist jdata["model"]["compress"]["table_config"] = [ extrapolate, step, 10 * step, int(frequency), ] - # be careful here, if one want to refine the model - jdata["training"]["numb_steps"] = jdata["training"]["save_freq"] jdata = normalize(jdata) # check the descriptor info of the input file @@ -90,7 +95,7 @@ def compress( # stage 1: training or refining the model with tabulation log.info("\n\n") - log.info("stage 1: train or refine the model with tabulation") + log.info("stage 1: compress the model") control_file = "compress.json" with open(control_file, "w") as fp: json.dump(jdata, fp, indent=4) @@ -103,6 +108,7 @@ def compress( mpi_log=mpi_log, log_level=log_level, log_path=log_path, + is_compress=True, ) except GraphTooLargeError as e: raise RuntimeError( diff --git a/deepmd/entrypoints/freeze.py b/deepmd/entrypoints/freeze.py index a6206abef4..86a5975353 100755 --- a/deepmd/entrypoints/freeze.py +++ b/deepmd/entrypoints/freeze.py @@ -45,6 +45,8 @@ def _make_node_names(model_type: str, modifier_type: Optional[str] = None) -> Li "model_attr/tmap", "model_attr/model_type", "model_attr/model_version", + "min_nbor_dist", + "training_script", ] if model_type == "ener": diff --git a/deepmd/entrypoints/main.py b/deepmd/entrypoints/main.py index 9557976bf6..d28022ac1f 100644 --- a/deepmd/entrypoints/main.py +++ b/deepmd/entrypoints/main.py @@ -254,11 +254,6 @@ def parse_args(args: Optional[List[str]] = None): help="compress a model", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) - parser_compress.add_argument( - "INPUT", - help="The input parameter file in json or yaml format, which should be " - "consistent with the original model parameter file", - ) parser_compress.add_argument( "-i", "--input", diff --git a/deepmd/entrypoints/train.py b/deepmd/entrypoints/train.py index bb0b6d20c0..80c2f37149 100755 --- a/deepmd/entrypoints/train.py +++ b/deepmd/entrypoints/train.py @@ -11,7 +11,7 @@ import numpy as np from deepmd.common import data_requirement, expand_sys_str, j_loader, j_must_have -from deepmd.env import reset_default_tf_session_config +from deepmd.env import tf, reset_default_tf_session_config from deepmd.infer.data_modifier import DipoleChargeModifier from deepmd.train.run_options import BUILD, CITATION, WELCOME, RunOptions from deepmd.train.trainer import DPTrainer @@ -35,6 +35,7 @@ def train( mpi_log: str, log_level: int, log_path: Optional[str], + is_compress: bool = False, **kwargs, ): """Run DeePMD model training. @@ -55,6 +56,8 @@ def train( logging level defined by int 0-3 log_path : Optional[str] logging file path or None if logs are to be output only to stdout + is_compress: Bool + indicates whether in the model compress mode Raises ------ @@ -68,11 +71,15 @@ def train( jdata = normalize(jdata) - jdata = update_sel(jdata) + if is_compress == False: + jdata = update_sel(jdata) with open(output, "w") as fp: json.dump(jdata, fp, indent=4) + # save the training script into the graph + tf.constant(json.dumps(jdata), name='training_script', dtype=tf.string) + # run options run_opt = RunOptions( init_model=init_model, @@ -86,10 +93,10 @@ def train( log.info(message) run_opt.print_resource_summary() - _do_work(jdata, run_opt) + _do_work(jdata, run_opt, is_compress) -def _do_work(jdata: Dict[str, Any], run_opt: RunOptions): +def _do_work(jdata: Dict[str, Any], run_opt: RunOptions, is_compress: False): """Run serial model training. Parameters @@ -98,6 +105,8 @@ def _do_work(jdata: Dict[str, Any], run_opt: RunOptions): arguments read form json/yaml control file run_opt : RunOptions object with run configuration + is_compress : Bool + indicates whether in model compress mode Raises ------ @@ -112,7 +121,7 @@ def _do_work(jdata: Dict[str, Any], run_opt: RunOptions): reset_default_tf_session_config(cpu_only=True) # init the model - model = DPTrainer(jdata, run_opt=run_opt) + model = DPTrainer(jdata, run_opt=run_opt, is_compress = is_compress) rcut = model.model.get_rcut() type_map = model.model.get_type_map() if len(type_map) == 0: @@ -129,14 +138,16 @@ def _do_work(jdata: Dict[str, Any], run_opt: RunOptions): # setup data modifier modifier = get_modifier(jdata["model"].get("modifier", None)) - # init data - train_data = get_data(jdata["training"]["training_data"], rcut, ipt_type_map, modifier) - train_data.print_summary("training") - if jdata["training"].get("validation_data", None) is not None: - valid_data = get_data(jdata["training"]["validation_data"], rcut, ipt_type_map, modifier) - valid_data.print_summary("validation") - else: - valid_data = None + # decouple the training data from the model compress process + train_data = None + valid_data = None + if is_compress == False: + # init data + train_data = get_data(jdata["training"]["training_data"], rcut, ipt_type_map, modifier) + train_data.print_summary("training") + if jdata["training"].get("validation_data", None) is not None: + valid_data = get_data(jdata["training"]["validation_data"], rcut, ipt_type_map, modifier) + valid_data.print_summary("validation") # get training info stop_batch = j_must_have(jdata["training"], "numb_steps") @@ -276,22 +287,11 @@ def wrap_up_4(xx): def update_one_sel(jdata, descriptor): - rcut = descriptor['rcut'] - tmp_sel = get_sel(jdata, rcut) if parse_auto_sel(descriptor['sel']) : ratio = parse_auto_sel_ratio(descriptor['sel']) + rcut = descriptor['rcut'] + tmp_sel = get_sel(jdata, rcut) descriptor['sel'] = [int(wrap_up_4(ii * ratio)) for ii in tmp_sel] - else: - # sel is set by user - for ii, (tt, dd) in enumerate(zip(tmp_sel, descriptor['sel'])): - if dd and tt > dd: - # we may skip warning for sel=0, where the user is likely - # to exclude such type in the descriptor - log.warning( - "sel of type %d is not enough! The expected value is " - "not less than %d, but you set it to %d. The accuracy" - " of your model may get worse." %(ii, tt, dd) - ) return descriptor diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py index 4526c2d469..cefff4043a 100644 --- a/deepmd/train/trainer.py +++ b/deepmd/train/trainer.py @@ -2,6 +2,7 @@ import logging import os import time +import json import shutil import google.protobuf.message import numpy as np @@ -79,9 +80,11 @@ def _generate_descrpt_from_param_dict(descrpt_param): class DPTrainer (object): def __init__(self, jdata, - run_opt): + run_opt, + is_compress = False): self.run_opt = run_opt self._init_param(jdata) + self.is_compress = is_compress def _init_param(self, jdata): # model config @@ -278,35 +281,40 @@ def _init_param(self, jdata): else: self.valid_numb_batch = 1 - def build (self, - data, + data = None, stop_batch = 0) : self.ntypes = self.model.get_ntypes() - # Usually, the type number of the model should be equal to that of the data - # However, nt_model > nt_data should be allowed, since users may only want to - # train using a dataset that only have some of elements - assert (self.ntypes >= data.get_ntypes()), "ntypes should match that found in data" self.stop_batch = stop_batch - self.batch_size = data.get_batch_size() + # self.batch_size = data.get_batch_size() if self.numb_fparam > 0 : log.info("training with %d frame parameter(s)" % self.numb_fparam) else: log.info("training without frame parameter") - self.type_map = data.get_type_map() - - self.model.data_stat(data) + # self.type_map = data.get_type_map() + if self.is_compress == False : + # Usually, the type number of the model should be equal to that of the data + # However, nt_model > nt_data should be allowed, since users may only want to + # train using a dataset that only have some of elements + assert (self.ntypes >= data.get_ntypes()), "ntypes should match that found in data" + self.model.data_stat(data) - if 'compress' in self.model_param and self.model_param['compress']['compress']: - assert 'rcut' in self.descrpt_param, "Error: descriptor must have attr rcut!" self.neighbor_stat \ = NeighborStat(self.ntypes, self.descrpt_param['rcut']) self.min_nbor_dist, self.max_nbor_size \ = self.neighbor_stat.get_stat(data) - self.descrpt.enable_compression(self.min_nbor_dist, self.model_param['compress']['model_file'], self.model_param['compress']['table_config'][0], self.model_param['compress']['table_config'][1], self.model_param['compress']['table_config'][2], self.model_param['compress']['table_config'][3]) + tf.constant(self.min_nbor_dist, + name = 'min_nbor_dist', + dtype = GLOBAL_TF_FLOAT_PRECISION) + tf.constant(self.max_nbor_size, + name = 'max_nbor_size', + dtype = GLOBAL_TF_FLOAT_PRECISION) + else : + assert 'rcut' in self.descrpt_param, "Error: descriptor must have attr rcut!" + self.descrpt.enable_compression(self.model_param['compress']["min_nbor_dist"], self.model_param['compress']['model_file'], self.model_param['compress']['table_config'][0], self.model_param['compress']['table_config'][1], self.model_param['compress']['table_config'][2], self.model_param['compress']['table_config'][3]) self._build_lr() self._build_network(data) @@ -321,15 +329,22 @@ def _build_lr(self): def _build_network(self, data): self.place_holders = {} - data_dict = data.get_data_dict() - for kk in data_dict.keys(): - if kk == 'type': - continue - prec = GLOBAL_TF_FLOAT_PRECISION - if data_dict[kk]['high_prec'] : - prec = GLOBAL_ENER_FLOAT_PRECISION - self.place_holders[kk] = tf.placeholder(prec, [None], name = 't_' + kk) - self.place_holders['find_'+kk] = tf.placeholder(tf.float32, name = 't_find_' + kk) + if self.is_compress : + for kk in ['coord', 'box']: + self.place_holders[kk] = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], 't_' + kk) + for kk in ['energy', 'force', 'virial', 'atom_ener', 'atom_pref']: + self.place_holders[kk] = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], 't_' + kk) + self.place_holders['find_' + kk] = tf.placeholder(tf.float32, [None], 't_' + kk) + else : + data_dict = data.get_data_dict() + for kk in data_dict.keys(): + if kk == 'type': + continue + prec = GLOBAL_TF_FLOAT_PRECISION + if data_dict[kk]['high_prec'] : + prec = GLOBAL_ENER_FLOAT_PRECISION + self.place_holders[kk] = tf.placeholder(prec, [None], name = 't_' + kk) + self.place_holders['find_' + kk] = tf.placeholder(tf.float32, name = 't_find_' + kk) self.place_holders['type'] = tf.placeholder(tf.int32, [None], name='t_type') self.place_holders['natoms_vec'] = tf.placeholder(tf.int32, [self.ntypes+2], name='t_natoms') @@ -412,14 +427,16 @@ def _init_session(self): log.info('receive global variables from task#0') run_sess(self.sess, bcast_op) - def train (self, train_data, valid_data=None) : + def train (self, train_data = None, valid_data=None) : # if valid_data is None: # no validation set specified. # valid_data = train_data # using training set as validation set. stop_batch = self.stop_batch self._init_session() - + if self.is_compress: + self.saver.save (self.sess, os.getcwd() + "/" + self.save_ckpt) + return # Before data shard is enabled, only cheif do evaluation and record it # self.print_head() fp = None @@ -466,6 +483,7 @@ def train (self, train_data, valid_data=None) : tb_valid_writer = None train_time = 0 + while cur_batch < stop_batch : # first round validation: diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 3f75b39394..76f82c066e 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -332,14 +332,16 @@ def modifier_variant_type_args(): # --- model compression configurations: --- # def model_compression(): - doc_compress = "The name of the frozen model file." + doc_compress = f"The name of the frozen model file." doc_model_file = f"The input model file, which will be compressed by the DeePMD-kit." doc_table_config = f"The arguments of model compression, including extrapolate(scale of model extrapolation), stride(uniform stride of tabulation's first and second table), and frequency(frequency of tabulation overflow check)." + doc_min_nbor_dist = f"The nearest distance between neighbor atoms saved in the frozen model." return [ Argument("compress", bool, optional = False, default = True, doc = doc_compress), Argument("model_file", str, optional = False, default = 'frozen_model.pb', doc = doc_model_file), Argument("table_config", list, optional = False, default = [5, 0.01, 0.1, -1], doc = doc_table_config), + Argument("min_nbor_dist", float, optional = False, default = 0.0, doc = doc_min_nbor_dist), ] # --- model compression configurations: --- # diff --git a/deepmd/utils/errors.py b/deepmd/utils/errors.py index d7b62383f1..84b22ee842 100644 --- a/deepmd/utils/errors.py +++ b/deepmd/utils/errors.py @@ -1,2 +1,5 @@ class GraphTooLargeError(Exception): pass + +class GraphWithoutJdataOrMinNborDistError(Exception): + pass diff --git a/source/tests/test_model_compression.py b/source/tests/test_model_compression.py index d67e209c42..78dfbb26ac 100644 --- a/source/tests/test_model_compression.py +++ b/source/tests/test_model_compression.py @@ -33,7 +33,7 @@ def setUp(self): assert(ret == 0), "DP train error!" ret = os.system("dp freeze -o " + self.frozen_model) assert(ret == 0), "DP freeze error!" - ret = os.system("dp compress " + self.INPUT + " -i " + self.frozen_model + " -o " + self.compressed_model) + ret = os.system("dp compress " + " -i " + self.frozen_model + " -o " + self.compressed_model) assert(ret == 0), "DP model compression error!" self.dp_original = DeepPot(self.frozen_model) @@ -168,7 +168,7 @@ def setUp(self): assert(ret == 0), "DP train error!" ret = os.system("dp freeze -o " + self.frozen_model) assert(ret == 0), "DP freeze error!" - ret = os.system("dp compress " + self.INPUT + " -i " + self.frozen_model + " -o " + self.compressed_model) + ret = os.system("dp compress " + " -i " + self.frozen_model + " -o " + self.compressed_model) assert(ret == 0), "DP model compression error!" self.dp_original = DeepPot(self.frozen_model) @@ -289,7 +289,7 @@ def setUp(self): assert(ret == 0), "DP train error!" ret = os.system("dp freeze -o " + self.frozen_model) assert(ret == 0), "DP freeze error!" - ret = os.system("dp compress " + self.INPUT + " -i " + self.frozen_model + " -o " + self.compressed_model) + ret = os.system("dp compress " + " -i " + self.frozen_model + " -o " + self.compressed_model) assert(ret == 0), "DP model compression error!" self.dp_original = DeepPot(self.frozen_model) @@ -401,7 +401,7 @@ def setUp(self): assert(ret == 0), "DP train error!" ret = os.system("dp freeze -o " + self.frozen_model) assert(ret == 0), "DP freeze error!" - ret = os.system("dp compress " + self.INPUT + " -i " + self.frozen_model + " -o " + self.compressed_model) + ret = os.system("dp compress " + " -i " + self.frozen_model + " -o " + self.compressed_model) assert(ret == 0), "DP model compression error!" self.dp_original = DeepPot(self.frozen_model) From a0453a68c0c9790a12b5cd03acd61c4ddb17eee0 Mon Sep 17 00:00:00 2001 From: denghuilu Date: Wed, 4 Aug 2021 19:47:14 +0800 Subject: [PATCH 02/12] reset function update_one_sel in train.py --- deepmd/entrypoints/train.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/deepmd/entrypoints/train.py b/deepmd/entrypoints/train.py index 80c2f37149..92be7bf5f3 100755 --- a/deepmd/entrypoints/train.py +++ b/deepmd/entrypoints/train.py @@ -287,11 +287,22 @@ def wrap_up_4(xx): def update_one_sel(jdata, descriptor): + rcut = descriptor['rcut'] + tmp_sel = get_sel(jdata, rcut) if parse_auto_sel(descriptor['sel']) : ratio = parse_auto_sel_ratio(descriptor['sel']) - rcut = descriptor['rcut'] - tmp_sel = get_sel(jdata, rcut) descriptor['sel'] = [int(wrap_up_4(ii * ratio)) for ii in tmp_sel] + else: + # sel is set by user + for ii, (tt, dd) in enumerate(zip(tmp_sel, descriptor['sel'])): + if dd and tt > dd: + # we may skip warning for sel=0, where the user is likely + # to exclude such type in the descriptor + log.warning( + "sel of type %d is not enough! The expected value is " + "not less than %d, but you set it to %d. The accuracy" + " of your model may get worse." %(ii, tt, dd) + ) return descriptor From b19466279eb2655b038514b0d5e3567f68ae1579 Mon Sep 17 00:00:00 2001 From: Denghui Lu Date: Wed, 4 Aug 2021 20:15:07 +0800 Subject: [PATCH 03/12] update the doc of model compression --- doc/getting-started.md | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/doc/getting-started.md b/doc/getting-started.md index 7b028d7165..0c00588092 100644 --- a/doc/getting-started.md +++ b/doc/getting-started.md @@ -277,22 +277,18 @@ For more details with respect to definition of model deviation and its applicati Once the frozen model is obtained from deepmd-kit, we can get the neural network structure and its parameters (weights, biases, etc.) from the trained model, and compress it in the following way: ```bash -dp compress input.json -i graph.pb -o graph-compress.pb +dp compress -i graph.pb -o graph-compress.pb ``` -where input.json denotes the original training input script, `-i` gives the original frozen model, `-o` gives the compressed model. Several other command line options can be passed to `dp compress`, which can be checked with +where `-i` gives the original frozen model, `-o` gives the compressed model. Several other command line options can be passed to `dp compress`, which can be checked with ```bash $ dp compress --help ``` An explanation will be provided ``` -usage: dp compress [-h] [-i INPUT] [-o OUTPUT] [-e EXTRAPOLATE] [-s STRIDE] - [-f FREQUENCY] [-d FOLDER] - INPUT - -positional arguments: - INPUT The input parameter file in json or yaml format, which - should be consistent with the original model parameter - file +usage: dp compress [-h] [-v {DEBUG,3,INFO,2,WARNING,1,ERROR,0}] [-l LOG_PATH] + [-m {master,collect,workers}] [-i INPUT] [-o OUTPUT] + [-s STEP] [-e EXTRAPOLATE] [-f FREQUENCY] + [-c CHECKPOINT_FOLDER] optional arguments: -h, --help show this help message and exit From 9b65b3724facd83661d02dd43c3b0ea8eee7b598 Mon Sep 17 00:00:00 2001 From: denghuilu Date: Thu, 5 Aug 2021 14:56:42 +0800 Subject: [PATCH 04/12] fix bug in UT --- source/tests/test_argument_parser.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/source/tests/test_argument_parser.py b/source/tests/test_argument_parser.py index 615199603a..e26a7d7a42 100644 --- a/source/tests/test_argument_parser.py +++ b/source/tests/test_argument_parser.py @@ -196,7 +196,7 @@ def test_parser_log(self): } for parser in ("config", "transfer", "train", "freeze", "test", "compress"): - if parser in ("compress", "train"): + if parser in ("train"): args = {**{"INPUT": dict(type=str, value="INFILE")}, **ARGS} else: args = ARGS @@ -208,7 +208,7 @@ def test_parser_mpi(self): ARGS = {"--mpi-log": dict(type=str, value="master")} for parser in ("train", "compress"): - if parser in ("train", "compress"): + if parser in ("train"): args = {**{"INPUT": dict(type=str, value="INFILE")}, **ARGS} else: args = ARGS @@ -270,7 +270,6 @@ def test_parser_test(self): def test_parser_compress(self): """Test compress subparser.""" ARGS = { - "INPUT": dict(type=str, value="INFILE"), "--output": dict(type=str, value="OUTFILE"), "--extrapolate": dict(type=int, value=5), "--step": dict(type=float, value=0.1), From 431621e25b5f2ad0266dfe044e912567a2707ce9 Mon Sep 17 00:00:00 2001 From: denghuilu Date: Fri, 6 Aug 2021 16:39:08 +0800 Subject: [PATCH 05/12] optimize code for reviewer's comments --- deepmd/common.py | 34 ++++++++++++++--------------- deepmd/entrypoints/compress.py | 20 ++++++++++------- deepmd/entrypoints/freeze.py | 4 ++-- deepmd/entrypoints/train.py | 22 ++++++++++++------- deepmd/train/trainer.py | 40 ++++++++++++++++++++++++---------- deepmd/utils/constant.py | 18 +++++++++++++++ deepmd/utils/errors.py | 2 +- 7 files changed, 93 insertions(+), 47 deletions(-) create mode 100644 deepmd/utils/constant.py diff --git a/deepmd/common.py b/deepmd/common.py index 418a1b294a..ae266ce9ba 100644 --- a/deepmd/common.py +++ b/deepmd/common.py @@ -22,7 +22,7 @@ from deepmd.env import op_module, tf from deepmd.env import GLOBAL_TF_FLOAT_PRECISION, GLOBAL_NP_FLOAT_PRECISION from deepmd.utils.sess import run_sess -from deepmd.utils.errors import GraphWithoutJdataOrMinNborDistError +from deepmd.utils.errors import GraphWithoutTensorError if TYPE_CHECKING: _DICT_VAL = TypeVar("_DICT_VAL") @@ -486,25 +486,27 @@ def get_np_precision(precision: "_PRECISION") -> np.dtype: else: raise RuntimeError(f"{precision} is not a valid precision") -def load_model_info(model_file: str) -> Tuple[dict, float]: - """Get numpy precision constant from string. + +def get_tensor_by_name(model_file: str, + tensor_name: str) -> tf.Tensor: + """Load tensor value from the frozen model(model_file) Parameters ---------- model_file : str - The input frozen model, which will be compressed by the deepmd-kit. + The input frozen model. + tensor : tensor_name + Indicates which tensor which will be loaded from the frozen model. Returns ------- - jdata - The training script saved in the frozen model - min_nbor_dist - The nearest distance between neighbor atoms saved in the frozen model + tf.Tensor + The tensor which was loaded from the frozen model. Raises ------ - GraphWithoutJdataOrMinNborDistError - If the training script or min_nbor_dist are within the frozen model + GraphWithoutTensorError + Whether the tensor_name is within the frozen model. """ graph_def = tf.GraphDef() with open(model_file, "rb") as f: @@ -512,12 +514,10 @@ def load_model_info(model_file: str) -> Tuple[dict, float]: with tf.Graph().as_default() as graph: tf.import_graph_def(graph_def, name = "") try: - jdata = graph.get_tensor_by_name('training_script:0') - min_nbor_dist = graph.get_tensor_by_name('min_nbor_dist:0') + tensor = graph.get_tensor_by_name(tensor_name + ":0") except KeyError as e: - raise GraphWithoutJdataOrMinNborDistError() from e + raise GraphWithoutTensorError() from e with tf.Session(graph = graph) as sess: - run_sess(sess, [jdata, min_nbor_dist]) - jdata = json.loads(jdata.eval()) - min_nbor_dist = min_nbor_dist.eval() - return jdata, min_nbor_dist \ No newline at end of file + run_sess(sess, tensor) + tensor = tensor.eval() + return tensor \ No newline at end of file diff --git a/deepmd/entrypoints/compress.py b/deepmd/entrypoints/compress.py index bad6a06634..72fe86f307 100644 --- a/deepmd/entrypoints/compress.py +++ b/deepmd/entrypoints/compress.py @@ -5,10 +5,11 @@ from typing import Optional from deepmd.env import tf -from deepmd.common import j_loader, load_model_info, GLOBAL_TF_FLOAT_PRECISION +from deepmd.common import j_loader, get_tensor_by_name, GLOBAL_TF_FLOAT_PRECISION from deepmd.utils.argcheck import normalize from deepmd.utils.compat import updata_deepmd_input -from deepmd.utils.errors import GraphTooLargeError, GraphWithoutJdataOrMinNborDistError +from deepmd.utils.errors import GraphTooLargeError, GraphWithoutTensorError +from deepmd.utils.constant import add_constant_variable from .freeze import freeze from .train import train @@ -62,21 +63,24 @@ def compress( logging level """ try: - jdata, min_nbor_dist = load_model_info(input) - tf.constant(min_nbor_dist, - name = 'min_nbor_dist', - dtype = GLOBAL_TF_FLOAT_PRECISION) - except GraphWithoutJdataOrMinNborDistError as e: + t_jdata = get_tensor_by_name(input, 'train_attr/training_script') + t_min_nbor_dist = get_tensor_by_name(input, 'train_attr/min_nbor_dist') + except GraphWithoutTensorError as e: raise RuntimeError( "The input frozen model: %s has no training script or min_nbor_dist information," "which is not supported by the model compression program." "Please consider using the dp convert-from interface to upgrade the model" % input ) from e + tf.constant(t_min_nbor_dist, + name = 'train_attr/min_nbor_dist', + dtype = GLOBAL_TF_FLOAT_PRECISION) + add_constant_variable('train_attr/min_nbor_dist', t_min_nbor_dist) + jdata = json.loads(t_jdata) jdata["model"]["compress"] = {} jdata["model"]["compress"]["type"] = 'se_e2_a' jdata["model"]["compress"]["compress"] = True jdata["model"]["compress"]["model_file"] = input - jdata["model"]["compress"]["min_nbor_dist"] = min_nbor_dist + jdata["model"]["compress"]["min_nbor_dist"] = t_min_nbor_dist jdata["model"]["compress"]["table_config"] = [ extrapolate, step, diff --git a/deepmd/entrypoints/freeze.py b/deepmd/entrypoints/freeze.py index 86a5975353..a8a1294a64 100755 --- a/deepmd/entrypoints/freeze.py +++ b/deepmd/entrypoints/freeze.py @@ -45,8 +45,8 @@ def _make_node_names(model_type: str, modifier_type: Optional[str] = None) -> Li "model_attr/tmap", "model_attr/model_type", "model_attr/model_version", - "min_nbor_dist", - "training_script", + "train_attr/min_nbor_dist", + "train_attr/training_script", ] if model_type == "ener": diff --git a/deepmd/entrypoints/train.py b/deepmd/entrypoints/train.py index 92be7bf5f3..c997bca60c 100755 --- a/deepmd/entrypoints/train.py +++ b/deepmd/entrypoints/train.py @@ -20,6 +20,7 @@ from deepmd.utils.data_system import DeepmdDataSystem from deepmd.utils.sess import run_sess from deepmd.utils.neighbor_stat import NeighborStat +from deepmd.utils.constant import add_constant_variable __all__ = ["train"] @@ -78,7 +79,8 @@ def train( json.dump(jdata, fp, indent=4) # save the training script into the graph - tf.constant(json.dumps(jdata), name='training_script', dtype=tf.string) + tf.constant(json.dumps(jdata), name='train_attr/training_script', dtype=tf.string) + add_constant_variable('train_attr/training_script', jdata) # run options run_opt = RunOptions( @@ -96,7 +98,7 @@ def train( _do_work(jdata, run_opt, is_compress) -def _do_work(jdata: Dict[str, Any], run_opt: RunOptions, is_compress: False): +def _do_work(jdata: Dict[str, Any], run_opt: RunOptions, is_compress: bool = False): """Run serial model training. Parameters @@ -153,12 +155,16 @@ def _do_work(jdata: Dict[str, Any], run_opt: RunOptions, is_compress: False): stop_batch = j_must_have(jdata["training"], "numb_steps") model.build(train_data, stop_batch) - # train the model with the provided systems in a cyclic way - start_time = time.time() - model.train(train_data, valid_data) - end_time = time.time() - log.info("finished training") - log.info(f"wall time: {(end_time - start_time):.3f} s") + if is_compress == False: + # train the model with the provided systems in a cyclic way + start_time = time.time() + model.train(train_data, valid_data) + end_time = time.time() + log.info("finished training") + log.info(f"wall time: {(end_time - start_time):.3f} s") + else: + model.save_compressed() + log.info("finished compressing") def get_data(jdata: Dict[str, Any], rcut, type_map, modifier): diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py index cefff4043a..367f5f8788 100644 --- a/deepmd/train/trainer.py +++ b/deepmd/train/trainer.py @@ -26,6 +26,7 @@ from deepmd.utils.neighbor_stat import NeighborStat from deepmd.utils.sess import run_sess from deepmd.utils.type_embed import TypeEmbedNet +from deepmd.utils.constant import add_constant_variable from tensorflow.python.client import timeline from deepmd.env import op_module @@ -33,7 +34,7 @@ # load grad of force module import deepmd.op -from deepmd.common import j_must_have, ClassArg +from deepmd.common import j_must_have, ClassArg, data_requirement log = logging.getLogger(__name__) @@ -281,10 +282,16 @@ def _init_param(self, jdata): else: self.valid_numb_batch = 1 + def build (self, data = None, stop_batch = 0) : self.ntypes = self.model.get_ntypes() + # Usually, the type number of the model should be equal to that of the data + # However, nt_model > nt_data should be allowed, since users may only want to + # train using a dataset that only have some of elements + if self.is_compress == False: + assert (self.ntypes >= data.get_ntypes()), "ntypes should match that found in data" self.stop_batch = stop_batch # self.batch_size = data.get_batch_size() @@ -295,7 +302,7 @@ def build (self, log.info("training without frame parameter") # self.type_map = data.get_type_map() - if self.is_compress == False : + if self.is_compress == False: # Usually, the type number of the model should be equal to that of the data # However, nt_model > nt_data should be allowed, since users may only want to # train using a dataset that only have some of elements @@ -307,11 +314,13 @@ def build (self, self.min_nbor_dist, self.max_nbor_size \ = self.neighbor_stat.get_stat(data) tf.constant(self.min_nbor_dist, - name = 'min_nbor_dist', + name = 'train_attr/min_nbor_dist', dtype = GLOBAL_TF_FLOAT_PRECISION) tf.constant(self.max_nbor_size, - name = 'max_nbor_size', + name = 'train_attr/max_nbor_size', dtype = GLOBAL_TF_FLOAT_PRECISION) + add_constant_variable('train_attr/min_nbor_dist', self.min_nbor_dist) + add_constant_variable('train_attr/max_nbor_size', self.max_nbor_size) else : assert 'rcut' in self.descrpt_param, "Error: descriptor must have attr rcut!" self.descrpt.enable_compression(self.model_param['compress']["min_nbor_dist"], self.model_param['compress']['model_file'], self.model_param['compress']['table_config'][0], self.model_param['compress']['table_config'][1], self.model_param['compress']['table_config'][2], self.model_param['compress']['table_config'][3]) @@ -332,9 +341,14 @@ def _build_network(self, data): if self.is_compress : for kk in ['coord', 'box']: self.place_holders[kk] = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], 't_' + kk) - for kk in ['energy', 'force', 'virial', 'atom_ener', 'atom_pref']: - self.place_holders[kk] = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], 't_' + kk) - self.place_holders['find_' + kk] = tf.placeholder(tf.float32, [None], 't_' + kk) + for kk in data_requirement.keys(): + if kk == 'type': + continue + prec = GLOBAL_TF_FLOAT_PRECISION + if data_requirement[kk]['high_prec'] : + prec = GLOBAL_ENER_FLOAT_PRECISION + self.place_holders[kk] = tf.placeholder(prec, [None], name = 't_' + kk) + self.place_holders['find_' + kk] = tf.placeholder(tf.float32, name = 't_find_' + kk) else : data_dict = data.get_data_dict() for kk in data_dict.keys(): @@ -382,6 +396,7 @@ def _build_training(self): name='train_step') train_ops = [apply_op] + self._extra_train_ops self.train_op = tf.group(*train_ops) + self._init_session() log.info("built training") def _init_session(self): @@ -433,10 +448,6 @@ def train (self, train_data = None, valid_data=None) : # valid_data = train_data # using training set as validation set. stop_batch = self.stop_batch - self._init_session() - if self.is_compress: - self.saver.save (self.sess, os.getcwd() + "/" + self.save_ckpt) - return # Before data shard is enabled, only cheif do evaluation and record it # self.print_head() fp = None @@ -635,3 +646,10 @@ def get_evaluation_results(self, batch_list): sum_results[k] = sum_results.get(k, 0.) + v * results["natoms"] avg_results = {k: v / sum_natoms for k, v in sum_results.items() if not k == "natoms"} return avg_results + + def save_compressed(self): + """ + Save the compressed graph + """ + if self.is_compress: + self.saver.save (self.sess, os.getcwd() + "/" + self.save_ckpt) \ No newline at end of file diff --git a/deepmd/utils/constant.py b/deepmd/utils/constant.py new file mode 100644 index 0000000000..b73db82b12 --- /dev/null +++ b/deepmd/utils/constant.py @@ -0,0 +1,18 @@ +from deepmd.env import tf + +constant_variables = {} + +def add_constant_variable( + key: str, + var: tf.Tensor +): + """Store the global constant variables. + + Parameters + ---------- + key : str + name of the variable + var : int + variables that need to be stored + """ + constant_variables[key] = var \ No newline at end of file diff --git a/deepmd/utils/errors.py b/deepmd/utils/errors.py index 84b22ee842..4a6617c055 100644 --- a/deepmd/utils/errors.py +++ b/deepmd/utils/errors.py @@ -1,5 +1,5 @@ class GraphTooLargeError(Exception): pass -class GraphWithoutJdataOrMinNborDistError(Exception): +class GraphWithoutTensorError(Exception): pass From 721ea3773c3ef25b08230908c98ee53e22140159 Mon Sep 17 00:00:00 2001 From: denghuilu Date: Fri, 6 Aug 2021 23:06:48 +0800 Subject: [PATCH 06/12] undo changes to constant variables --- deepmd/entrypoints/compress.py | 2 -- deepmd/entrypoints/train.py | 2 -- deepmd/train/trainer.py | 3 --- deepmd/utils/constant.py | 18 ------------------ 4 files changed, 25 deletions(-) delete mode 100644 deepmd/utils/constant.py diff --git a/deepmd/entrypoints/compress.py b/deepmd/entrypoints/compress.py index 72fe86f307..b0f0f42729 100644 --- a/deepmd/entrypoints/compress.py +++ b/deepmd/entrypoints/compress.py @@ -9,7 +9,6 @@ from deepmd.utils.argcheck import normalize from deepmd.utils.compat import updata_deepmd_input from deepmd.utils.errors import GraphTooLargeError, GraphWithoutTensorError -from deepmd.utils.constant import add_constant_variable from .freeze import freeze from .train import train @@ -74,7 +73,6 @@ def compress( tf.constant(t_min_nbor_dist, name = 'train_attr/min_nbor_dist', dtype = GLOBAL_TF_FLOAT_PRECISION) - add_constant_variable('train_attr/min_nbor_dist', t_min_nbor_dist) jdata = json.loads(t_jdata) jdata["model"]["compress"] = {} jdata["model"]["compress"]["type"] = 'se_e2_a' diff --git a/deepmd/entrypoints/train.py b/deepmd/entrypoints/train.py index c997bca60c..ba20f5f7e8 100755 --- a/deepmd/entrypoints/train.py +++ b/deepmd/entrypoints/train.py @@ -20,7 +20,6 @@ from deepmd.utils.data_system import DeepmdDataSystem from deepmd.utils.sess import run_sess from deepmd.utils.neighbor_stat import NeighborStat -from deepmd.utils.constant import add_constant_variable __all__ = ["train"] @@ -80,7 +79,6 @@ def train( # save the training script into the graph tf.constant(json.dumps(jdata), name='train_attr/training_script', dtype=tf.string) - add_constant_variable('train_attr/training_script', jdata) # run options run_opt = RunOptions( diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py index 367f5f8788..009b077940 100644 --- a/deepmd/train/trainer.py +++ b/deepmd/train/trainer.py @@ -26,7 +26,6 @@ from deepmd.utils.neighbor_stat import NeighborStat from deepmd.utils.sess import run_sess from deepmd.utils.type_embed import TypeEmbedNet -from deepmd.utils.constant import add_constant_variable from tensorflow.python.client import timeline from deepmd.env import op_module @@ -319,8 +318,6 @@ def build (self, tf.constant(self.max_nbor_size, name = 'train_attr/max_nbor_size', dtype = GLOBAL_TF_FLOAT_PRECISION) - add_constant_variable('train_attr/min_nbor_dist', self.min_nbor_dist) - add_constant_variable('train_attr/max_nbor_size', self.max_nbor_size) else : assert 'rcut' in self.descrpt_param, "Error: descriptor must have attr rcut!" self.descrpt.enable_compression(self.model_param['compress']["min_nbor_dist"], self.model_param['compress']['model_file'], self.model_param['compress']['table_config'][0], self.model_param['compress']['table_config'][1], self.model_param['compress']['table_config'][2], self.model_param['compress']['table_config'][3]) diff --git a/deepmd/utils/constant.py b/deepmd/utils/constant.py deleted file mode 100644 index b73db82b12..0000000000 --- a/deepmd/utils/constant.py +++ /dev/null @@ -1,18 +0,0 @@ -from deepmd.env import tf - -constant_variables = {} - -def add_constant_variable( - key: str, - var: tf.Tensor -): - """Store the global constant variables. - - Parameters - ---------- - key : str - name of the variable - var : int - variables that need to be stored - """ - constant_variables[key] = var \ No newline at end of file From dbc56d372ed82f9e8f541e5d40e6f3dc171588d9 Mon Sep 17 00:00:00 2001 From: denghuilu Date: Sat, 7 Aug 2021 11:12:11 +0800 Subject: [PATCH 07/12] Update common.py --- deepmd/common.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/deepmd/common.py b/deepmd/common.py index ae266ce9ba..4b2246f358 100644 --- a/deepmd/common.py +++ b/deepmd/common.py @@ -518,6 +518,5 @@ def get_tensor_by_name(model_file: str, except KeyError as e: raise GraphWithoutTensorError() from e with tf.Session(graph = graph) as sess: - run_sess(sess, tensor) - tensor = tensor.eval() + tensor = run_sess(sess, tensor) return tensor \ No newline at end of file From 0f0ff8fabc16511d5f896a680d69ed13c576cf22 Mon Sep 17 00:00:00 2001 From: denghuilu Date: Sat, 7 Aug 2021 19:11:54 +0800 Subject: [PATCH 08/12] update code structure of DPTrainer --- deepmd/train/trainer.py | 44 +++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py index 009b077940..45bff92d02 100644 --- a/deepmd/train/trainer.py +++ b/deepmd/train/trainer.py @@ -2,7 +2,6 @@ import logging import os import time -import json import shutil import google.protobuf.message import numpy as np @@ -286,10 +285,10 @@ def build (self, data = None, stop_batch = 0) : self.ntypes = self.model.get_ntypes() - # Usually, the type number of the model should be equal to that of the data - # However, nt_model > nt_data should be allowed, since users may only want to - # train using a dataset that only have some of elements if self.is_compress == False: + # Usually, the type number of the model should be equal to that of the data + # However, nt_model > nt_data should be allowed, since users may only want to + # train using a dataset that only have some of elements assert (self.ntypes >= data.get_ntypes()), "ntypes should match that found in data" self.stop_batch = stop_batch @@ -338,24 +337,9 @@ def _build_network(self, data): if self.is_compress : for kk in ['coord', 'box']: self.place_holders[kk] = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], 't_' + kk) - for kk in data_requirement.keys(): - if kk == 'type': - continue - prec = GLOBAL_TF_FLOAT_PRECISION - if data_requirement[kk]['high_prec'] : - prec = GLOBAL_ENER_FLOAT_PRECISION - self.place_holders[kk] = tf.placeholder(prec, [None], name = 't_' + kk) - self.place_holders['find_' + kk] = tf.placeholder(tf.float32, name = 't_find_' + kk) + self._get_place_horders(data_requirement) else : - data_dict = data.get_data_dict() - for kk in data_dict.keys(): - if kk == 'type': - continue - prec = GLOBAL_TF_FLOAT_PRECISION - if data_dict[kk]['high_prec'] : - prec = GLOBAL_ENER_FLOAT_PRECISION - self.place_holders[kk] = tf.placeholder(prec, [None], name = 't_' + kk) - self.place_holders['find_' + kk] = tf.placeholder(tf.float32, name = 't_find_' + kk) + self._get_place_horders(data.get_data_dict()) self.place_holders['type'] = tf.placeholder(tf.int32, [None], name='t_type') self.place_holders['natoms_vec'] = tf.placeholder(tf.int32, [self.ntypes+2], name='t_natoms') @@ -393,7 +377,6 @@ def _build_training(self): name='train_step') train_ops = [apply_op] + self._extra_train_ops self.train_op = tf.group(*train_ops) - self._init_session() log.info("built training") def _init_session(self): @@ -445,6 +428,8 @@ def train (self, train_data = None, valid_data=None) : # valid_data = train_data # using training set as validation set. stop_batch = self.stop_batch + self._init_session() + # Before data shard is enabled, only cheif do evaluation and record it # self.print_head() fp = None @@ -533,7 +518,7 @@ def train (self, train_data = None, valid_data=None) : train_time = 0 if self.save_freq > 0 and cur_batch % self.save_freq == 0 and self.saver is not None: try: - self.saver.save (self.sess, os.getcwd() + "/" + self.save_ckpt) + self.saver.save (self.sess, os.path.join(os.getcwd(), self.save_ckpt)) except google.protobuf.message.DecodeError as e: raise GraphTooLargeError( "The graph size exceeds 2 GB, the hard limitation of protobuf." @@ -648,5 +633,16 @@ def save_compressed(self): """ Save the compressed graph """ + self._init_session() if self.is_compress: - self.saver.save (self.sess, os.getcwd() + "/" + self.save_ckpt) \ No newline at end of file + self.saver.save (self.sess, os.path.join(os.getcwd(), self.save_ckpt)) + + def _get_place_horders(self, data_dict): + for kk in data_dict.keys(): + if kk == 'type': + continue + prec = GLOBAL_TF_FLOAT_PRECISION + if data_dict[kk]['high_prec'] : + prec = GLOBAL_ENER_FLOAT_PRECISION + self.place_holders[kk] = tf.placeholder(prec, [None], name = 't_' + kk) + self.place_holders['find_' + kk] = tf.placeholder(tf.float32, name = 't_find_' + kk) From 425a896abb66164139cd4334be34bf1df379e33f Mon Sep 17 00:00:00 2001 From: denghuilu Date: Sat, 7 Aug 2021 20:05:55 +0800 Subject: [PATCH 09/12] fix lint warnings in common.py --- deepmd/common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/deepmd/common.py b/deepmd/common.py index 4b2246f358..60af7b1493 100644 --- a/deepmd/common.py +++ b/deepmd/common.py @@ -512,11 +512,11 @@ def get_tensor_by_name(model_file: str, with open(model_file, "rb") as f: graph_def.ParseFromString(f.read()) with tf.Graph().as_default() as graph: - tf.import_graph_def(graph_def, name = "") + tf.import_graph_def(graph_def, name="") try: tensor = graph.get_tensor_by_name(tensor_name + ":0") except KeyError as e: raise GraphWithoutTensorError() from e - with tf.Session(graph = graph) as sess: + with tf.Session(graph=graph) as sess: tensor = run_sess(sess, tensor) - return tensor \ No newline at end of file + return tensor From 604707a53b31f925cd8abd36b8cbfd0f1812df79 Mon Sep 17 00:00:00 2001 From: denghuilu Date: Mon, 9 Aug 2021 07:42:43 +0800 Subject: [PATCH 10/12] fix duplicated lines within trainer.py --- deepmd/train/trainer.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py index 45bff92d02..7daf18970f 100644 --- a/deepmd/train/trainer.py +++ b/deepmd/train/trainer.py @@ -285,11 +285,7 @@ def build (self, data = None, stop_batch = 0) : self.ntypes = self.model.get_ntypes() - if self.is_compress == False: - # Usually, the type number of the model should be equal to that of the data - # However, nt_model > nt_data should be allowed, since users may only want to - # train using a dataset that only have some of elements - assert (self.ntypes >= data.get_ntypes()), "ntypes should match that found in data" + self.stop_batch = stop_batch # self.batch_size = data.get_batch_size() From d7695981cf33d9e349c7a91ce18f536e2bfa57f4 Mon Sep 17 00:00:00 2001 From: denghuilu Date: Mon, 9 Aug 2021 07:46:35 +0800 Subject: [PATCH 11/12] Update trainer.py --- deepmd/train/trainer.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py index 7daf18970f..f6ea8aa8a6 100644 --- a/deepmd/train/trainer.py +++ b/deepmd/train/trainer.py @@ -285,22 +285,20 @@ def build (self, data = None, stop_batch = 0) : self.ntypes = self.model.get_ntypes() - self.stop_batch = stop_batch - # self.batch_size = data.get_batch_size() - if self.numb_fparam > 0 : log.info("training with %d frame parameter(s)" % self.numb_fparam) else: log.info("training without frame parameter") - # self.type_map = data.get_type_map() if self.is_compress == False: # Usually, the type number of the model should be equal to that of the data # However, nt_model > nt_data should be allowed, since users may only want to # train using a dataset that only have some of elements assert (self.ntypes >= data.get_ntypes()), "ntypes should match that found in data" + self.type_map = data.get_type_map() + self.batch_size = data.get_batch_size() self.model.data_stat(data) self.neighbor_stat \ From a1f087dcdaf4faaca8913cecb8abbc81112cd6a5 Mon Sep 17 00:00:00 2001 From: denghuilu Date: Mon, 9 Aug 2021 09:47:30 +0800 Subject: [PATCH 12/12] rm default values with False optional in argcheck.py --- deepmd/utils/argcheck.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 76f82c066e..20b7667b1d 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -338,10 +338,10 @@ def model_compression(): doc_min_nbor_dist = f"The nearest distance between neighbor atoms saved in the frozen model." return [ - Argument("compress", bool, optional = False, default = True, doc = doc_compress), - Argument("model_file", str, optional = False, default = 'frozen_model.pb', doc = doc_model_file), - Argument("table_config", list, optional = False, default = [5, 0.01, 0.1, -1], doc = doc_table_config), - Argument("min_nbor_dist", float, optional = False, default = 0.0, doc = doc_min_nbor_dist), + Argument("compress", bool, optional = False, doc = doc_compress), + Argument("model_file", str, optional = False, doc = doc_model_file), + Argument("table_config", list, optional = False, doc = doc_table_config), + Argument("min_nbor_dist", float, optional = False, doc = doc_min_nbor_dist), ] # --- model compression configurations: --- #