diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py index 60de701886..aae85eeed2 100644 --- a/deepmd/descriptor/se_a.py +++ b/deepmd/descriptor/se_a.py @@ -30,7 +30,8 @@ def __init__ (self, set_davg_zero: bool = False, activation_function: str = 'tanh', precision: str = 'default', - uniform_seed: bool = False + uniform_seed: bool = False, + name: str = None, ) -> None: """ Constructor @@ -66,6 +67,8 @@ def __init__ (self, The precision of the embedding net parameters. Supported options are {1} uniform_seed Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed + name + Name used to identify the descriptor """ self.sel_a = sel self.rcut_r = rcut @@ -89,7 +92,7 @@ def __init__ (self, self.type_one_side = type_one_side if self.type_one_side and len(exclude_types) != 0: raise RuntimeError('"type_one_side" is not compatible with "exclude_types"') - + self.name = name # descrpt config self.sel_r = [ 0 for ii in range(len(self.sel_a)) ] self.ntypes = len(self.sel_a) diff --git a/deepmd/entrypoints/__init__.py b/deepmd/entrypoints/__init__.py index 80d0431f8c..f770aab10c 100644 --- a/deepmd/entrypoints/__init__.py +++ b/deepmd/entrypoints/__init__.py @@ -8,6 +8,7 @@ # import `train` as `train_dp` to avoid the conflict of the # module name `train` and the function name `train` from .train import train as train_dp +from .train_mt import train_mt as train_dp_mt from .transfer import transfer from ..infer.model_devi import make_model_devi from .convert import convert diff --git a/deepmd/entrypoints/compress.py b/deepmd/entrypoints/compress.py index 6b85999426..267cb0e70b 100644 --- a/deepmd/entrypoints/compress.py +++ b/deepmd/entrypoints/compress.py @@ -103,6 +103,7 @@ def compress( mpi_log=mpi_log, log_level=log_level, log_path=log_path, + multi_task = False, ) except GraphTooLargeError as e: raise RuntimeError( diff --git a/deepmd/entrypoints/main.py b/deepmd/entrypoints/main.py index 9557976bf6..c4c85b1b85 100644 
--- a/deepmd/entrypoints/main.py +++ b/deepmd/entrypoints/main.py @@ -12,6 +12,7 @@ freeze, test, train_dp, + train_dp_mt, transfer, make_model_devi, convert, @@ -162,6 +163,13 @@ def parse_args(args: Optional[List[str]] = None): default="out.json", help="The output file of the parameters used in training.", ) + parser_train.add_argument( + "-mt", + "--multi_task", + action = 'store_true', + help="Whether using multi-task.", + ) + # * freeze script ****************************************************************** parser_frz = subparsers.add_parser( @@ -422,7 +430,10 @@ def main(): dict_args = vars(args) if args.command == "train": - train_dp(**dict_args) + if dict_args['multi_task']: + train_dp_mt(**dict_args) + else: + train_dp(**dict_args) elif args.command == "freeze": freeze(**dict_args) elif args.command == "config": diff --git a/deepmd/entrypoints/train.py b/deepmd/entrypoints/train.py index bb0b6d20c0..c9cc2e716b 100755 --- a/deepmd/entrypoints/train.py +++ b/deepmd/entrypoints/train.py @@ -79,7 +79,7 @@ def train( restart=restart, log_path=log_path, log_level=log_level, - mpi_log=mpi_log + mpi_log=mpi_log, ) for message in WELCOME + CITATION + BUILD: @@ -210,14 +210,19 @@ def get_modifier(modi_data=None): return modifier -def get_rcut(jdata): - descrpt_data = jdata['model']['descriptor'] +def parse_rcut(descrpt_data): rcut_list = [] if descrpt_data['type'] == 'hybrid': for ii in descrpt_data['list']: rcut_list.append(ii['rcut']) else: rcut_list.append(descrpt_data['rcut']) + return rcut_list + +def get_rcut(jdata): + descrpt_data = jdata['model']['descriptor'] + rcut_list = [] + rcut_list.extend(parse_rcut(descrpt_data)) return max(rcut_list) @@ -295,12 +300,16 @@ def update_one_sel(jdata, descriptor): return descriptor -def update_sel(jdata): - descrpt_data = jdata['model']['descriptor'] +def parse_auto_descrpt(jdata,descrpt_data): if descrpt_data['type'] == 'hybrid': for ii in range(len(descrpt_data['list'])): descrpt_data['list'][ii] = 
update_one_sel(jdata, descrpt_data['list'][ii]) else: descrpt_data = update_one_sel(jdata, descrpt_data) + return descrpt_data + +def update_sel(jdata): + descrpt_data = jdata['model']['descriptor'] + descrpt_data = parse_auto_descrpt(jdata, descrpt_data) jdata['model']['descriptor'] = descrpt_data return jdata diff --git a/deepmd/entrypoints/train_mt.py b/deepmd/entrypoints/train_mt.py new file mode 100755 index 0000000000..8bbf14ac03 --- /dev/null +++ b/deepmd/entrypoints/train_mt.py @@ -0,0 +1,260 @@ +"""DeePMD training entrypoint script. + +Can handle local or distributed training. +""" + +import json +import logging +import time +import os +from typing import Dict, List, Optional, Any + +import numpy as np +from deepmd.common import data_requirement, expand_sys_str, j_loader, j_must_have +from deepmd.env import reset_default_tf_session_config +from deepmd.infer.data_modifier import DipoleChargeModifier +from deepmd.train.run_options import BUILD, CITATION, WELCOME, RunOptions +from deepmd.train.trainer import DPTrainer +from deepmd.train.trainer_mt import DPMultitaskTrainer +from deepmd.utils.argcheck import normalize +from deepmd.utils.argcheck_mt import normalize_mt +from deepmd.utils.compat import updata_deepmd_input +from deepmd.utils.data_system import DeepmdDataSystem +from deepmd.utils.data_docker import DeepmdDataDocker +from deepmd.utils.sess import run_sess +from deepmd.utils.neighbor_stat import NeighborStat +from deepmd.entrypoints.train import get_modifier, parse_rcut, get_type_map +from deepmd.entrypoints.train import parse_auto_sel, parse_auto_sel_ratio, wrap_up_4 + +__all__ = ["train_mt"] + +log = logging.getLogger(__name__) + + +def train_mt( + *, + INPUT: str, + init_model: Optional[str], + restart: Optional[str], + output: str, + mpi_log: str, + log_level: int, + log_path: Optional[str], + **kwargs, +): + """Run DeePMD model training. 
+ + Parameters + ---------- + INPUT : str + json/yaml control file + init_model : Optional[str] + path to checkpoint folder or None + restart : Optional[str] + path to checkpoint folder or None + output : str + path for dump file with arguments + mpi_log : str + mpi logging mode + log_level : int + logging level defined by int 0-3 + log_path : Optional[str] + logging file path or None if logs are to be output only to stdout + Raises + ------ + RuntimeError + if distributed training job name is wrong + """ + # load json database + jdata = j_loader(INPUT) + + jdata = updata_deepmd_input(jdata, warning=True, dump="input_v2_compat.json") + jdata = normalize_mt(jdata) + jdata = update_sel(jdata) + + with open(output, "w") as fp: + json.dump(jdata, fp, indent=4) + + # run options + run_opt = RunOptions( + init_model=init_model, + restart=restart, + log_path=log_path, + log_level=log_level, + mpi_log=mpi_log, + + ) + + for message in WELCOME + CITATION + BUILD: + log.info(message) + + run_opt.print_resource_summary() + _do_work(jdata, run_opt) + + + + +def _do_work(jdata: Dict[str, Any], run_opt: RunOptions): + """Run serial model training. 
+ + Parameters + ---------- + jdata : Dict[str, Any] + arguments read from json/yaml control file + run_opt : RunOptions + object with run configuration + + Raises + ------ + RuntimeError + If unsupported modifier type is selected for model + """ + # make necessary checks + assert "training" in jdata + + # avoid conflict of visible gpus among multiple tf sessions in one process + if run_opt.is_distrib and len(run_opt.gpus or []) > 1: + reset_default_tf_session_config(cpu_only=True) + + # init the model + rcut_list = [] + model = DPMultitaskTrainer(jdata, run_opt=run_opt) + for model_name in model.model_dict.keys(): + sub_model = model.model_dict[model_name] + rcut_list.append(sub_model.get_rcut()) + type_map = sub_model.get_type_map() + rcut = max(rcut_list) + + if len(type_map) == 0: + ipt_type_map = None + else: + ipt_type_map = type_map + + # init random seed + seed = jdata["training"].get("seed", None) + if seed is not None: + seed = seed % (2 ** 32) + np.random.seed(seed) + + # setup data modifier + modifier = get_modifier(jdata["model"].get("modifier", None)) + if modifier is not None: + raise RuntimeError('modifier is not supported in multi-task training mode yet') + + # init data + train_data = get_data(jdata["training"]["training_data"], rcut, ipt_type_map, modifier) + train_data.print_summary("training") + if jdata["training"].get("validation_data", None) is not None: + valid_data = get_data(jdata["training"]["validation_data"], rcut, ipt_type_map, modifier) + valid_data.print_summary("validation") + else: + valid_data = None + + # get training info + stop_batch = j_must_have(jdata["training"], "numb_steps") + model.build(train_data, stop_batch) + + # train the model with the provided systems in a cyclic way + start_time = time.time() + model.train(train_data, valid_data) + end_time = time.time() + log.info("finished training") + log.info(f"wall time: {(end_time - start_time):.3f} s") + +def get_data(jdata: Dict[str, Any], rcut, type_map, modifier): + 
systems = j_must_have(jdata, "systems") + batch_size = j_must_have(jdata, "batch_size") + sys_probs = jdata.get("sys_probs", None) + auto_prob = jdata.get("auto_prob", "prob_sys_size") + auto_prob_method = jdata.get("auto_prob_method", "prob_uniform") + + + docker = DeepmdDataDocker( + data_systems=systems, + batch_size = batch_size, + rcut = rcut, + type_map = type_map, # in the data docker is the total type + sys_probs = sys_probs, + auto_prob_style = auto_prob, + auto_prob_style_method = auto_prob_method, + modifier = modifier, + ) + return docker + +def get_rcut(jdata): + descrpt_data = jdata['model']['descriptor'] + rcut_list = [] + for sub_descrpt in descrpt_data: + rcut_list.extend(parse_rcut(sub_descrpt)) + return max(rcut_list) + +def get_sel(jdata, rcut, data_sys_name = None): + max_rcut = get_rcut(jdata) + type_map = get_type_map(jdata) + + if type_map is not None and len(type_map) == 0: + type_map = None + train_data = get_data(jdata["training"]["training_data"], max_rcut, type_map, None) + train_data = train_data.get_data_system(data_sys_name) + + train_data.get_batch() + data_ntypes = train_data.get_ntypes() + if type_map is not None: + map_ntypes = len(type_map) + else: + map_ntypes = data_ntypes + ntypes = max([map_ntypes, data_ntypes]) + + neistat = NeighborStat(ntypes, rcut) + + min_nbor_dist, max_nbor_size = neistat.get_stat(train_data) + + return max_nbor_size + + + +def update_one_sel(jdata, descriptor): + rcut = descriptor['rcut'] + data_sys_name = '' + if 'name' in descriptor.keys(): + sys_name = descriptor['name'] + for sub_task in jdata['training']['tasks']: + # find the data system that uses this specific descriptor + if sub_task['descriptor'] == sys_name: + data_sys_name = sub_task['name'] + break + tmp_sel = get_sel(jdata, rcut, data_sys_name) + + if parse_auto_sel(descriptor['sel']) : + ratio = parse_auto_sel_ratio(descriptor['sel']) + descriptor['sel'] = [int(wrap_up_4(ii * ratio)) for ii in tmp_sel] + else: + # sel is set by user + for 
ii, (tt, dd) in enumerate(zip(tmp_sel, descriptor['sel'])): + if dd and tt > dd: + # we may skip warning for sel=0, where the user is likely + # to exclude such type in the descriptor + log.warning( + "sel of type %d is not enough! The expected value is " + "not less than %d, but you set it to %d. The accuracy" + " of your model may get worse." %(ii, tt, dd) + ) + return descriptor + + +def parse_auto_descrpt(jdata,descrpt_data): + if descrpt_data['type'] == 'hybrid': + for ii in range(len(descrpt_data['list'])): + descrpt_data['list'][ii] = update_one_sel(jdata, descrpt_data['list'][ii]) + else: + descrpt_data = update_one_sel(jdata, descrpt_data) + return descrpt_data + +def update_sel(jdata): + descrpt_data = jdata['model']['descriptor'] + update_descrpt = [] + for sub_descrpt in descrpt_data: + sub_descrpt = parse_auto_descrpt(jdata, sub_descrpt) + update_descrpt.append(sub_descrpt) + jdata['model']['descriptor'] = update_descrpt + return jdata diff --git a/deepmd/fit/dipole.py b/deepmd/fit/dipole.py index 73562951dc..d3f183808f 100644 --- a/deepmd/fit/dipole.py +++ b/deepmd/fit/dipole.py @@ -24,7 +24,8 @@ def __init__ (self, seed : int = None, activation_function : str = 'tanh', precision : str = 'default', - uniform_seed: bool = False + uniform_seed: bool = False, + name : str = None, ) -> None: """ Constructor @@ -68,6 +69,7 @@ def __init__ (self, self.sel_type = [ii for ii in range(self.ntypes)] self.sel_type = sel_type self.seed = seed + self.name = name self.uniform_seed = uniform_seed self.seed_shift = one_layer_rand_seed_shift() self.fitting_activation_fn = get_activation_func(activation_function) @@ -88,6 +90,12 @@ def get_out_size(self) -> int: """ return 3 + def get_name(self) -> str: + """ + Get the name of fitting net + """ + return self.name + def build (self, input_d : tf.Tensor, rot_mat : tf.Tensor, diff --git a/deepmd/fit/ener.py b/deepmd/fit/ener.py index 03145076cb..04a3b1f2ce 100644 --- a/deepmd/fit/ener.py +++ b/deepmd/fit/ener.py @@ -28,7 
+28,8 @@ def __init__ (self, atom_ener : List[float] = [], activation_function : str = 'tanh', precision : str = 'default', - uniform_seed: bool = False + uniform_seed: bool = False, + name: str = None ) -> None: """ Constructor @@ -64,6 +65,8 @@ def __init__ (self, The precision of the embedding net parameters. Supported options are {1} uniform_seed Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed + name + Name used to identify the fitting net """ # model param self.ntypes = descrpt.get_ntypes() @@ -86,6 +89,7 @@ def __init__ (self, self.resnet_dt = resnet_dt self.rcond = rcond self.seed = seed + self.name = name self.uniform_seed = uniform_seed self.seed_shift = one_layer_rand_seed_shift() self.tot_ener_zero = tot_ener_zero @@ -123,6 +127,12 @@ def get_numb_fparam(self) -> int: """ return self.numb_fparam + def get_name(self) -> str: + """ + Get the name of fitting net + """ + return self.name + def get_numb_aparam(self) -> int: """ Get the number of atomic parameters diff --git a/deepmd/fit/polar.py b/deepmd/fit/polar.py index 33c5be378a..75c9cd74a5 100644 --- a/deepmd/fit/polar.py +++ b/deepmd/fit/polar.py @@ -115,7 +115,8 @@ def __init__ (self, seed : int = None, activation_function : str = 'tanh', precision : str = 'default', - uniform_seed: bool = False + uniform_seed: bool = False, + name : str = None, ) -> None: """ Constructor @@ -189,6 +190,7 @@ def __init__ (self, self.dim_rot_mat_1 = descrpt.get_dim_rot_mat_1() self.dim_rot_mat = self.dim_rot_mat_1 * 3 self.useBN = False + self.name = name def get_sel_type(self) -> List[int]: """ @@ -202,6 +204,12 @@ def get_out_size(self) -> int: """ return 9 + def get_name(self) -> str: + """ + Get the name of fitting net + """ + return self.name + def compute_input_stats(self, all_stat, protection = 1e-2): @@ -383,7 +391,8 @@ def __init__ (self, diag_shift : List[float] = None, seed : int = None, activation_function : str = 'tanh', - precision : str = 'default' + 
precision : str = 'default', + name : str = None, ) -> None: """ Constructor @@ -438,6 +447,12 @@ def get_out_size(self) -> int: Get the output size. Should be 9 """ return self.polar_fitting.get_out_size() + + def get_name(self) -> str: + """ + Get the name of fitting net + """ + return self.name def build (self, input_d, diff --git a/deepmd/loss/ener.py b/deepmd/loss/ener.py index f3edac8800..2822d205c3 100644 --- a/deepmd/loss/ener.py +++ b/deepmd/loss/ener.py @@ -22,8 +22,10 @@ def __init__ (self, limit_pref_ae : float = 0.0, start_pref_pf : float = 0.0, limit_pref_pf : float = 0.0, - relative_f : float = None + relative_f : float = None , + name : str = None, ) -> None: + self.name = name self.starter_learning_rate = starter_learning_rate self.start_pref_e = start_pref_e self.limit_pref_e = limit_pref_e @@ -128,6 +130,10 @@ def build (self, self.l2_more = more_loss return l2_loss, more_loss + def get_name(self): + # get name of loss + return self.name + def eval(self, sess, feed_dict, natoms): run_data = [ self.l2_l, diff --git a/deepmd/model/ener.py b/deepmd/model/ener.py index 6d7230b6cd..eec7db4f69 100644 --- a/deepmd/model/ener.py +++ b/deepmd/model/ener.py @@ -22,7 +22,8 @@ def __init__ ( use_srtab : str = None, smin_alpha : float = None, sw_rmin : float = None, - sw_rmax : float = None + sw_rmax : float = None, + name : str = None ) -> None: """ Constructor @@ -48,6 +49,8 @@ def __init__ ( The lower boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided. sw_rmin The upper boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided. 
+ name + Name used to identify the model """ # descriptor self.descrpt = descrpt @@ -73,7 +76,10 @@ def __init__ ( self.sw_rmax = sw_rmax else : self.srtab = None + self.name = name + def get_name(self): + return self.name def get_rcut (self) : return self.rcut @@ -111,10 +117,15 @@ def build (self, box, mesh, input_dict, - suffix = '', + suffix = '', # a dict of suffix, including type_embed, descrpt and fitting. reuse = None): - - with tf.variable_scope('model_attr' + suffix, reuse = reuse) : + if not isinstance(suffix,dict): + suffix_dict = {} + suffix_dict['fitting'] = suffix + suffix_dict['descrpt'] = suffix + suffix_dict['type_embed'] = suffix + suffix = suffix_dict + with tf.variable_scope('model_attr' + suffix["fitting"], reuse = reuse) : t_tmap = tf.constant(' '.join(self.type_map), name = 'tmap', dtype = tf.string) @@ -146,7 +157,7 @@ def build (self, type_embedding = self.typeebd.build( self.ntypes, reuse = reuse, - suffix = suffix, + suffix = suffix["type_embed"], ) input_dict['type_embedding'] = type_embedding @@ -157,7 +168,7 @@ def build (self, box, mesh, input_dict, - suffix = suffix, + suffix = suffix["descrpt"], reuse = reuse) dout = tf.identity(dout, name='o_descriptor') @@ -170,7 +181,7 @@ def build (self, natoms, input_dict, reuse = reuse, - suffix = suffix) + suffix = suffix["fitting"]) if self.srtab is not None : sw_lambda, sw_deriv \ @@ -204,8 +215,8 @@ def build (self, else : energy_raw = atom_ener - energy_raw = tf.reshape(energy_raw, [-1, natoms[0]], name = 'o_atom_energy'+suffix) - energy = tf.reduce_sum(global_cvt_2_ener_float(energy_raw), axis=1, name='o_energy'+suffix) + energy_raw = tf.reshape(energy_raw, [-1, natoms[0]], name = 'o_atom_energy'+suffix["fitting"]) + energy = tf.reduce_sum(global_cvt_2_ener_float(energy_raw), axis=1, name='o_energy'+suffix["fitting"]) force, virial, atom_virial \ = self.descrpt.prod_force_virial (atom_ener, natoms) @@ -220,7 +231,7 @@ def build (self, n_r_sel = nnei_r) force = force + sw_force + 
tab_force - force = tf.reshape (force, [-1, 3 * natoms[1]], name = "o_force"+suffix) + force = tf.reshape (force, [-1, 3 * natoms[1]], name = "o_force"+suffix["fitting"]) if self.srtab is not None : sw_virial, sw_atom_virial \ @@ -235,8 +246,8 @@ def build (self, virial = virial + sw_virial \ + tf.reduce_sum(tf.reshape(tab_atom_virial, [-1, natoms[1], 9]), axis = 1) - virial = tf.reshape (virial, [-1, 9], name = "o_virial"+suffix) - atom_virial = tf.reshape (atom_virial, [-1, 9 * natoms[1]], name = "o_atom_virial"+suffix) + virial = tf.reshape (virial, [-1, 9], name = "o_virial"+suffix["fitting"]) + atom_virial = tf.reshape (atom_virial, [-1, 9 * natoms[1]], name = "o_atom_virial"+suffix["fitting"]) model_dict = {} model_dict['energy'] = energy diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py index 4526c2d469..1ceb710630 100644 --- a/deepmd/train/trainer.py +++ b/deepmd/train/trainer.py @@ -83,72 +83,139 @@ def __init__(self, self.run_opt = run_opt self._init_param(jdata) - def _init_param(self, jdata): - # model config - model_param = j_must_have(jdata, 'model') - descrpt_param = j_must_have(model_param, 'descriptor') - fitting_param = j_must_have(model_param, 'fitting_net') - typeebd_param = model_param.get('type_embedding', None) - self.model_param = model_param - self.descrpt_param = descrpt_param - - # descriptor - try: - descrpt_type = descrpt_param['type'] - except KeyError: - raise KeyError('the type of descriptor should be set by `type`') - + def _init_descrpt(self, descrpt_type, jdata): if descrpt_type != 'hybrid': - self.descrpt = _generate_descrpt_from_param_dict(descrpt_param) + tmp_descrpt = _generate_descrpt_from_param_dict(jdata) else : descrpt_list = [] - for ii in descrpt_param.get('list', []): + for ii in jdata.get('list', []): descrpt_list.append(_generate_descrpt_from_param_dict(ii)) - self.descrpt = DescrptHybrid(descrpt_list) + tmp_descrpt = DescrptHybrid(descrpt_list) + return tmp_descrpt - # fitting net - try: - fitting_type = 
fitting_param['type'] - except: - fitting_type = 'ener' - fitting_param.pop('type', None) - fitting_param['descrpt'] = self.descrpt + def _init_type_embed(self, typeebd_param): + if typeebd_param is not None: + type_embed = TypeEmbedNet( + neuron=typeebd_param['neuron'], + resnet_dt=typeebd_param['resnet_dt'], + activation_function=typeebd_param['activation_function'], + precision=typeebd_param['precision'], + trainable=typeebd_param['trainable'], + seed=typeebd_param['seed'] + ) + else: + type_embed = None + return type_embed + + def _init_fitting(self, descrpt_type, fitting_type, fitting_param): if fitting_type == 'ener': - self.fitting = EnerFitting(**fitting_param) + tmp_fitting = EnerFitting(**fitting_param) # elif fitting_type == 'wfc': # self.fitting = WFCFitting(fitting_param, self.descrpt) elif fitting_type == 'dipole': if descrpt_type == 'se_e2_a': - self.fitting = DipoleFittingSeA(**fitting_param) + tmp_fitting = DipoleFittingSeA(**fitting_param) else : raise RuntimeError('fitting dipole only supports descrptors: se_e2_a') elif fitting_type == 'polar': # if descrpt_type == 'loc_frame': # self.fitting = PolarFittingLocFrame(fitting_param, self.descrpt) if descrpt_type == 'se_e2_a': - self.fitting = PolarFittingSeA(**fitting_param) + tmp_fitting = PolarFittingSeA(**fitting_param) else : raise RuntimeError('fitting polar only supports descrptors: loc_frame and se_e2_a') elif fitting_type == 'global_polar': if descrpt_type == 'se_e2_a': - self.fitting = GlobalPolarFittingSeA(**fitting_param) + tmp_fitting = GlobalPolarFittingSeA(**fitting_param) else : raise RuntimeError('fitting global_polar only supports descrptors: loc_frame and se_e2_a') else : raise RuntimeError('unknow fitting type ' + fitting_type) + return tmp_fitting + + def _init_lr(self, lr_param): + try: + lr_type = lr_param['type'] + except: + lr_type = 'exp' + if lr_type == 'exp': + tmp_lr = LearningRateExp(lr_param['start_lr'], + lr_param['stop_lr'], + lr_param['decay_steps']) + else : + raise 
RuntimeError('unknown learning_rate type ' + lr_type) + return tmp_lr + + def _init_loss(self, loss_param, fitting_type, lr, model): + try : + loss_type = loss_param.get('type', 'ener') + except: + loss_type = 'ener' + if fitting_type == 'ener': + loss_param.pop('type', None) + loss_param['starter_learning_rate'] = lr.start_lr() + if loss_type == 'ener': + tmp_loss = EnerStdLoss(**loss_param) + elif loss_type == 'ener_dipole': + tmp_loss = EnerDipoleLoss(**loss_param) + else: + raise RuntimeError('unknow loss type') + elif fitting_type == 'wfc': + tmp_loss = TensorLoss(loss_param, + model = model, + tensor_name = 'wfc', + tensor_size = model.get_out_size(), + label_name = 'wfc') + elif fitting_type == 'dipole': + tmp_loss = TensorLoss(loss_param, + model = model, + tensor_name = 'dipole', + tensor_size = 3, + label_name = 'dipole') + elif fitting_type == 'polar': + tmp_loss = TensorLoss(loss_param, + model = model, + tensor_name = 'polar', + tensor_size = 9, + label_name = 'polarizability') + elif fitting_type == 'global_polar': + tmp_loss = TensorLoss(loss_param, + model = model, + tensor_name = 'global_polar', + tensor_size = 9, + atomic = False, + label_name = 'polarizability') + else : + raise RuntimeError('get unknown fitting type when building loss function') + return tmp_loss + + def _init_param(self, jdata): + # model config + model_param = j_must_have(jdata, 'model') + descrpt_param = j_must_have(model_param, 'descriptor') + fitting_param = j_must_have(model_param, 'fitting_net') + typeebd_param = model_param.get('type_embedding', None) + self.model_param = model_param + self.descrpt_param = descrpt_param + + # descriptor + try: + descrpt_type = descrpt_param['type'] + except KeyError: + raise KeyError('the type of descriptor should be set by `type`') + self.descrpt = self._init_descrpt(descrpt_type, self.descrpt_param) + + # fitting net + try: + fitting_type = fitting_param['type'] + except: + fitting_type = 'ener' + fitting_param.pop('type', None) + 
fitting_param['descrpt'] = self.descrpt + self.fitting = self._init_fitting(descrpt_type, fitting_type, fitting_param) # type embedding - if typeebd_param is not None: - self.typeebd = TypeEmbedNet( - neuron=typeebd_param['neuron'], - resnet_dt=typeebd_param['resnet_dt'], - activation_function=typeebd_param['activation_function'], - precision=typeebd_param['precision'], - trainable=typeebd_param['trainable'], - seed=typeebd_param['seed'] - ) - else: - self.typeebd = None + self.typeebd = self._init_type_embed(typeebd_param) # init model # infer model type by fitting_type @@ -196,16 +263,7 @@ def _init_param(self, jdata): # learning rate lr_param = j_must_have(jdata, 'learning_rate') - try: - lr_type = lr_param['type'] - except: - lr_type = 'exp' - if lr_type == 'exp': - self.lr = LearningRateExp(lr_param['start_lr'], - lr_param['stop_lr'], - lr_param['decay_steps']) - else : - raise RuntimeError('unknown learning_rate type ' + lr_type) + self.lr = self._init_lr(lr_param) # loss # infer loss type by fitting_type @@ -215,43 +273,9 @@ def _init_param(self, jdata): except: loss_param = None loss_type = 'ener' + self.loss = self._init_loss(loss_param, fitting_type, self.lr, self.model) - if fitting_type == 'ener': - loss_param.pop('type', None) - loss_param['starter_learning_rate'] = self.lr.start_lr() - if loss_type == 'ener': - self.loss = EnerStdLoss(**loss_param) - elif loss_type == 'ener_dipole': - self.loss = EnerDipoleLoss(**loss_param) - else: - raise RuntimeError('unknow loss type') - elif fitting_type == 'wfc': - self.loss = TensorLoss(loss_param, - model = self.model, - tensor_name = 'wfc', - tensor_size = self.model.get_out_size(), - label_name = 'wfc') - elif fitting_type == 'dipole': - self.loss = TensorLoss(loss_param, - model = self.model, - tensor_name = 'dipole', - tensor_size = 3, - label_name = 'dipole') - elif fitting_type == 'polar': - self.loss = TensorLoss(loss_param, - model = self.model, - tensor_name = 'polar', - tensor_size = 9, - label_name 
= 'polarizability') - elif fitting_type == 'global_polar': - self.loss = TensorLoss(loss_param, - model = self.model, - tensor_name = 'global_polar', - tensor_size = 9, - atomic = False, - label_name = 'polarizability') - else : - raise RuntimeError('get unknown fitting type when building loss function') + # training tr_data = jdata['training'] @@ -562,9 +586,13 @@ def valid_on_the_fly(self, cur_batch = self.cur_batch current_lr = run_sess(self.sess, self.learning_rate) if print_header: - self.print_header(fp, train_results, valid_results) - self.print_on_training(fp, train_results, valid_results, cur_batch, current_lr) - + print_str = self.print_header(fp, train_results, valid_results) + fp.write(print_str+'\n') + fp.flush() + print_str = self.print_on_training(fp, train_results, valid_results, cur_batch, current_lr) + fp.write(print_str + '\n') + fp.flush() + @staticmethod def print_header(fp, train_results, valid_results): print_str = '' @@ -577,9 +605,8 @@ def print_header(fp, train_results, valid_results): prop_fmt = ' %11s' for k in train_results.keys(): print_str += prop_fmt % (k + '_trn') - print_str += ' %8s\n' % 'lr' - fp.write(print_str) - fp.flush() + print_str += ' %8s' % 'lr' + return print_str @staticmethod def print_on_training(fp, train_results, valid_results, cur_batch, cur_lr): @@ -594,9 +621,9 @@ def print_on_training(fp, train_results, valid_results, cur_batch, cur_lr): prop_fmt = " %11.2e" for k in train_results.keys(): print_str += prop_fmt % (train_results[k]) - print_str += " %8.1e\n" % cur_lr - fp.write(print_str) - fp.flush() + print_str += " %8.1e" % cur_lr + return print_str + def get_evaluation_results(self, batch_list): if batch_list is None: return None diff --git a/deepmd/train/trainer_mt.py b/deepmd/train/trainer_mt.py new file mode 100644 index 0000000000..8628572427 --- /dev/null +++ b/deepmd/train/trainer_mt.py @@ -0,0 +1,492 @@ +#!/usr/bin/env python3 +import logging +import os +import time +import shutil +import 
class DPMultitaskTrainer(DPTrainer):
    """Trainer that optimises several tasks in one TensorFlow graph.

    A *task* (also called a *method*) combines one descriptor, one fitting
    net, one learning-rate schedule and one loss, each referenced by name in
    ``training/tasks``.  Descriptors, fitting nets and the type embedding are
    built once and may be shared between tasks.

    Per-task objects (``model_dict``, ``lr_dict``, ``loss_dict``,
    ``learning_rate_dict``, ``l2_l_dict``, ``l2_more_dict``,
    ``optimizer_dict``, ``train_op_dict``) are all keyed by the task name, so
    a task may reference a loss or learning rate whose name differs from its
    own.
    """

    def __init__(self, jdata, run_opt):
        """Store the run options and parse the input script ``jdata``."""
        self.run_opt = run_opt
        self._init_param(jdata)

    def _init_param(self, jdata):
        """Create descriptors, fitting nets, models, learning rates and losses.

        Parameters
        ----------
        jdata
            The (normalized) input script.  ``model/descriptor``,
            ``model/fitting_net``, ``learning_rate``, ``loss`` and
            ``training/tasks`` are lists of dicts identified by ``name``.

        Raises
        ------
        KeyError
            If a descriptor has no ``type`` or a task references an unknown
            component name.
        """
        # model config
        model_param = j_must_have(jdata, 'model')
        training_param = j_must_have(jdata, 'training')
        descrpt_param = j_must_have(model_param, 'descriptor')
        fitting_param = j_must_have(model_param, 'fitting_net')
        typeebd_param = model_param.get('type_embedding', None)
        model_build_param = j_must_have(training_param, 'tasks')
        self.model_param = model_param
        self.descrpt_param = descrpt_param
        self.fitting_param = fitting_param
        self.model_build_param = model_build_param
        # `seed` is optional in the input schema, so it may be absent
        self.seed = training_param.get('seed')

        # descriptors, keyed by their (stringified) name
        self.descriptor_dict = {}
        self.descriptor_type = {}
        for sub_descrpt in self.descrpt_param:
            try:
                descrpt_type = sub_descrpt['type']
            except KeyError:
                raise KeyError('the type of descriptor should be set by `type`')
            descrpt_key = str(sub_descrpt['name'])
            self.descriptor_dict[descrpt_key] = DPTrainer._init_descrpt(self, descrpt_type, sub_descrpt)
            self.descriptor_type[descrpt_key] = descrpt_type

        # type embedding (share the same one)
        self.typeebd = DPTrainer._init_type_embed(self, typeebd_param)

        lr_param = j_must_have(jdata, 'learning_rate')
        lr_param_dict = {sub_lr['name']: sub_lr for sub_lr in lr_param}
        loss_param = jdata['loss']
        loss_param_dict = {sub_loss['name']: sub_loss for sub_loss in loss_param}

        # which descriptor feeds which fitting net, as declared by the tasks
        self.fitting_descrpt = {}
        for sub_model in self.model_build_param:
            self.fitting_descrpt[sub_model['fitting_net']] = sub_model['descriptor']

        # fitting nets, keyed by their name
        self.fitting_dict = {}
        self.loss_dict = {}
        self.lr_dict = {}
        self.model_dict = {}
        self.fitting_type_dict = {}
        for sub_net in fitting_param:
            name_fitting = sub_net['name']
            # `type` is removed from the parameter dict (it is not a fitting
            # constructor argument); a missing type means an energy fitting
            sub_net_type = sub_net.pop('type', 'ener')
            self.fitting_type_dict[name_fitting] = sub_net_type
            name_descrpt = str(self.fitting_descrpt[name_fitting])
            sub_net['descrpt'] = self.descriptor_dict[name_descrpt]
            descrpt_type = self.descriptor_type[name_descrpt]
            self.fitting_dict[name_fitting] = DPTrainer._init_fitting(self, descrpt_type, sub_net_type, sub_net)

        # one model, learning rate and loss per task, all keyed by task name
        self.model_component = {}
        self.method_name_list = []
        for sub_model in self.model_build_param:
            fitting_name = sub_model['fitting_net']
            descrpt_name = sub_model['descriptor']
            lr_name = sub_model['learning_rate']
            loss_name = sub_model['loss']
            model_name = sub_model['name']
            self.method_name_list.append(model_name)
            self.model_component[model_name] = {
                'fitting': fitting_name,
                'descrpt': descrpt_name,
            }
            # NOTE(review): every task is built as an EnerModel regardless of
            # the fitting type - confirm this is intended.
            self.model_dict[model_name] = EnerModel(
                self.descriptor_dict[str(descrpt_name)],
                self.fitting_dict[fitting_name],
                self.typeebd,
                model_param.get('type_map'),  # this is the total type map
                model_param.get('data_stat_nbatch', 10),
                model_param.get('data_stat_protect', 1e-2),
                model_param.get('use_srtab'),
                model_param.get('smin_alpha'),
                model_param.get('sw_rmin'),
                model_param.get('sw_rmax'),
                model_name
            )

            # learning rate
            self.lr_dict[model_name] = DPTrainer._init_lr(self, lr_param_dict[lr_name])

            # loss, its type is inferred from the fitting type
            self.loss_dict[model_name] = DPTrainer._init_loss(
                self,
                loss_param_dict[loss_name],
                self.fitting_type_dict[fitting_name],
                self.lr_dict[model_name],
                self.model_dict[model_name])

        self.l2_l_dict = dict.fromkeys(self.loss_dict)
        self.l2_more_dict = dict.fromkeys(self.loss_dict)

        # training
        tr_data = jdata['training']
        self.disp_file = tr_data.get('disp_file', 'lcurve.out')
        self.disp_freq = tr_data.get('disp_freq', 1000)
        self.save_freq = tr_data.get('save_freq', 1000)
        self.save_ckpt = tr_data.get('save_ckpt', 'model.ckpt')
        self.display_in_training = tr_data.get('disp_training', True)
        self.timing_in_training = tr_data.get('time_training', True)
        self.profiling = tr_data.get('profiling', False)
        self.profiling_file = tr_data.get('profiling_file', 'timeline.json')
        self.tensorboard = tr_data.get('tensorboard', False)
        self.tensorboard_log_dir = tr_data.get('tensorboard_log_dir', 'log')
        self.useBN = False

        # Number of frame parameters, only used for logging in `build`.
        # NOTE(review): the flattened source does not show whether the
        # original `break` sat inside the `else` branch; this keeps the
        # "first fitting net decides" reading - confirm.
        self.numb_fparam = 0
        for fitting_name, tmp_fitting in self.fitting_dict.items():
            if self.fitting_type_dict[fitting_name] == 'ener' and tmp_fitting.get_numb_fparam() > 0:
                self.numb_fparam = tmp_fitting.get_numb_fparam()
            break

        if tr_data.get("validation_data", None) is not None:
            self.valid_numb_batch = tr_data["validation_data"].get("numb_btch", 1)
        else:
            self.valid_numb_batch = 1

    def build(self, data, stop_batch=0):
        """Compute data statistics and build the whole training graph.

        Parameters
        ----------
        data
            The data docker; its ``i``-th data system is used for the
            statistics of the ``i``-th task.
        stop_batch
            Number of training steps.

        Raises
        ------
        RuntimeError
            If model compression is requested, which this trainer does not
            support.
        """
        # NOTE(review): ntypes is summed over all descriptors, so descriptors
        # sharing one type map are counted several times - confirm.
        self.ntypes = 0
        for sub_descrpt in self.descriptor_dict.values():
            self.ntypes += sub_descrpt.get_ntypes()

        # Usually, the type number of the model should be equal to that of the data
        # However, nt_model > nt_data should be allowed, since users may only want to
        # train using a dataset that only have some of elements
        assert (self.ntypes >= data.get_ntypes()), "ntypes should match that found in data"
        self.stop_batch = stop_batch

        self.batch_size = data.get_batch_size()

        if self.numb_fparam > 0:
            log.info("training with %d frame parameter(s)" % self.numb_fparam)
        else:
            log.info("training without frame parameter")

        self.type_map = data.get_type_map()  # this is the total type_map from the datadocker

        for idx, sub_model in enumerate(self.model_dict.values()):
            sub_model.data_stat(data.get_data_system_idx(idx))

        if 'compress' in self.model_param and self.model_param['compress']['compress']:
            # The single-task code path relied on one descriptor dict and on
            # `self.descrpt`; here descriptors are a list and `self.descrpt`
            # does not exist, so the request can only fail.  Fail clearly.
            raise RuntimeError('model compression is not supported by the multi-task trainer')

        self._build_lr()
        self._build_network(data)
        self._build_training()

    def _build_lr(self):
        """Build one learning-rate tensor per task on a shared global step."""
        self._extra_train_ops = []
        self.learning_rate_dict = {}
        self.global_step = tf.train.get_or_create_global_step()
        for lr_name, sub_lr in self.lr_dict.items():
            self.learning_rate_dict[lr_name] = sub_lr.build(self.global_step, self.stop_batch)
        log.info("built lr")

    def _build_network(self, data):
        """Create the shared placeholders and build every task's model and loss."""
        self.place_holders = {}
        data_dict = data.get_data_dict()
        for kk in data_dict.keys():
            if kk == 'type':
                continue
            prec = GLOBAL_TF_FLOAT_PRECISION
            if data_dict[kk]['high_prec']:
                prec = GLOBAL_ENER_FLOAT_PRECISION
            self.place_holders[kk] = tf.placeholder(prec, [None], name='t_' + kk)
            self.place_holders['find_' + kk] = tf.placeholder(tf.float32, name='t_find_' + kk)

        self.place_holders['type'] = tf.placeholder(tf.int32, [None], name='t_type')
        self.place_holders['natoms_vec'] = tf.placeholder(tf.int32, [data.get_ntypes() + 2], name='t_natoms')
        self.place_holders['default_mesh'] = tf.placeholder(tf.int32, [None], name='t_mesh')
        self.place_holders['is_training'] = tf.placeholder(tf.bool)

        # we will send a suffix dict into the model,
        # so that each model can be build by combining these sub module together
        for model_name, sub_model in self.model_dict.items():
            suffix_dict = self.model_component[model_name]
            suffix_dict['type_embed'] = 'type_embedding'
            tmp_model_pred = sub_model.build(
                self.place_holders['coord'],
                self.place_holders['type'],
                self.place_holders['natoms_vec'],
                self.place_holders['box'],
                self.place_holders['default_mesh'],
                self.place_holders,
                suffix=suffix_dict,
                reuse=tf.AUTO_REUSE)

            tmp_l2_l, tmp_l2_more = self.loss_dict[model_name].build(
                self.learning_rate_dict[model_name],
                self.place_holders['natoms_vec'],
                tmp_model_pred,
                self.place_holders,
                suffix=model_name)
            self.l2_l_dict[model_name] = tmp_l2_l
            self.l2_more_dict[model_name] = tmp_l2_more

        log.info("built network")

    def _build_training(self):
        """Create one Adam optimizer and one train op per task."""
        trainable_variables = tf.trainable_variables()
        self.optimizer_dict = {}
        self.train_op_dict = {}
        for task_name in self.loss_dict.keys():
            optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate_dict[task_name])
            self.optimizer_dict[task_name] = optimizer
            grads = tf.gradients(self.l2_l_dict[task_name], trainable_variables)
            apply_op = optimizer.apply_gradients(zip(grads, trainable_variables),
                                                 global_step=self.global_step,
                                                 name='train_step')
            train_ops = [apply_op] + self._extra_train_ops
            self.train_op_dict[task_name] = tf.group(*train_ops)
        log.info("built training")

    def _init_sess_serial(self):
        """Open the session and initialise / restore the variables.

        The learning-curve file is truncated unless training is restarted.

        Raises
        ------
        RuntimeError
            If ``run_opt.init_mode`` is not a known mode.
        """
        self.sess = tf.Session(config=default_tf_session_config)
        self.saver = tf.train.Saver()
        init_mode = self.run_opt.init_mode
        if init_mode == 'init_from_scratch':
            log.info("initialize model from scratch")
            self.sess.run(tf.global_variables_initializer())
            with open(self.disp_file, "w"):
                pass
        elif init_mode == 'init_from_model':
            log.info("initialize from model %s" % self.run_opt.init_model)
            self.sess.run(tf.global_variables_initializer())
            self.saver.restore(self.sess, self.run_opt.init_model)
            self.sess.run(self.global_step.assign(0))
            with open(self.disp_file, "w"):
                pass
        elif init_mode == 'restart':
            log.info("restart from model %s" % self.run_opt.restart)
            self.sess.run(tf.global_variables_initializer())
            self.saver.restore(self.sess, self.run_opt.restart)
        else:
            raise RuntimeError("unkown init mode")

    def _get_valid_batches(self, valid_data, pick_method):
        """Draw ``valid_numb_batch`` validation batches for one task (or None)."""
        if valid_data is None:
            return None
        return [valid_data.get_batch(pick_method)['data'] for ii in range(self.valid_numb_batch)]

    def train(self, train_data, valid_data=None):
        """Run the training loop until the global step reaches ``stop_batch``.

        Each step draws one batch from ``train_data``; the batch carries the
        index of the task it belongs to and only that task's train op is run.

        Raises
        ------
        RuntimeError
            If distributed training is requested.
        """
        stop_batch = self.stop_batch
        if self.run_opt.is_distrib:
            raise RuntimeError('distributed training for multi-task is not supported at the moment')
        self._init_sess_serial()

        cur_batch = self.sess.run(self.global_step)
        is_first_step = True
        self.cur_batch = cur_batch
        for lr_name, tmp_lr in self.lr_dict.items():
            log.info("system %s, start training at lr %.2e (== %.2e), decay_step %d, decay_rate %f, final lr will be %.2e" %
                     (lr_name,
                      self.sess.run(self.learning_rate_dict[lr_name]),
                      tmp_lr.value(cur_batch),
                      tmp_lr.decay_steps_,
                      tmp_lr.decay_rate_,
                      tmp_lr.value(stop_batch))
                     )

        prf_options = None
        prf_run_metadata = None
        if self.profiling:
            prf_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            prf_run_metadata = tf.RunMetadata()

        # set tensorboard execution environment
        if self.tensorboard:
            summary_merged_op = tf.summary.merge_all()
            # start from a clean log directory; it may not exist yet
            try:
                shutil.rmtree(self.tensorboard_log_dir)
            except FileNotFoundError:
                pass
            tb_train_writer = tf.summary.FileWriter(self.tensorboard_log_dir + '/train', self.sess.graph)
            tb_valid_writer = tf.summary.FileWriter(self.tensorboard_log_dir + '/test')
        else:
            tb_train_writer = None
            tb_valid_writer = None

        train_time = 0
        fp = open(self.disp_file, "a") if self.run_opt.is_chief else None
        try:
            while cur_batch < stop_batch:
                train_batch = train_data.get_batch()
                pick_method = train_batch['pick_method']
                method_name = self.method_name_list[pick_method]
                train_batch = train_batch['data']

                # first round validation:
                if self.display_in_training and is_first_step:
                    # we choose valid batch according to the training system
                    valid_batches = self._get_valid_batches(valid_data, pick_method)
                    self.valid_on_the_fly(fp, [train_batch], valid_batches, print_header=True, method=pick_method)
                    is_first_step = False

                if self.timing_in_training:
                    tic = time.time()
                train_feed_dict = DPTrainer.get_feed_dict(self, train_batch, is_training=True)
                # use tensorboard to visualize the training of deepmd-kit
                # it will takes some extra execution time to generate the tensorboard data
                if self.tensorboard:
                    summary, _ = self.sess.run([summary_merged_op, self.train_op_dict[method_name]],
                                               feed_dict=train_feed_dict,
                                               options=prf_options, run_metadata=prf_run_metadata)
                    tb_train_writer.add_summary(summary, cur_batch)
                else:
                    self.sess.run([self.train_op_dict[method_name]], feed_dict=train_feed_dict,
                                  options=prf_options, run_metadata=prf_run_metadata)
                if self.timing_in_training:
                    toc = time.time()
                    train_time += toc - tic
                cur_batch = self.sess.run(self.global_step)
                self.cur_batch = cur_batch

                # on-the-fly validation
                if self.display_in_training and (cur_batch % self.disp_freq == 0):
                    if self.timing_in_training:
                        tic = time.time()
                    valid_batches = self._get_valid_batches(valid_data, pick_method)
                    self.valid_on_the_fly(fp, [train_batch], valid_batches, method=pick_method)
                    if self.timing_in_training:
                        toc = time.time()
                        test_time = toc - tic
                        log.info("batch %7d method %s training time %.2f s, testing time %.2f s"
                                 % (cur_batch, pick_method, train_time, test_time))
                        train_time = 0

                if self.save_freq > 0 and cur_batch % self.save_freq == 0 and self.run_opt.is_chief:
                    if self.saver is not None:
                        self.saver.save(self.sess, os.getcwd() + "/" + self.save_ckpt)
                        log.info("saved checkpoint %s" % self.save_ckpt)
        finally:
            # never leak the learning-curve file, even if a step fails
            if fp is not None:
                fp.close()

        if self.profiling and self.run_opt.is_chief:
            fetched_timeline = timeline.Timeline(prf_run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open(self.profiling_file, 'w') as f:
                f.write(chrome_trace)

    def valid_on_the_fly(self,
                         fp,
                         train_batches,
                         valid_batches,
                         print_header=False,
                         method=None):
        """Evaluate one task and append a line to the learning-curve file.

        Parameters
        ----------
        fp
            Open, writable learning-curve file.
        train_batches, valid_batches
            Lists of batches to evaluate; ``valid_batches`` may be None.
        print_header
            Also write the header line first.
        method
            Index of the task in ``method_name_list``.  Although it defaults
            to None, an integer index is required.
        """
        train_results = self.get_evaluation_results(train_batches, method)
        valid_results = self.get_evaluation_results(valid_batches, method)

        cur_batch = self.cur_batch
        current_lr = self.sess.run(self.learning_rate_dict[self.method_name_list[method]])
        if print_header:
            print_str = DPTrainer.print_header(fp, train_results, valid_results)
            print_str += " %6s" % 'method'
            fp.write(print_str + '\n')
            fp.flush()
        print_str = DPTrainer.print_on_training(fp, train_results, valid_results, cur_batch, current_lr)
        print_str += "%8d" % method
        fp.write(print_str + '\n')
        fp.flush()

    def get_evaluation_results(self, batch_list, method):
        """Return the atom-weighted average of each loss term over the batches.

        Returns None when ``batch_list`` is None; otherwise a dict mapping
        every key returned by the loss (except ``"natoms"``) to its average.
        """
        if batch_list is None:
            return None

        sub_loss = self.loss_dict[self.method_name_list[method]]
        sum_results = {}  # sum of losses on all atoms
        sum_natoms = 0
        for batch in batch_list:
            natoms = batch["natoms_vec"]
            feed_dict = DPTrainer.get_feed_dict(self, batch, is_training=False)
            results = sub_loss.eval(self.sess, feed_dict, natoms)

            for k, v in results.items():
                if k == "natoms":
                    sum_natoms += v
                else:
                    sum_results[k] = sum_results.get(k, 0.) + v * results["natoms"]
        return {k: v / sum_natoms for k, v in sum_results.items() if not k == "natoms"}
This key specifies the number of `frames` in each `system` used for statistics.' + doc_data_stat_protect = 'Protect parameter for atomic energy regression.' + doc_type_embedding = "The type embedding." + doc_descrpt = 'A list of DeepMD descriptors identified by key arg `name`.' + doc_fitting = 'A list of DeepMD fitting networks identified by key arg `name`.' + doc_modifier = 'The modifier of model output.' + doc_use_srtab = 'The table for the short-range pairwise interaction added on top of DP. The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. The first colume is the distance between atoms. The second to the last columes are energies for pairs of certain types. For example we have two atom types, 0 and 1. The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly.' + doc_smin_alpha = 'The short-range tabulated interaction will be swithed according to the distance of the nearest neighbor. This distance is calculated by softmin. This parameter is the decaying parameter in the softmin. It is only required when `use_srtab` is provided.' + doc_sw_rmin = 'The lower boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided.' + doc_sw_rmax = 'The upper boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided.' 
def learning_rate_args():
    """Return the schema of the top-level ``learning_rate`` section.

    In the multi-task input the section is a plain list; its entries are
    checked one by one later, in ``normalize_mt``.
    """
    return Argument(
        "learning_rate",
        list,
        doc="A list of learning rates identified by key arg `name`.",
    )
def training_data_args_mt():
    """Return the schema of ``training/training_data`` for multi-task training.

    Compared with the single-task schema it adds ``auto_prob_method``, which
    controls how a task (method) is picked, while ``auto_prob`` and
    ``sys_probs`` keep controlling how a system is picked.
    """
    link_sys = make_link("systems", "training/training_data/systems")
    doc_systems = (
        'The data systems for training. '
        'This key can be provided with a list that specifies the systems, or be provided with a string '
        'by which the prefix of all systems are given and the list of the systems is automatically generated.'
    )
    doc_set_prefix = f'The prefix of the sets in the {link_sys}.'
    doc_batch_size = (
        'This key can be \n\n'
        f'- list: the length of which is the same as the {link_sys}. The batch size of each system is given by the elements of the list.\n\n'
        f'- int: all {link_sys} use the same batch size.\n\n'
        '- string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.\n\n'
        '- string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.'
    )
    doc_auto_prob_style = (
        'Determine the probability of systems automatically. The method is assigned by this key and can be\n\n'
        '- "prob_uniform" : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n'
        '- "prob_sys_size" : the probability of a system is proportional to the number of batches in the system\n\n'
        '- "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.'
    )
    # `auto_prob_method` used to reuse the per-system text above, which
    # documented the wrong thing: it selects a task, not a system.
    doc_auto_prob_method = (
        'Determine the probability of picking each task (method) automatically. The method is assigned by this key and can be\n\n'
        '- "prob_uniform" : all the tasks are picked with the same probability\n\n'
        '- "prob_sys_size" : the probability of a task is proportional to the total number of batches of its systems'
    )
    doc_sys_probs = (
        "A list of float if specified. "
        "Should be of the same length as `systems`, "
        "specifying the probability of each system."
    )

    args = [
        Argument("systems", [list, str], optional=False, default=".", doc=doc_systems),
        Argument("set_prefix", str, optional=True, default='set', doc=doc_set_prefix),
        Argument("batch_size", [list, int, str], optional=True, default='auto', doc=doc_batch_size),
        Argument("auto_prob", str, optional=True, default="prob_sys_size",
                 doc=doc_auto_prob_style, alias=["auto_prob_style", ]),
        Argument("auto_prob_method", str, optional=True, default="prob_uniform",
                 doc=doc_auto_prob_method, alias=["auto_prob_style_method", ]),
        Argument("sys_probs", list, optional=True, default=None, doc=doc_sys_probs, alias=["sys_weights"]),
    ]

    doc_training_data = "Configurations of training data."
    return Argument("training_data", dict, optional=False,
                    sub_fields=args, sub_variants=[], doc=doc_training_data)
+ doc_disp_file = 'The file for printing learning curve.' + doc_disp_freq = 'The frequency of printing learning curve.' + doc_numb_test = 'Number of frames used for the test during training.' + doc_save_freq = 'The frequency of saving check point.' + doc_save_ckpt = 'The file name of saving check point.' + doc_disp_training = 'Displaying verbose information during training.' + doc_time_training = 'Timing durining training.' + doc_profiling = 'Profiling during training.' + doc_profiling_file = 'Output file for profiling.' + doc_tensorboard = 'Enable tensorboard' + doc_tensorboard_log_dir = 'The log directory of tensorboard outputs' + + arg_training_data = training_data_args_mt() + arg_validation_data = validation_data_args() + + args = [ + arg_training_data, + arg_validation_data, + Argument("tasks", list, optional=False, doc=doc_tasks, alias=["sub_models"]), + Argument("numb_steps", int, optional=False, doc=doc_numb_steps, alias=["stop_batch"]), + Argument("seed", [int, None], optional=True, doc=doc_seed), + Argument("disp_file", str, optional=True, default='lcurve.out', doc=doc_disp_file), + Argument("disp_freq", int, optional=True, default=1000, doc=doc_disp_freq), + Argument("numb_test", [list, int, str], optional=True, default=1, doc=doc_numb_test), + Argument("save_freq", int, optional=True, default=1000, doc=doc_save_freq), + Argument("save_ckpt", str, optional=True, default='model.ckpt', doc=doc_save_ckpt), + Argument("disp_training", bool, optional=True, default=True, doc=doc_disp_training), + Argument("time_training", bool, optional=True, default=True, doc=doc_time_training), + Argument("profiling", bool, optional=True, default=False, doc=doc_profiling), + Argument("profiling_file", str, optional=True, default='timeline.json', doc=doc_profiling_file), + Argument("tensorboard", bool, optional=True, default=False, doc=doc_tensorboard), + Argument("tensorboard_log_dir", str, optional=True, default='log', doc=doc_tensorboard_log_dir), + ] + + doc_training = 
def normalize_list_of_args(old_list, pattern_arg):
    """Normalize and strictly check every entry of a list of components.

    Parameters
    ----------
    old_list
        The list of dicts read from the input script.
    pattern_arg
        Either a ``Variant`` (wrapped into a dict schema with an optional
        ``name`` key) or an ``Argument`` used as the schema of each entry.

    Returns
    -------
    list
        The normalized entries, in the original order.

    Raises
    ------
    AssertionError
        If ``pattern_arg`` is neither a ``Variant`` nor an ``Argument``.
    """
    if isinstance(pattern_arg, Variant):
        pattern = Argument("base", dict, [
            Argument("name", str, optional=True, default='', doc="name of the component."),
        ], [pattern_arg], doc="")
    elif isinstance(pattern_arg, Argument):
        pattern = pattern_arg
    else:
        raise AssertionError("Wrong type of input pattern argument: %s" % str(pattern_arg))
    new_list = [pattern.normalize_value(ii, trim_pattern="_*") for ii in old_list]
    for ii in new_list:
        pattern.check_value(ii, strict=True)
    return new_list


def normalize_hybrid_list(hy_list):
    """Normalize and strictly check the sub-descriptors of a hybrid descriptor."""
    base = Argument("base", dict, [
        Argument("name", str, optional=True, default='', doc="Descriptor name."),
    ], [descrpt_variant_type_args()], doc="")
    new_list = []
    for sub_descrpt in hy_list:
        data = base.normalize_value(sub_descrpt, trim_pattern="_*")
        base.check_value(data, strict=True)
        new_list.append(data)
    return new_list


def normalize_mt(data):
    """Normalize and check a multi-task input script.

    The top-level sections are checked first; then every entry of the
    list-valued sections (descriptors, fitting nets, losses, learning rates
    and tasks) is normalized against its own schema.

    Returns
    -------
    dict
        The normalized input script.
    """
    ma = model_args()
    lra = learning_rate_args()
    la = loss_args()
    ta = training_args()

    base = Argument("base", dict, [ma, lra, la, ta])
    data = base.normalize_value(data, trim_pattern="_*")
    base.check_value(data, strict=True)

    data["model"]["descriptor"] = normalize_list_of_args(data["model"]["descriptor"], descrpt_variant_type_args())
    data["model"]["fitting_net"] = normalize_list_of_args(data["model"]["fitting_net"], fitting_variant_type_args())
    data["loss"] = normalize_list_of_args(data["loss"], loss_variant_type_args())
    data["learning_rate"] = normalize_list_of_args(data["learning_rate"], learning_rate_variant_type_args())
    data["training"]["tasks"] = normalize_list_of_args(data["training"]["tasks"], task_args())

    # normalize hybrid descriptors: only the sub-descriptor list is replaced,
    # so the hybrid descriptor keeps its own `type` and `name`
    for descrpt in data["model"]["descriptor"]:
        if descrpt["type"] == "hybrid":
            descrpt["list"] = normalize_hybrid_list(descrpt["list"])

    return data
class DeepmdDataDocker():
    """
    Class for manipulating several DeepmdDataSystem objects, one per method (task).

    A batch is drawn in two stages: first a method is picked according to
    `self.sys_probs`, then the picked DeepmdDataSystem picks one of its own systems.
    """
    def __init__(self,
                 data_systems,
                 batch_size: int,
                 rcut: float,
                 type_map: List[str] = None,
                 sys_probs=None,
                 auto_prob_style="prob_sys_size",
                 auto_prob_style_method="prob_uniform",
                 modifier=None,
                 ):
        """
        Constructor

        Parameters
        ----------
        data_systems
                A list of dicts, each with the keys `name` (name of the method)
                and `data` (the systems passed to DeepmdDataSystem)
        batch_size
                The batch size
        rcut
                The cut-off radius, passed to every DeepmdDataSystem
        type_map
                Gives the name of different atom types
        sys_probs
                sys_probs of systems in a DeepmdDataSystem
        auto_prob_style
                auto_prob_style of the systems in a DeepmdDataSystem
        auto_prob_style_method
                auto_prob_style of the methods in the DeepmdDataDocker
        modifier
                Data modifier that has the method `modify_data`
        """
        # init data
        total_data = []
        for sub_sys in data_systems:
            data = DeepmdDataSystem(
                systems=sub_sys['data'],
                batch_size=batch_size,
                test_size=1,  # to satisfy the old api
                shuffle_test=True,  # to satisfy the old api
                rcut=rcut,
                type_map=type_map,  # the same type map is given to every method
                modifier=modifier,
                trn_all_set=True,  # sample from all sets
                sys_probs=sys_probs,
                auto_prob_style=auto_prob_style,
                name=sub_sys['name']
            )
            data.add_dict(data_requirement)
            total_data.append(data)
        self.data_systems = total_data
        self.nmethod = len(self.data_systems)
        self.pick_idx = 0
        self.type_map = type_map

        # natoms, nbatches
        nbatch_list = []
        batch_size_list = []
        name_list = []
        method_nbatch = []
        for sub_data in self.data_systems:
            nbatch_list.extend(sub_data.get_nbatches())
            method_nbatch.append(np.sum(sub_data.get_nbatches()))
            batch_size_list.extend(sub_data.get_batch_size())
            name_list.append(str(sub_data.get_name()))
        self.nbatches = nbatch_list
        self.method_nbatch = method_nbatch
        self.batch_size = batch_size_list
        self.name_list = name_list
        self.prob_nmethod = np.array(method_nbatch, dtype=float) / np.sum(method_nbatch)

        # self.sys_probs is the probability of choosing each DeepmdDataSystem:
        # first a DeepmdDataSystem is chosen, which then chooses a DeepmdData.
        # `sys_probs` is a per-system quantity and was already handed to every
        # DeepmdDataSystem above, so the method probabilities are derived from
        # `auto_prob_style_method` only.
        self.set_sys_probs(None, auto_prob_style_method)

    def get_nmethod(self):
        """
        Get the number of methods
        """
        return self.nmethod

    def _process_sys_probs(self, sys_probs):
        """
        Validate explicit method probabilities and normalize them to sum to one
        """
        probs = np.array(sys_probs, dtype=float)
        if probs.shape != (self.nmethod,):
            raise RuntimeError(
                "sys_probs should have one entry per method: expected %d, got %d"
                % (self.nmethod, probs.size))
        if np.any(probs < 0) or np.sum(probs) <= 0:
            raise RuntimeError("sys_probs should be non-negative and should not all be zero")
        return probs / np.sum(probs)

    def _auto_method_probs(self, sys_probs, auto_prob_style, unknown_msg):
        """
        Compute the probability of picking each method

        `unknown_msg` is the prefix of the error raised for an unknown style.
        """
        if sys_probs is not None:
            return self._process_sys_probs(sys_probs)
        if auto_prob_style == "prob_uniform":
            prob_v = 1. / float(self.nmethod)
            return [prob_v for ii in range(self.nmethod)]
        if auto_prob_style == "prob_sys_size":
            return self.prob_nmethod
        if auto_prob_style[:14] == "prob_sys_size;":
            raise RuntimeError(
                "the block style 'prob_sys_size;...' is not supported for picking methods: "
                + auto_prob_style)
        raise RuntimeError(unknown_msg + auto_prob_style)

    def set_sys_probs(self, sys_probs=None,
                      auto_prob_style: str = "prob_sys_size"):
        """
        Set the probability of picking each method

        Parameters
        ----------
        sys_probs
                One non-negative weight per method; normalized to sum to one.
                If None, the probabilities follow `auto_prob_style`.
        auto_prob_style
                "prob_uniform" (all methods equally likely) or "prob_sys_size"
                (proportional to the number of batches of each method)

        Raises
        ------
        RuntimeError
                If the style is unknown or `sys_probs` is invalid
        """
        self.sys_probs = self._auto_method_probs(
            sys_probs, auto_prob_style, "Unknown auto prob style: ")

    def _get_sys_probs(self,
                       sys_probs,
                       auto_prob_style):  # depreciated
        """
        Same as `set_sys_probs`, but returns the probabilities instead of storing them
        """
        return self._auto_method_probs(sys_probs, auto_prob_style, "unkown style ")

    def get_batch(self, method_idx: int = None, sys_idx: int = None):
        # batch generation style be the same as DeepmdDataSystem
        """
        Get a batch of data from the data systems

        Parameters
        ----------
        method_idx: int
            The index of method from which the batch is get.
            If None, a method is picked randomly according to `self.sys_probs`.
        sys_idx: int
            The index of system from which the batch is get, passed to the
            picked DeepmdDataSystem.

        Returns
        -------
        dict
            `data` is the batch, `pick_method` the index of the picked method
        """
        if method_idx is not None:
            self.pick_idx = method_idx
        else:
            self.pick_idx = np.random.choice(np.arange(self.nmethod), p=self.sys_probs)

        s_data = self.data_systems[self.pick_idx]
        return {
            'data': s_data.get_batch(sys_idx),
            'pick_method': self.pick_idx,
        }

    def get_data_system(self, name):
        """
        Get the data system of the method called `name`, or None if there is none
        """
        for iname, idata_system in zip(self.name_list, self.data_systems):
            if iname == name:
                return idata_system
        return None

    def get_data_system_idx(self, idx):
        """
        Get the data system of the method with index `idx`
        """
        return self.data_systems[idx]

    def get_type_map(self) -> List[str]:
        """
        Get the type map
        """
        return self.type_map

    def get_nbatches(self) -> List[int]:
        """
        Get the number of batches of every system of every method
        """
        return self.nbatches

    def get_ntypes(self) -> int:
        """
        Get the number of types
        """
        return len(self.type_map)

    def get_batch_size(self) -> List[int]:
        """
        Get the batch size of every system of every method
        """
        return self.batch_size

    def get_data_dict(self, ii: int = 0) -> dict:
        """
        Get the data dict of the `ii`-th method
        """
        return self.data_systems[ii].get_data_dict()

    def get_name(self):
        """
        Get the names of the methods
        """
        return self.name_list

    def print_summary(self, name):
        """
        Log a table with one row per system of every method
        """
        # width 65
        sys_width = 42
        log.info(f"---Summary of DataSystem: {name:13s}-----------------------------------------------")
        log.info("found %d methods(s):" % self.nmethod)
        for jj in range(self.nmethod):
            tmp_sys = self.data_systems[jj]
            tmp_sys_prob = self.sys_probs[jj]

            log.info(("%s " % tmp_sys._format_name_length('system', sys_width)) +
                     ("%6s %6s %6s %5s %3s" % ('natoms', 'bch_sz', 'n_bch', 'prob', 'pbc')))
            for ii in range(tmp_sys.nsystems):
                log.info("%s %6d %6d %6d %5.3f %3s" %
                         (tmp_sys._format_name_length(tmp_sys.system_dirs[ii], sys_width),
                          tmp_sys.natoms[ii],
                          # TODO batch size * nbatches = number of structures
                          tmp_sys.batch_size[ii],
                          tmp_sys.nbatches[ii],
                          # overall probability: pick the method, then the system
                          tmp_sys_prob * tmp_sys.sys_probs[ii],
                          "T" if tmp_sys.data_systems[ii].pbc else "F"
                          ))
        log.info("--------------------------------------------------------------------------------------")
+ name + Name used to identify the data system + """ # init data self.rcut = rcut self.system_dirs = systems self.nsystems = len(self.system_dirs) self.data_systems = [] - for ii in self.system_dirs : + self.name = name + for ii in self.system_dirs : #each system as a DeepmdData, these are all from a same method self.data_systems.append( DeepmdData( - ii, + sys_path = ii, set_prefix=set_prefix, shuffle_test=shuffle_test, type_map = type_map, modifier = modifier, - trn_all_set = trn_all_set + trn_all_set = trn_all_set, + name = name )) # batch size self.batch_size = batch_size @@ -107,6 +114,8 @@ def __init__ (self, for ii in self.data_systems : ntypes.append(ii.get_ntypes()) self.sys_ntypes = max(ntypes) + if type_map is not None: + self.sys_ntypes = len(type_map) self.natoms = [] self.natoms_vec = [] self.nbatches = [] @@ -116,7 +125,7 @@ def __init__ (self, self.natoms_vec.append(self.data_systems[ii].get_natoms_vec(self.sys_ntypes).astype(int)) self.nbatches.append(self.data_systems[ii].get_sys_numb_batch(self.batch_size[ii])) type_map_list.append(self.data_systems[ii].get_type_map()) - self.type_map = self._check_type_map_consistency(type_map_list) + self.type_map = self._check_type_map_consistency(type_map_list) # this is the type map of this system # ! 
altered by Marián Rynik # test size @@ -160,7 +169,6 @@ def __init__ (self, warnings.warn("system %s required test size is larger than the size of the dataset %s (%d > %d)" % \ (self.system_dirs[ii], chk_ret[0], self.test_size[ii], chk_ret[1])) - def _load_test(self, ntests = -1): self.test_data = collections.defaultdict(list) for ii in range(self.nsystems) : @@ -412,6 +420,9 @@ def get_batch_size(self) -> int: """ return self.batch_size + def get_name(self) -> str: + return self.name + def _format_name_length(self, name, width) : if len(name) <= width: return '{: >{}}'.format(name, width) @@ -704,17 +715,4 @@ def get_batch_size(self) : def numb_fparam(self) : return self.has_fparam -def _main () : - sys = ['/home/wanghan/study/deep.md/results.01/data/mos2/only_raws/20', - '/home/wanghan/study/deep.md/results.01/data/mos2/only_raws/30', - '/home/wanghan/study/deep.md/results.01/data/mos2/only_raws/38', - '/home/wanghan/study/deep.md/results.01/data/mos2/only_raws/MoS2', - '/home/wanghan/study/deep.md/results.01/data/mos2/only_raws/Pt_cluster'] - set_prefix = 'set' - ds = DataSystem (sys, set_prefix, 4, 6) - r = ds.get_batch() - print(r[1][0]) - -if __name__ == '__main__': - _main() diff --git a/deepmd/utils/learning_rate.py b/deepmd/utils/learning_rate.py index 572f317a92..152c3825c1 100644 --- a/deepmd/utils/learning_rate.py +++ b/deepmd/utils/learning_rate.py @@ -14,7 +14,8 @@ def __init__ (self, start_lr : float, stop_lr : float = 5e-8, decay_steps : int = 5000, - decay_rate : float = 0.95 + decay_rate : float = 0.95, + name: str = None ) -> None : """ Constructor @@ -30,6 +31,8 @@ def __init__ (self, decay_rate The decay rate. If `stop_step` is provided in `build`, then it will be determined automatically and overwritten. 
+ name + Name used to identify the learning rate, which is correlated to loss of the same name """ # args = ClassArg()\ # .add('decay_steps', int, must = False)\ @@ -43,6 +46,7 @@ def __init__ (self, self.cd['decay_steps'] = decay_steps self.cd['decay_rate'] = decay_rate self.start_lr_ = self.cd['start_lr'] + self.name = name def build(self, global_step : tf.Tensor, @@ -93,3 +97,8 @@ def value (self, """ return self.start_lr_ * np.power (self.decay_rate_, (step // self.decay_steps_)) + + def get_name(self): + + return self.name + diff --git a/examples/mix/data/HfO2/init.000/set.000/box.npy b/examples/mix/data/HfO2/init.000/set.000/box.npy new file mode 100644 index 0000000000..9d86e26c96 Binary files /dev/null and b/examples/mix/data/HfO2/init.000/set.000/box.npy differ diff --git a/examples/mix/data/HfO2/init.000/set.000/coord.npy b/examples/mix/data/HfO2/init.000/set.000/coord.npy new file mode 100644 index 0000000000..9c8d2abebe Binary files /dev/null and b/examples/mix/data/HfO2/init.000/set.000/coord.npy differ diff --git a/examples/mix/data/HfO2/init.000/set.000/energy.npy b/examples/mix/data/HfO2/init.000/set.000/energy.npy new file mode 100644 index 0000000000..52e09169cb Binary files /dev/null and b/examples/mix/data/HfO2/init.000/set.000/energy.npy differ diff --git a/examples/mix/data/HfO2/init.000/set.000/force.npy b/examples/mix/data/HfO2/init.000/set.000/force.npy new file mode 100644 index 0000000000..e9c9cd5a8e Binary files /dev/null and b/examples/mix/data/HfO2/init.000/set.000/force.npy differ diff --git a/examples/mix/data/HfO2/init.000/set.000/virial.npy b/examples/mix/data/HfO2/init.000/set.000/virial.npy new file mode 100644 index 0000000000..fe77a0fee8 Binary files /dev/null and b/examples/mix/data/HfO2/init.000/set.000/virial.npy differ diff --git a/examples/mix/data/HfO2/init.000/type.raw b/examples/mix/data/HfO2/init.000/type.raw new file mode 100644 index 0000000000..3a3db50188 --- /dev/null +++ b/examples/mix/data/HfO2/init.000/type.raw 
@@ -0,0 +1,96 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/examples/mix/data/HfO2/init.000/type_map.raw b/examples/mix/data/HfO2/init.000/type_map.raw new file mode 100644 index 0000000000..d0720df960 --- /dev/null +++ b/examples/mix/data/HfO2/init.000/type_map.raw @@ -0,0 +1,2 @@ +Hf +O diff --git a/examples/mix/data/HfO2/init.001/set.000/box.npy b/examples/mix/data/HfO2/init.001/set.000/box.npy new file mode 100644 index 0000000000..8f030b72d1 Binary files /dev/null and b/examples/mix/data/HfO2/init.001/set.000/box.npy differ diff --git a/examples/mix/data/HfO2/init.001/set.000/coord.npy b/examples/mix/data/HfO2/init.001/set.000/coord.npy new file mode 100644 index 0000000000..5a96676f2a Binary files /dev/null and b/examples/mix/data/HfO2/init.001/set.000/coord.npy differ diff --git a/examples/mix/data/HfO2/init.001/set.000/energy.npy b/examples/mix/data/HfO2/init.001/set.000/energy.npy new file mode 100644 index 0000000000..9464083d67 Binary files /dev/null and b/examples/mix/data/HfO2/init.001/set.000/energy.npy differ diff --git a/examples/mix/data/HfO2/init.001/set.000/force.npy b/examples/mix/data/HfO2/init.001/set.000/force.npy new file mode 100644 index 0000000000..7071ae3ac0 Binary files /dev/null and b/examples/mix/data/HfO2/init.001/set.000/force.npy differ diff --git a/examples/mix/data/HfO2/init.001/set.000/virial.npy b/examples/mix/data/HfO2/init.001/set.000/virial.npy new file mode 100644 index 0000000000..5ff5ff404f Binary files /dev/null and b/examples/mix/data/HfO2/init.001/set.000/virial.npy differ diff --git a/examples/mix/data/HfO2/init.001/type.raw b/examples/mix/data/HfO2/init.001/type.raw new file mode 100644 index 0000000000..3a3db50188 --- /dev/null +++ 
b/examples/mix/data/HfO2/init.001/type.raw @@ -0,0 +1,96 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/examples/mix/data/HfO2/init.001/type_map.raw b/examples/mix/data/HfO2/init.001/type_map.raw new file mode 100644 index 0000000000..d0720df960 --- /dev/null +++ b/examples/mix/data/HfO2/init.001/type_map.raw @@ -0,0 +1,2 @@ +Hf +O diff --git a/examples/mix/data/water/data_0/set.000/box.npy b/examples/mix/data/water/data_0/set.000/box.npy new file mode 100644 index 0000000000..6ad2de625b Binary files /dev/null and b/examples/mix/data/water/data_0/set.000/box.npy differ diff --git a/examples/mix/data/water/data_0/set.000/coord.npy b/examples/mix/data/water/data_0/set.000/coord.npy new file mode 100644 index 0000000000..8bd448b125 Binary files /dev/null and b/examples/mix/data/water/data_0/set.000/coord.npy differ diff --git a/examples/mix/data/water/data_0/set.000/energy.npy b/examples/mix/data/water/data_0/set.000/energy.npy new file mode 100644 index 0000000000..d03db103f5 Binary files /dev/null and b/examples/mix/data/water/data_0/set.000/energy.npy differ diff --git a/examples/mix/data/water/data_0/set.000/force.npy b/examples/mix/data/water/data_0/set.000/force.npy new file mode 100644 index 0000000000..10b2ab83a2 Binary files /dev/null and b/examples/mix/data/water/data_0/set.000/force.npy differ diff --git a/examples/mix/data/water/data_0/type.raw b/examples/mix/data/water/data_0/type.raw new file mode 100644 index 0000000000..97e8fdfcf8 --- /dev/null +++ b/examples/mix/data/water/data_0/type.raw @@ -0,0 +1,192 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 
+1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/examples/mix/data/water/data_0/type_map.raw b/examples/mix/data/water/data_0/type_map.raw new file mode 100644 index 0000000000..e900768b1d --- /dev/null +++ b/examples/mix/data/water/data_0/type_map.raw @@ -0,0 +1,2 @@ +O +H diff --git a/examples/mix/data/water/data_1/set.000/box.npy b/examples/mix/data/water/data_1/set.000/box.npy new file mode 100644 index 0000000000..6ad2de625b Binary files /dev/null and b/examples/mix/data/water/data_1/set.000/box.npy differ diff --git a/examples/mix/data/water/data_1/set.000/coord.npy b/examples/mix/data/water/data_1/set.000/coord.npy new file mode 100644 index 0000000000..49042a04a2 Binary files /dev/null and b/examples/mix/data/water/data_1/set.000/coord.npy differ diff --git a/examples/mix/data/water/data_1/set.000/energy.npy b/examples/mix/data/water/data_1/set.000/energy.npy new file mode 100644 index 0000000000..9fa747389b Binary files /dev/null and b/examples/mix/data/water/data_1/set.000/energy.npy differ diff --git a/examples/mix/data/water/data_1/set.000/force.npy b/examples/mix/data/water/data_1/set.000/force.npy new file mode 100644 index 0000000000..dd8fccb45d Binary files /dev/null and b/examples/mix/data/water/data_1/set.000/force.npy differ diff --git a/examples/mix/data/water/data_1/set.001/box.npy b/examples/mix/data/water/data_1/set.001/box.npy new file mode 100644 index 0000000000..6ad2de625b Binary files /dev/null and b/examples/mix/data/water/data_1/set.001/box.npy differ diff --git a/examples/mix/data/water/data_1/set.001/coord.npy b/examples/mix/data/water/data_1/set.001/coord.npy new file mode 100644 index 
0000000000..476bbec5d7 Binary files /dev/null and b/examples/mix/data/water/data_1/set.001/coord.npy differ diff --git a/examples/mix/data/water/data_1/set.001/energy.npy b/examples/mix/data/water/data_1/set.001/energy.npy new file mode 100644 index 0000000000..146dec5351 Binary files /dev/null and b/examples/mix/data/water/data_1/set.001/energy.npy differ diff --git a/examples/mix/data/water/data_1/set.001/force.npy b/examples/mix/data/water/data_1/set.001/force.npy new file mode 100644 index 0000000000..431e38ccf3 Binary files /dev/null and b/examples/mix/data/water/data_1/set.001/force.npy differ diff --git a/examples/mix/data/water/data_1/type.raw b/examples/mix/data/water/data_1/type.raw new file mode 100644 index 0000000000..97e8fdfcf8 --- /dev/null +++ b/examples/mix/data/water/data_1/type.raw @@ -0,0 +1,192 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/examples/mix/data/water/data_1/type_map.raw b/examples/mix/data/water/data_1/type_map.raw new file mode 100644 index 0000000000..e900768b1d --- /dev/null +++ b/examples/mix/data/water/data_1/type_map.raw @@ -0,0 +1,2 @@ +O +H diff --git a/examples/mix/data/water/data_2/set.000/box.npy b/examples/mix/data/water/data_2/set.000/box.npy new file mode 100644 index 0000000000..6ad2de625b Binary files /dev/null and b/examples/mix/data/water/data_2/set.000/box.npy differ diff --git a/examples/mix/data/water/data_2/set.000/coord.npy b/examples/mix/data/water/data_2/set.000/coord.npy 
new file mode 100644 index 0000000000..b50cf2b802 Binary files /dev/null and b/examples/mix/data/water/data_2/set.000/coord.npy differ diff --git a/examples/mix/data/water/data_2/set.000/energy.npy b/examples/mix/data/water/data_2/set.000/energy.npy new file mode 100644 index 0000000000..52287bab34 Binary files /dev/null and b/examples/mix/data/water/data_2/set.000/energy.npy differ diff --git a/examples/mix/data/water/data_2/set.000/force.npy b/examples/mix/data/water/data_2/set.000/force.npy new file mode 100644 index 0000000000..f1ad439e45 Binary files /dev/null and b/examples/mix/data/water/data_2/set.000/force.npy differ diff --git a/examples/mix/data/water/data_2/type.raw b/examples/mix/data/water/data_2/type.raw new file mode 100644 index 0000000000..97e8fdfcf8 --- /dev/null +++ b/examples/mix/data/water/data_2/type.raw @@ -0,0 +1,192 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/examples/mix/data/water/data_2/type_map.raw b/examples/mix/data/water/data_2/type_map.raw new file mode 100644 index 0000000000..e900768b1d --- /dev/null +++ b/examples/mix/data/water/data_2/type_map.raw @@ -0,0 +1,2 @@ +O +H diff --git a/examples/mix/data/water/data_3/set.000/box.npy b/examples/mix/data/water/data_3/set.000/box.npy new file mode 100644 index 0000000000..6ad2de625b Binary files /dev/null and b/examples/mix/data/water/data_3/set.000/box.npy differ diff --git a/examples/mix/data/water/data_3/set.000/coord.npy 
b/examples/mix/data/water/data_3/set.000/coord.npy new file mode 100644 index 0000000000..a91e185d24 Binary files /dev/null and b/examples/mix/data/water/data_3/set.000/coord.npy differ diff --git a/examples/mix/data/water/data_3/set.000/energy.npy b/examples/mix/data/water/data_3/set.000/energy.npy new file mode 100644 index 0000000000..d6fc4e3832 Binary files /dev/null and b/examples/mix/data/water/data_3/set.000/energy.npy differ diff --git a/examples/mix/data/water/data_3/set.000/force.npy b/examples/mix/data/water/data_3/set.000/force.npy new file mode 100644 index 0000000000..e85711a47d Binary files /dev/null and b/examples/mix/data/water/data_3/set.000/force.npy differ diff --git a/examples/mix/data/water/data_3/type.raw b/examples/mix/data/water/data_3/type.raw new file mode 100644 index 0000000000..97e8fdfcf8 --- /dev/null +++ b/examples/mix/data/water/data_3/type.raw @@ -0,0 +1,192 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/examples/mix/data/water/data_3/type_map.raw b/examples/mix/data/water/data_3/type_map.raw new file mode 100644 index 0000000000..e900768b1d --- /dev/null +++ b/examples/mix/data/water/data_3/type_map.raw @@ -0,0 +1,2 @@ +O +H diff --git a/examples/mix/input.json b/examples/mix/input.json new file mode 100644 index 0000000000..7990e96005 --- /dev/null +++ b/examples/mix/input.json @@ -0,0 +1,170 @@ +{ + "_comment": " model parameters", + "model": { + "type_map": ["H","O","Hf"], + "type_embedding":{ 
+ "neuron": [2, 4, 8], + "resnet_dt": false, + "seed": 1 + }, + "descriptor" :[ + { + "type": "se_e2_a", + "name": "HfO2", + "sel": [92,120,0], + "rcut_smth": 2.0, + "rcut": 9.00, + "neuron": [25, 50, 100], + "resnet_dt": false, + "axis_neuron": 16, + "type_one_side": true, + "seed": 1, + "_comment": " that's all" + }, + { + "type": "se_e2_a", + "name": "water", + "sel": [0,120,60], + "rcut_smth": 2.0, + "rcut": 9.00, + "neuron": [25, 50, 100], + "resnet_dt": false, + "axis_neuron": 16, + "type_one_side": true, + "seed": 1, + "_comment": " that's all" + } + ], + "fitting_net" :[ + { + "name": "HfO2", + "neuron": [240, 240, 240], + "resnet_dt": true, + "seed": 1, + "_comment": " that's all" + }, + { + "name": "water", + "neuron": [120, 120, 120], + "resnet_dt": true, + "seed": 1, + "_comment": " that's all" + } + ], + "_comment": " that's all" + }, + + "learning_rate" :[ + { + "name": "HfO2", + "type": "exp", + "decay_steps": 4000, + "start_lr": 0.002, + "stop_lr": 5.69e-7, + "_comment": "that's all" + }, + { + "name": "water", + "type": "exp", + "decay_steps": 5000, + "start_lr": 0.001, + "stop_lr": 3.51e-8, + "_comment": "that's all" + } + ], + "loss" :[ + { + "name": "HfO2", + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0.0, + "limit_pref_v": 0.0, + "_comment": " that's all" + }, + { + "name": "water", + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + "_comment": " that's all" + } + + ], + + "training" : { + "training_data": { + "systems":[ + { + "name": "HfO2", + "data":[ + "data/HfO2/init.000", + "data/HfO2/init.001" + ] + }, + { + "name": "water", + "data":[ + "data/water/data_0/", + "data/water/data_1/", + "data/water/data_2/" + ] + } + ], + "auto_prob_method": "prob_uniform", + "batch_size": "auto", + "_comment": "that's all" + }, + "validation_data":{ + "systems":[ + { + "name": 
"HfO2", + "data":[ + "data/HfO2/init.000" + ] + }, + { + "name": "water", + "data":[ + "data/water/data_3" + ] + } + ], + + "batch_size": 1, + "numb_btch": 3, + "_comment": "that's all" + }, + "tasks":[ + { + "name": "HfO2", + "descriptor": "HfO2", + "fitting_net": "HfO2", + "learning_rate": "HfO2", + "loss": "HfO2" + }, + { + "name": "water", + "descriptor": "water", + "fitting_net": "water", + "learning_rate": "water", + "loss": "water" + } + ], + "numb_steps": 100000, + "seed": 10, + "disp_file": "lcurve.out", + "disp_freq": 100, + "save_freq": 10000, + "_comment": "that's all" + }, + + "_comment": "that's all" +} + diff --git a/source/tests/input_correct.json b/source/tests/input_correct.json new file mode 100644 index 0000000000..45f1b78e43 --- /dev/null +++ b/source/tests/input_correct.json @@ -0,0 +1,220 @@ +{ + "model": { + "type_map": [ + "H", + "O", + "Hf" + ], + "type_embedding": { + "neuron": [ + 2, + 4, + 8 + ], + "resnet_dt": false, + "seed": 1, + "activation_function": "tanh", + "precision": "float64", + "trainable": true + }, + "descriptor": [ + { + "type": "se_e2_a", + "name": "HfO2", + "sel": [ + 92, + 120, + 0 + ], + "rcut_smth": 2.0, + "rcut": 9.0, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 16, + "type_one_side": true, + "seed": 1, + "activation_function": "tanh", + "precision": "float64", + "trainable": true, + "exclude_types": [], + "set_davg_zero": false + }, + { + "type": "se_e2_a", + "name": "water", + "sel": [ + 0, + 120, + 60 + ], + "rcut_smth": 2.0, + "rcut": 9.0, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 16, + "type_one_side": true, + "seed": 1, + "activation_function": "tanh", + "precision": "float64", + "trainable": true, + "exclude_types": [], + "set_davg_zero": false + } + ], + "fitting_net": [ + { + "name": "HfO2", + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "seed": 1, + "type": "ener", + "numb_fparam": 0, + "numb_aparam": 0, + 
"activation_function": "tanh", + "precision": "float64", + "trainable": true, + "rcond": 0.001, + "atom_ener": [] + }, + { + "name": "water", + "neuron": [ + 120, + 120, + 120 + ], + "resnet_dt": true, + "seed": 1, + "type": "ener", + "numb_fparam": 0, + "numb_aparam": 0, + "activation_function": "tanh", + "precision": "float64", + "trainable": true, + "rcond": 0.001, + "atom_ener": [] + } + ], + "data_stat_nbatch": 10, + "data_stat_protect": 0.01 + }, + "learning_rate": [ + { + "name": "HfO2", + "type": "exp", + "decay_steps": 4000, + "start_lr": 0.002, + "stop_lr": 5.69e-07 + }, + { + "name": "water", + "type": "exp", + "decay_steps": 5000, + "start_lr": 0.001, + "stop_lr": 3.51e-08 + } + ], + "loss": [ + { + "name": "HfO2", + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0.0, + "limit_pref_v": 0.0, + "start_pref_ae": 0.0, + "limit_pref_ae": 0.0 + }, + { + "name": "water", + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + "start_pref_ae": 0.0, + "limit_pref_ae": 0.0 + } + ], + "training": { + "training_data": { + "systems": [ + { + "name": "HfO2", + "data": [] + }, + { + "name": "water", + "data": [] + } + ], + "auto_prob_method": "prob_sys_size", + "batch_size": "auto", + "set_prefix": "set", + "auto_prob": "prob_sys_size", + "sys_probs": null + }, + "validation_data": { + "systems": [ + { + "name": "HfO2", + "data": [] + }, + { + "name": "water", + "data": [] + } + ], + "batch_size": 1, + "numb_btch": 3, + "set_prefix": "set", + "auto_prob": "prob_sys_size", + "sys_probs": null + }, + "tasks": [ + { + "name": "HfO2", + "descriptor": "HfO2", + "fitting_net": "HfO2", + "learning_rate": "HfO2", + "loss": "HfO2" + }, + { + "name": "water", + "descriptor": "water", + "fitting_net": "water", + "learning_rate": "water", + "loss": "water" + } + ], + "numb_steps": 16000000, + "seed": 10, + 
"disp_file": "lcurve.out", + "disp_freq": 2000, + "save_freq": 10000, + "numb_test": 1, + "save_ckpt": "model.ckpt", + "disp_training": true, + "time_training": true, + "profiling": false, + "profiling_file": "timeline.json", + "tensorboard": false, + "tensorboard_log_dir": "log" + } +} diff --git a/source/tests/input_mt.json b/source/tests/input_mt.json new file mode 100644 index 0000000000..13c0531a9f --- /dev/null +++ b/source/tests/input_mt.json @@ -0,0 +1,192 @@ +{ + "_comment": "that's all", + "model": { + "type_map": [ + "H", + "O", + "Hf" + ], + "type_embedding": { + "neuron": [ + 2, + 4, + 8 + ], + "resnet_dt": false, + "seed": 1 + }, + "descriptor": [ + { + "type": "se_e2_a", + "name": "HfO2", + "sel": [ + 0, + 120, + 60 + ], + "rcut_smth": 2.0, + "rcut": 9.0, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 16, + "type_one_side": true, + "seed": 1, + "_comment": " that's all" + }, + { + "type": "se_e2_a", + "name": "water", + "sel": [ + 92, + 120, + 0 + ], + "rcut_smth": 2.0, + "rcut": 9.0, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 16, + "type_one_side": true, + "seed": 1, + "_comment": " that's all" + } + ], + "fitting_net": [ + { + "name": "HfO2", + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "seed": 1, + "_comment": " that's all" + }, + { + "name": "water", + "neuron": [ + 120, + 120, + 120 + ], + "resnet_dt": true, + "seed": 1, + "_comment": " that's all" + } + ], + "_comment": " that's all" + }, + "learning_rate": [ + { + "name": "HfO2", + "type": "exp", + "decay_steps": 4000, + "start_lr": 0.002, + "stop_lr": 5.69e-07, + "_comment": "that's all" + }, + { + "name": "water", + "type": "exp", + "decay_steps": 5000, + "start_lr": 0.001, + "stop_lr": 3.51e-08, + "_comment": "that's all" + } + ], + "loss": [ + { + "name": "HfO2", + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0.0, + "limit_pref_v": 
0.0, + "_comment": " that's all" + }, + { + "name": "water", + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + "_comment": " that's all" + } + ], + "training": { + "training_data": { + "systems": [ + { + "name": "HfO2", + "data": [ + "model_compression/data_1" + ] + }, + { + "name": "water", + "data": [ + "model_compression/data" + ] + } + ], + "auto_prob_method": "prob_uniform", + "batch_size": "auto", + "_comment": "that's all" + }, + "validation_data": { + "systems": [ + { + "name": "HfO2", + "data": [ + "model_compression/data_1" + ] + }, + { + "name": "water", + "data": [ + "model_compression/data" + ] + } + ], + "batch_size": 1, + "numb_btch": 3, + "_comment": "that's all" + }, + "tasks": [ + { + "name": "HfO2", + "descriptor": "HfO2", + "fitting_net": "HfO2", + "learning_rate": "HfO2", + "loss": "HfO2" + }, + { + "name": "water", + "descriptor": "water", + "fitting_net": "water", + "learning_rate": "water", + "loss": "water" + } + ], + "numb_steps": 2, + "seed": 1234, + "disp_file": "lcurve.out", + "disp_freq": 1, + "save_freq": 2, + "_comment": "that's all" + } +} diff --git a/source/tests/input_origin.json b/source/tests/input_origin.json new file mode 100644 index 0000000000..2dc915b5fa --- /dev/null +++ b/source/tests/input_origin.json @@ -0,0 +1,159 @@ +{ + "_comment": " model parameters", + "model": { + "type_map": ["H","O","Hf"], + "type_embedding":{ + "neuron": [2, 4, 8], + "resnet_dt": false, + "seed": 1 + }, + "descriptor" :[ + { + "type": "se_e2_a", + "name": "HfO2", + "sel": [92,120,0], + "rcut_smth": 2.0, + "rcut": 9.00, + "neuron": [25, 50, 100], + "resnet_dt": false, + "axis_neuron": 16, + "type_one_side": true, + "seed": 1, + "_comment": " that's all" + }, + { + "type": "se_e2_a", + "name": "water", + "sel": [0,120,60], + "rcut_smth": 2.0, + "rcut": 9.00, + "neuron": [25, 50, 100], + "resnet_dt": false, + "axis_neuron": 16, + "type_one_side": 
true, + "seed": 1, + "_comment": " that's all" + } + ], + "fitting_net" :[ + { + "name": "HfO2", + "neuron": [240, 240, 240], + "resnet_dt": true, + "seed": 1, + "_comment": " that's all" + }, + { + "name": "water", + "neuron": [120, 120, 120], + "resnet_dt": true, + "seed": 1, + "_comment": " that's all" + } + ], + "_comment": " that's all" + }, + + "learning_rate" :[ + { + "name": "HfO2", + "type": "exp", + "decay_steps": 4000, + "start_lr": 0.002, + "stop_lr": 5.69e-7, + "_comment": "that's all" + }, + { + "name": "water", + "type": "exp", + "decay_steps": 5000, + "start_lr": 0.001, + "stop_lr": 3.51e-8, + "_comment": "that's all" + } + ], + "loss" :[ + { + "name": "HfO2", + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0.0, + "limit_pref_v": 0.0, + "_comment": " that's all" + }, + { + "name": "water", + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + "_comment": " that's all" + } + + ], + + "training" : { + "training_data": { + "systems":[ + { + "name": "HfO2", + "data":[] + }, + { + "name": "water", + "data":[] + } + ], + "auto_prob_method": "prob_sys_size", + "batch_size": "auto", + "_comment": "that's all" + }, + "validation_data":{ + "systems":[ + { + "name": "HfO2", + "data":[] + }, + { + "name": "water", + "data":[] + } + ], + + "batch_size": 1, + "numb_btch": 3, + "_comment": "that's all" + }, + "tasks":[ + { + "name": "HfO2", + "descriptor": "HfO2", + "fitting_net": "HfO2", + "learning_rate": "HfO2", + "loss": "HfO2" + }, + { + "name": "water", + "descriptor": "water", + "fitting_net": "water", + "learning_rate": "water", + "loss": "water" + } + ], + "numb_steps": 16000000, + "seed": 10, + "disp_file": "lcurve.out", + "disp_freq": 2000, + "save_freq": 10000, + "_comment": "that's all" + }, + + "_comment": "that's all" +} + diff --git 
a/source/tests/model_compression/data_1/set.000/box.npy b/source/tests/model_compression/data_1/set.000/box.npy new file mode 100644 index 0000000000..1f5de3f920 Binary files /dev/null and b/source/tests/model_compression/data_1/set.000/box.npy differ diff --git a/source/tests/model_compression/data_1/set.000/coord.npy b/source/tests/model_compression/data_1/set.000/coord.npy new file mode 100644 index 0000000000..3ab4571007 Binary files /dev/null and b/source/tests/model_compression/data_1/set.000/coord.npy differ diff --git a/source/tests/model_compression/data_1/set.000/energy.npy b/source/tests/model_compression/data_1/set.000/energy.npy new file mode 100644 index 0000000000..35899bf0f1 Binary files /dev/null and b/source/tests/model_compression/data_1/set.000/energy.npy differ diff --git a/source/tests/model_compression/data_1/set.000/force.npy b/source/tests/model_compression/data_1/set.000/force.npy new file mode 100644 index 0000000000..a1f696c186 Binary files /dev/null and b/source/tests/model_compression/data_1/set.000/force.npy differ diff --git a/source/tests/model_compression/data_1/type.raw b/source/tests/model_compression/data_1/type.raw new file mode 100644 index 0000000000..97e8fdfcf8 --- /dev/null +++ b/source/tests/model_compression/data_1/type.raw @@ -0,0 +1,192 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/source/tests/model_compression/data_1/type_map.raw b/source/tests/model_compression/data_1/type_map.raw 
# ---------------------------------------------------------------------------
# Tail of the hunk for source/tests/model_compression/data_1/type_map.raw:
# the new two-line file lists the element names "Hf" and "O".
# ---------------------------------------------------------------------------

# ---------------------------------------------------------------------------
# source/tests/test_argcheck_mt.py
# ---------------------------------------------------------------------------
import os,sys,shutil,copy
import numpy as np
import unittest

from deepmd.utils.argcheck_mt import normalize_mt
from deepmd.utils.compat import updata_deepmd_input
from common import j_loader


class TestArgcheckMt (unittest.TestCase) :
    """Check the multi-task argument normalization against a stored result."""

    def test_argcheck_mt (self) :
        """Normalize ``input_origin.json`` and compare it with ``input_correct.json``."""
        # NOTE(review): ``dump`` presumably makes the compat layer write the
        # converted input into the working directory -- confirm, and remove
        # the file after the test if it does.
        jdata = j_loader('input_origin.json')
        jdata = updata_deepmd_input(jdata, warning=True, dump="input_v2_compat.json")
        jdata = normalize_mt(jdata)
        expected = j_loader('input_correct.json')
        self.assertEqual(jdata, expected)


# ---------------------------------------------------------------------------
# source/tests/test_deepmd_data_docker.py
# ---------------------------------------------------------------------------
import os,sys,shutil,copy
import numpy as np
import unittest

from deepmd.utils.data_system import DeepmdDataSystem
from deepmd.utils.data_docker import DeepmdDataDocker
from deepmd.env import GLOBAL_NP_FLOAT_PRECISION

if GLOBAL_NP_FLOAT_PRECISION == np.float32 :
    places = 6
else:
    places = 12


class TestDataDocker (unittest.TestCase) :
    """Tests for ``DeepmdDataDocker`` on randomly generated on-disk systems.

    ``setUp`` writes ``nmethod`` directories (``method_0``, ``method_1``) into
    the working directory.  Each holds ``nsys`` systems with ``nset`` sets of
    random ``coord``, ``box`` and ``test`` arrays.
    """

    def setUp (self) :
        """Create the data directories and the matching ``data_systems`` list."""
        self.nmethod = 2
        self.dsmt_list = []
        self.nframes = [3, 6, 5, 4]
        self.natoms = [3, 4, 6, 5]
        self.atom_type = [[1, 0, 0],
                          [2, 1, 0, 2],
                          [0, 0, 1, 1, 2, 1],
                          [0, 2, 2, 0, 0]]
        self.test_ndof = 2
        self.nsys = 4
        self.nset = 3
        self.data_systems = []
        for method in range(self.nmethod):
            nname = 'method_%d' % method
            # Registered before anything is written: cleanups also run when
            # setUp fails part-way, whereas tearDown would be skipped and the
            # directories would be left behind.
            self.addCleanup(shutil.rmtree, nname, ignore_errors = True)
            os.makedirs(nname, exist_ok = True)
            sys_name_total = []
            for ii in range(self.nsys) :
                sys_name = os.path.join(nname, 'sys_%d' % ii)
                sys_name_total.append(sys_name)
                os.makedirs(sys_name, exist_ok = True)
                np.savetxt(os.path.join(sys_name, 'type.raw'),
                           self.atom_type[ii],
                           fmt = '%d')
                for jj in range(self.nset):
                    self._write_set(os.path.join(sys_name, 'set.%03d' % jj),
                                    self.nframes[ii] + jj,
                                    self.natoms[ii])
            self.data_systems.append({'data': sys_name_total, 'name': nname})

    def _write_set(self, set_name, nframes, natoms):
        """Write random ``coord``, ``box`` and ``test`` arrays into `set_name`."""
        os.makedirs(set_name, exist_ok = True)
        # The draw order (coord, box, test) is the same as in the original loop.
        np.save(os.path.join(set_name, 'coord.npy'),
                np.random.random([nframes, natoms*3]))
        np.save(os.path.join(set_name, 'box.npy'),
                np.random.random([nframes, 9]) * 10)
        np.save(os.path.join(set_name, 'test.npy'),
                np.random.random([nframes, natoms*self.test_ndof]))

    def test_ntypes(self) :
        """Method count, batch counts, names and batch sizes of the docker."""
        batch_size = 2
        ds = DeepmdDataDocker(self.data_systems, batch_size, rcut = 2.0)
        self.assertEqual(ds.get_nmethod(), self.nmethod)
        self.assertEqual(ds.get_nbatches(), [5, 10, 8, 7, 5, 10, 8, 7])
        self.assertEqual(ds.get_name(), ['method_0','method_1'])
        self.assertEqual(list(ds.get_batch_size()), [2, 2, 2, 2, 2, 2, 2, 2])

    def test_get_data_system(self):
        """A data system can be fetched both by index and by name."""
        batch_size = 2
        ds = DeepmdDataDocker(self.data_systems, batch_size, rcut = 2.0)
        pick_idx = 0
        method_name = 'method_%d' % pick_idx
        self.assertEqual(ds.get_data_system_idx(pick_idx).get_name(), method_name)
        self.assertEqual(ds.get_data_system(method_name).get_name(), method_name)

    def test_get_batch(self):
        """Batches of systems 0 and 2 of method 0 come from the saved set.000 data."""
        batch_size = 2
        ds = DeepmdDataDocker(self.data_systems, batch_size, rcut = 2.0)
        for i in range(self.nmethod):
            ds.get_data_system_idx(i).add('test', self.test_ndof, atomic = True, must = True)
            ds.get_data_system_idx(i).add('null', self.test_ndof, atomic = True, must = False)
        method_idx = 0
        # Same systems, in the same order, as the two copy-pasted sections
        # this loop replaces.
        for sys_idx in (0, 2):
            self._check_batch(ds, method_idx, sys_idx, batch_size)

    def _check_batch(self, ds, method_idx, sys_idx, batch_size):
        """Draw one batch of the given system and compare it with the files on disk."""
        data = ds.get_batch(method_idx = method_idx,sys_idx=sys_idx)['data']
        self.assertEqual(list(data['type'][0]), list(np.sort(self.atom_type[sys_idx])))
        set_dir = os.path.join('method_%d' % method_idx, 'sys_%d' % sys_idx, 'set.000')
        idx_map = ds.get_data_system_idx(method_idx).get_sys(sys_idx).idx_map
        self._in_array(np.load(os.path.join(set_dir, 'coord.npy')),
                       idx_map,
                       3,
                       data['coord'])
        self._in_array(np.load(os.path.join(set_dir, 'test.npy')),
                       idx_map,
                       self.test_ndof,
                       data['test'])
        # 'null' was registered with must = False and has no file on disk;
        # the batch is expected to carry zeros for it.
        self.assertAlmostEqual(np.linalg.norm(np.zeros([batch_size,
                                                        self.natoms[sys_idx]*self.test_ndof])
                                              -
                                              data['null']
        ), 0.0)

    def _idx_map(self, target, idx_map, ndof):
        """Reorder the atoms of every frame in `target` according to `idx_map`."""
        natoms = len(idx_map)
        target = target.reshape([-1, natoms, ndof])
        target = target[:,idx_map,:]
        target = target.reshape([-1, natoms * ndof])
        return target

    def _in_array(self, target, idx_map, ndof, array):
        """Assert that every frame of `array` occurs in the re-mapped `target`."""
        target = self._idx_map(target, idx_map, ndof)
        for idx, frame in enumerate(array) :
            # any() stops at the first matching reference frame.
            find = any(np.linalg.norm(frame - ref) < 1e-5 for ref in target)
            self.assertTrue(find, msg = 'does not find frame %d in array' % idx)


# ---------------------------------------------------------------------------
# source/tests/test_multitask.py (beginning; the `from common import ...`
# name list continues past this block)
# ---------------------------------------------------------------------------

import dpdata,os,sys,unittest,json
import numpy as np
from deepmd.env import tf
from common import Data, gen_data, del_data
# Completes the `from common import ...` statement that is split across the
# preceding source line.
from common import j_loader, tests_path
from deepmd.descriptor import DescrptSeA
from deepmd.fit import EnerFitting
from deepmd.model import EnerModel
from deepmd.common import j_must_have

GLOBAL_ENER_FLOAT_PRECISION = tf.float64
GLOBAL_TF_FLOAT_PRECISION = tf.float64
GLOBAL_NP_FLOAT_PRECISION = np.float64

def _file_delete(file) :
    """Remove `file` if it exists; do nothing otherwise."""
    if os.path.exists(file):
        os.remove(file)

class TestModel(tf.test.TestCase):
    """Run a short multi-task training and compare the learning curve."""

    def setUp(self) :
        """Write a copy of ``input_mt.json`` whose data paths are anchored at `tests_path`.

        The resolved configuration goes to a temporary file, so the
        ``input_mt.json`` fixture is left untouched (the previous version
        rewrote it in place).
        """
        import tempfile

        jdata = j_loader(str(tests_path / 'input_mt.json'))
        for section in ('training_data', 'validation_data'):
            for sub_sys in jdata['training'][section]['systems']:
                sub_sys['data'] = [str(tests_path / dd) for dd in sub_sys['data']]
        fd, self.INPUT = tempfile.mkstemp(prefix = 'input_mt_', suffix = '.json')
        # Registered before writing so the file is removed even if the dump fails.
        self.addCleanup(_file_delete, self.INPUT)
        with os.fdopen(fd, "w") as fp:
            json.dump(jdata, fp, indent=4)

    def tearDown(self):
        """Delete the files the training run leaves in the working directory."""
        for leftover in ("out.json",
                         "checkpoint",
                         "lcurve.out",
                         "model.ckpt.meta",
                         "model.ckpt.index",
                         "model.ckpt.data-00000-of-00001"):
            _file_delete(leftover)

    def test_model_atom_ener(self):
        """``dp train -mt`` succeeds and reproduces the reference ``lcurve.out`` values."""
        import subprocess

        # Argument list instead of a shell string: the input path is passed
        # verbatim even if it contains spaces or shell metacharacters.
        ret = subprocess.run(["dp", "train", "-mt", self.INPUT]).returncode
        # A unittest assertion is not stripped under ``python -O``.
        self.assertEqual(ret, 0, "DP train error!")
        dd = np.loadtxt("lcurve.out",skiprows=1)[:,:9]
        dd = dd.reshape([3,-1])

        # One row per line read from lcurve.out, first nine columns each.
        ref_loss = [0.0,307.0,307.0,156.0,156.0,0.765,0.809,0.001,1.0,
                    1.0,2150.0,2150.0,156.0,156.0,0.792,0.813,5.9e-06,1.0,
                    2.0,2160.0,2160.0,156.0,156.0,0.829,0.787,3.5e-08,1.0]

        for ii in range(3):
            for jj in range(9):
                self.assertAlmostEqual(dd[ii][jj], ref_loss[ii*9+jj], places = 8)