diff --git a/deepmd/entrypoints/__init__.py b/deepmd/entrypoints/__init__.py index 2f8bae9bbd..3beceace3a 100644 --- a/deepmd/entrypoints/__init__.py +++ b/deepmd/entrypoints/__init__.py @@ -7,6 +7,7 @@ from .test import test from .train import train from .transfer import transfer +from ..infer.model_devi import make_model_devi __all__ = [ "config", @@ -17,4 +18,5 @@ "transfer", "compress", "doc_train_input", + "make_model_devi" ] diff --git a/deepmd/entrypoints/main.py b/deepmd/entrypoints/main.py index 23bb30ccde..e0c1d8d4af 100644 --- a/deepmd/entrypoints/main.py +++ b/deepmd/entrypoints/main.py @@ -13,6 +13,7 @@ test, train, transfer, + make_model_devi, ) from deepmd.loggers import set_log_handles @@ -311,6 +312,46 @@ def parse_args(args: Optional[List[str]] = None): formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) + # * make model deviation *********************************************************** + parser_model_devi = subparsers.add_parser( + "model-devi", + parents=[parser_log], + help="calculate model deviation", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser_model_devi.add_argument( + "-m", + "--models", + default=["graph.000.pb", "graph.001.pb", "graph.002.pb", "graph.003.pb"], + nargs="+", + type=str, + help="Frozen models file to import", + ) + parser_model_devi.add_argument( + "-s", + "--system", + default=".", + type=str, + help="The system directory, not support recursive detection.", + ) + parser_model_devi.add_argument( + "-S", "--set-prefix", default="set", type=str, help="The set prefix" + ) + parser_model_devi.add_argument( + "-o", + "--output", + default="model_devi.out", + type=str, + help="The output file for results of model deviation" + ) + parser_model_devi.add_argument( + "-f", + "--frequency", + default=1, + type=int, + help="The trajectory frequency of the system" + ) + parsed_args = parser.parse_args(args=args) if parsed_args.command is None: parser.print_help() @@ -352,6 +393,8 @@ def main(): compress(**dict_args) elif args.command == "doc-train-input": doc_train_input() + elif args.command == "model-devi": + make_model_devi(**dict_args) elif args.command is None: pass else: diff --git a/deepmd/infer/__init__.py b/deepmd/infer/__init__.py index c0c7c73182..0eb3bddcea 100644 --- a/deepmd/infer/__init__.py +++ b/deepmd/infer/__init__.py @@ -10,6 +10,7 @@ from .deep_pot import DeepPot from .deep_wfc import DeepWFC from .ewald_recp import EwaldRecp +from .model_devi import calc_model_devi __all__ = [ "DeepPotential", @@ -21,6 +22,7 @@ "DeepWFC", "DipoleChargeModifier", "EwaldRecp", + "calc_model_devi" ] diff --git a/deepmd/infer/model_devi.py b/deepmd/infer/model_devi.py new file mode 100644 index 0000000000..0fb4faa326 --- /dev/null +++ b/deepmd/infer/model_devi.py @@ -0,0 +1,184 @@ +import numpy as np +from .deep_pot import DeepPot +from ..utils.data import DeepmdData + + +def calc_model_devi_f(fs): + ''' + fs : numpy.ndarray, size of `n_models x n_frames x n_atoms x 3` + ''' + fs_devi = np.linalg.norm(np.std(fs, axis=0), axis=-1) + max_devi_f = np.max(fs_devi, axis=-1) + min_devi_f = np.min(fs_devi, axis=-1) + avg_devi_f = np.mean(fs_devi, axis=-1) + return max_devi_f, min_devi_f, avg_devi_f + +def calc_model_devi_e(es): + ''' + es : numpy.ndarray, size of `n_models x n_frames x n_atoms + ''' + es_devi = np.std(es, axis=0) + max_devi_e = np.max(es_devi, axis=1) + min_devi_e = np.min(es_devi, axis=1) + avg_devi_e = np.mean(es_devi, axis=1) + return max_devi_e, min_devi_e, avg_devi_e + +def calc_model_devi_v(vs): + ''' + vs : numpy.ndarray, size of `n_models x n_frames x 9` + ''' + vs_devi = np.std(vs, axis=0) + max_devi_v = np.max(vs_devi, axis=-1) + min_devi_v = np.min(vs_devi, axis=-1) + avg_devi_v = np.linalg.norm(vs_devi, axis=-1) / 3 + return max_devi_v, min_devi_v, avg_devi_v + +def write_model_devi_out(devi, fname): + ''' + devi : numpy.ndarray, the first column is the steps index + fname : str, the file name to dump + ''' + assert devi.shape[1] == 7 + header = "%10s" % "step" + for item in 'vf': + header += "%19s%19s%19s" % (f"max_devi_{item}", f"min_devi_{item}", f"avg_devi_{item}") + np.savetxt(fname, + devi, + fmt=['%12d'] + ['%19.6e' for _ in range(6)], + delimiter='', + header=header) + return devi + +def _check_tmaps(tmaps, ref_tmap=None): + ''' + Check whether type maps are identical + ''' + assert isinstance(tmaps, list) + if ref_tmap is None: + ref_tmap = tmaps[0] + assert isinstance(ref_tmap, list) + + flag = True + for tmap in tmaps: + if tmap != ref_tmap: + flag = False + break + return flag + +def calc_model_devi(coord, + box, + atype, + models, + fname=None, + frequency=1, + nopbc=True): + ''' + Python interface to calculate model deviation + + Parameters: + ----------- + coord : numpy.ndarray, `n_frames x n_atoms x 3` + Coordinates of system to calculate + box : numpy.ndarray or None, `n_frames x 3 x 3` + Box to specify periodic boundary condition. If None, no pbc will be used + atype : numpy.ndarray, `n_atoms x 1` + Atom types + models : list of DeepPot models + Models used to evaluate deviation + fname : str or None + File to dump results, default None + frequency : int + Steps between frames (if the system is given by molecular dynamics engine), default 1 + nopbc : bool + Whether to use pbc conditions + + Return: + ------- + model_devi : numpy.ndarray, `n_frames x 7` + Model deviation results. The first column is index of steps, the other 6 columns are + max_devi_v, min_devi_v, avg_devi_v, max_devi_f, min_devi_f, avg_devi_f. + ''' + if nopbc: + box = None + + forces = [] + virials = [] + for dp in models: + ret = dp.eval( + coord, + box, + atype, + ) + forces.append(ret[1]) + virials.append(ret[2] / len(atype)) + + forces = np.array(forces) + virials = np.array(virials) + + devi = [np.arange(coord.shape[0]) * frequency] + devi += list(calc_model_devi_v(virials)) + devi += list(calc_model_devi_f(forces)) + devi = np.vstack(devi).T + if fname: + write_model_devi_out(devi, fname) + return devi + +def make_model_devi( + *, + models: list, + system: str, + set_prefix: str, + output: str, + frequency: int, + **kwargs +): + ''' + Make model deviation calculation + + Parameters + ---------- + + models: list + A list of paths of models to use for making model deviation + system: str + The path of system to make model deviation calculation + set_prefix: str + The set prefix of the system + output: str + The output file for model deviation results + frequency: int + The number of steps that elapse between writing coordinates + in a trajectory by a MD engine (such as Gromacs / Lammps). + This paramter is used to determine the index in the output file. + ''' + # init models + dp_models = [DeepPot(model) for model in models] + + # check type maps + tmaps = [dp.get_type_map() for dp in dp_models] + if _check_tmaps(tmaps): + tmap = tmaps[0] + else: + raise RuntimeError("The models does not have the same type map.") + + # create data-system + dp_data = DeepmdData(system, set_prefix, shuffle_test=False, type_map=tmap) + if dp_data.pbc: + nopbc = False + else: + nopbc = True + + data_sets = [dp_data._load_set(set_name) for set_name in dp_data.dirs] + nframes_tot = 0 + devis = [] + for data in data_sets: + coord = data["coord"] + box = data["box"] + atype = data["type"][0] + devi = calc_model_devi(coord, box, atype, dp_models, nopbc=nopbc) + nframes_tot += coord.shape[0] + devis.append(devi) + devis = np.vstack(devis) + devis[:, 0] = np.arange(nframes_tot) * frequency + write_model_devi_out(devis, output) + return devis diff --git a/doc/getting-started.md b/doc/getting-started.md index 12948f0775..a355fc9836 100644 --- a/doc/getting-started.md +++ b/doc/getting-started.md @@ -182,6 +182,45 @@ optional arguments: accuracy ``` +### Calculate Model Deviation + +One can also use a subcommand to calculate deviation of prediced forces or virials for a bunch of models in the following way: +```bash +dp model-devi -m graph.000.pb graph.001.pb graph.002.pb graph.003.pb -s ./data -o model_devi.out +``` +where `-m` specifies graph files to be calculated, `-s` gives the data to be evaluated, `-o` the file to which model deviation results is dumped. Here is more information on this sub-command: +```bash +usage: dp model-devi [-h] [-v {DEBUG,3,INFO,2,WARNING,1,ERROR,0}] + [-l LOG_PATH] [-m MODELS [MODELS ...]] [-s SYSTEM] + [-S SET_PREFIX] [-o OUTPUT] [-f FREQUENCY] [-i ITEMS] + +optional arguments: + -h, --help show this help message and exit + -v {DEBUG,3,INFO,2,WARNING,1,ERROR,0}, --log-level {DEBUG,3,INFO,2,WARNING,1,ERROR,0} + set verbosity level by string or number, 0=ERROR, + 1=WARNING, 2=INFO and 3=DEBUG (default: INFO) + -l LOG_PATH, --log-path LOG_PATH + set log file to log messages to disk, if not + specified, the logs will only be output to console + (default: None) + -m MODELS [MODELS ...], --models MODELS [MODELS ...] + Frozen models file to import (default: + ['graph.000.pb', 'graph.001.pb', 'graph.002.pb', + 'graph.003.pb']) + -s SYSTEM, --system SYSTEM + The system directory, not support recursive detection. + (default: .) + -S SET_PREFIX, --set-prefix SET_PREFIX + The set prefix (default: set) + -o OUTPUT, --output OUTPUT + The output file for results of model deviation + (default: model_devi.out) + -f FREQUENCY, --frequency FREQUENCY + The trajectory frequency of the system (default: 1) +``` + +For more details with respect to definition of model deviation and its application, please refer to `Yuzhi Zhang, Haidi Wang, Weijie Chen, Jinzhe Zeng, Linfeng Zhang, Han Wang, and Weinan E, DP-GEN: A concurrent learning platform for the generation of reliable deep learning based potential energy models, Computer Physics Communications, 2020, 253, 107206.` + ## Compress a model Once the frozen model is obtained from deepmd-kit, we can get the neural network structure and its parameters (weights, biases, etc.) from the trained model, and compress it in the following way: @@ -253,6 +292,19 @@ e, f, v = dp.eval(coord, cell, atype) ``` where `e`, `f` and `v` are predicted energy, force and virial of the system, respectively. +Furthermore, one can use the python interface to calulate model deviation. +```python +from deepmd.infer import calc_model_devi +from deepmd.infer import DeepPot as DP +import numpy as np + +coord = np.array([[1,0,0], [0,0,1.5], [1,0,3]]).reshape([1, -1]) +cell = np.diag(10 * np.ones(3)).reshape([1, -1]) +atype = [1,0,1] +graphs = [DP("graph.000.pb"), DP("graph.001.pb")] +model_devi = calc_model_devi(coord, cell, atype, graphs) +``` + ### C++ interface The C++ interface of DeePMD-kit is also avaiable for model interface, which is considered faster than Python interface. An example `infer_water.cpp` is given below: ```cpp diff --git a/source/tests/test_argument_parser.py b/source/tests/test_argument_parser.py index a7e42ad81e..1c85728e40 100644 --- a/source/tests/test_argument_parser.py +++ b/source/tests/test_argument_parser.py @@ -44,7 +44,7 @@ def build_args(args: "TEST_DICT", command: str) -> List[str]: # arguments without value are passed as such, typically these are where action # is 'count' or 'store_true' if "value" in test_data: - args_list.append(str(test_data["value"])) + args_list += str(test_data["value"]).split() return args_list @@ -285,6 +285,19 @@ def test_parser_doc(self): ARGS = {} self.run_test(command="doc-train-input", mapping=ARGS) + + def test_parser_model_devi(self): + """Test model-devi subparser""" + ARGS = { + "--models": dict(type=list, value="GRAPH.000.pb GRAPH.001.pb", + expected=["GRAPH.000.pb", "GRAPH.001.pb"]), + "--system": dict(type=str, value="SYSTEM_DIR"), + "--set-prefix": dict(type=str, value="SET_PREFIX"), + "--output": dict(type=str, value="OUTFILE"), + "--frequency": dict(type=int, value=1) + } + + self.run_test(command="model-devi", mapping=ARGS) def test_get_log_level(self): MAPPING = { diff --git a/source/tests/test_model_devi.py b/source/tests/test_model_devi.py new file mode 100644 index 0000000000..3e5161b44d --- /dev/null +++ b/source/tests/test_model_devi.py @@ -0,0 +1,50 @@ +from deepmd.infer import DeepPotential +import unittest +import os, sys, shutil +import numpy as np +from deepmd.infer import calc_model_devi +sys.path.insert(0, os.path.abspath(os.path.dirname(__file__))) +from infer.convert2pb import convert_pbtxt_to_pb +from common import gen_data, tests_path, del_data + + +class TestMakeModelDevi(unittest.TestCase): + def setUp(self): + gen_data() + self.data_dir = "system" + coord = np.load(os.path.join(self.data_dir, "set.000/coord.npy")) + box = np.load(os.path.join(self.data_dir, "set.000/box.npy")) + self.atype = np.loadtxt(os.path.join(self.data_dir, "type.raw")) + self.coord = np.vstack([coord, coord]) + self.box = np.vstack([box, box]) + self.freq = 10 + + self.pbtxts = [os.path.join(tests_path, "infer/deeppot.pbtxt"), + os.path.join(tests_path, "infer/deeppot-1.pbtxt")] + self.graph_dirs = [pbtxt.replace("pbtxt", "pb") for pbtxt in self.pbtxts] + for pbtxt, pb in zip(self.pbtxts, self.graph_dirs): + convert_pbtxt_to_pb(pbtxt, pb) + self.graphs = [DeepPotential(pb) for pb in self.graph_dirs] + self.output = os.path.join(tests_path, "model_devi.out") + self.expect = np.array([0, 1.670048e-01, 4.182279e-04, 8.048649e-02, 5.095047e-01, 4.584241e-01, 4.819783e-01]) + + def test_calc_model_devi(self): + model_devi = calc_model_devi(self.coord, + self.box, + self.atype, + self.graphs, + frequency=self.freq, + nopbc=True, + fname=self.output) + self.assertEqual(model_devi[0][0], 0) + self.assertEqual(model_devi[1][0], self.freq) + for ii in range(1, 7): + self.assertAlmostEqual(model_devi[0][ii], self.expect[ii]) + self.assertEqual(model_devi[0][ii], model_devi[1][ii]) + self.assertTrue(os.path.isfile(self.output)) + + def tearDown(self): + for pb in self.graph_dirs: + os.remove(pb) + os.remove(self.output) + del_data()