From ce2a71e9c3f57cfea312fda141167b0bc89c1d6e Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 7 Jul 2022 18:18:58 -0400 Subject: [PATCH 1/8] add simplify jdata docs; fix and check example Fix #779. --- doc/simplify/simplify-jdata.rst | 6 ++ dpgen/simplify/arginfo.py | 107 +++++++++++++++++++++++++++++++- examples/simplify/qm7.json | 10 +-- tests/test_check_examples.py | 5 ++ 4 files changed, 118 insertions(+), 10 deletions(-) create mode 100644 doc/simplify/simplify-jdata.rst diff --git a/doc/simplify/simplify-jdata.rst b/doc/simplify/simplify-jdata.rst new file mode 100644 index 000000000..520c889ab --- /dev/null +++ b/doc/simplify/simplify-jdata.rst @@ -0,0 +1,6 @@ +dpgen simplify parameters +========================= + +.. dargs:: + :module: dpgen.simplify.arginfo + :func: simplify_jdata_arginfo diff --git a/dpgen/simplify/arginfo.py b/dpgen/simplify/arginfo.py index 0fbfe606e..190a392e9 100644 --- a/dpgen/simplify/arginfo.py +++ b/dpgen/simplify/arginfo.py @@ -1,10 +1,113 @@ -from dargs import Argument +from typing import List +from dargs import Argument, Variant from dpgen.arginfo import general_mdata_arginfo +from dpgen.generator.arginfo import ( + basic_args, + data_args, + training_args, + fp_style_vasp_args, + fp_style_gaussian_args, +) + + +def general_simplify_arginfo() -> Argument: + """General simplify arginfo. + + Returns + ------- + Argument + arginfo + """ + doc_labeled = "If true, the initial data is labeled." + doc_pick_data = "Path to the directory with the pick data with the deepmd/npy format. Systems are detected recursively." + doc_init_pick_number = "The number of initial pick data." + doc_iter_pick_number = "The number of pick data in each iteration." + doc_model_devi_f_trust_lo = "The lower bound of forces for the selection for the model deviation." + doc_model_devi_f_trust_hi = "The higher bound of forces for the selection for the model deviation." + + return [ + Argument("labeled", bool, optional=False, default=False, doc=doc_labeled), + Argument("pick_data", str, doc=doc_pick_data), + Argument("init_pick_number", int, doc=doc_init_pick_number), + Argument("iter_pick_number", int, doc=doc_iter_pick_number), + Argument("model_devi_f_trust_lo", float, optional=False, doc=doc_model_devi_f_trust_lo), + Argument("model_devi_f_trust_hi", float, optional=False, doc=doc_model_devi_f_trust_hi), + ] + + +def fp_style_variant_type_args() -> Variant: + """Generate variant for fp style variant type. + + Returns + ------- + Variant + variant for fp style + """ + doc_fp_style = 'Software for First Principles, if `labeled` is false. Options include “vasp”, “gaussian” up to now.' + doc_fp_style_none = 'No fp.' + doc_fp_style_vasp = 'VASP.' + doc_fp_style_gaussian = 'Gaussian. The command should be set as `g16 < input`.' + + return Variant("fp_style", [ + Argument("none", dict, doc=doc_fp_style_none), + # simplify use the same fp method as run + Argument("vasp", dict, fp_style_vasp_args(), doc=doc_fp_style_vasp), + Argument("gaussian", dict, fp_style_gaussian_args(), + doc=doc_fp_style_gaussian), + ], + optional=True, + default_tag="none", + doc=doc_fp_style) + + +def fp_args() -> List[Argument]: + """Generate arginfo for fp. + + Returns + ------- + List[Argument] + arginfo + """ + doc_fp_task_max = 'Maximum of structures to be calculated in 02.fp of each iteration.' + doc_fp_task_min = 'Minimum of structures to be calculated in 02.fp of each iteration.' + + return [ + Argument("fp_task_max", int, optional=True, doc=doc_fp_task_max), + Argument("fp_task_min", int, optional=True, doc=doc_fp_task_min), + ] + + +def simplify_jdata_arginfo() -> Argument: + """Generate arginfo for dpgen simplify jdata. + + Returns + ------- + Argument + arginfo + """ + doc_run_jdata = "Parameters for simplify.json, the first argument of `dpgen simplify`." + return Argument("simplify_jdata", + dict, + sub_fields=[ + *basic_args(), + # TODO: we may remove sys_configs; it is required in train method + *data_args(), + *general_simplify_arginfo(), + # simplify use the same training method as run + *training_args(), + *fp_args(), + ], + sub_variants=[ + fp_style_variant_type_args(), + ], + doc=doc_run_jdata, + ) + def simplify_mdata_arginfo() -> Argument: """Generate arginfo for dpgen simplify mdata. - + Returns ------- Argument diff --git a/examples/simplify/qm7.json b/examples/simplify/qm7.json index 648c589e7..10c76d15d 100644 --- a/examples/simplify/qm7.json +++ b/examples/simplify/qm7.json @@ -20,7 +20,6 @@ "auto" ], "numb_models": 4, - "train_param": "input.json", "default_training_param": { "model": { "type_map": [ @@ -92,11 +91,8 @@ }, "use_clusters": true, "fp_style": "gaussian", - "shuffle_poscar": false, "fp_task_max": 1000, "fp_task_min": 10, - "fp_pp_path": "/home/jzzeng/", - "fp_pp_files": [], "fp_params": { "keywords": "mn15/6-31g** force nosymm scf(maxcyc=512)", "nproc": 28, @@ -105,9 +101,7 @@ }, "init_pick_number":100, "iter_pick_number":100, - "e_trust_lo":1e10, - "e_trust_hi":1e10, - "f_trust_lo":0.25, - "f_trust_hi":0.45, + "model_devi_f_trust_lo":0.10, + "model_devi_f_trust_hi":0.30, "_comment": " that's all " } diff --git a/tests/test_check_examples.py b/tests/test_check_examples.py index 102e98490..61dbbc977 100644 --- a/tests/test_check_examples.py +++ b/tests/test_check_examples.py @@ -9,8 +9,12 @@ from dpgen.data.arginfo import ( init_reaction_jdata_arginfo, ) +from dpgen.simplify.arginfo import ( + simplify_jdata_arginfo, +) init_reaction_jdata = init_reaction_jdata_arginfo() +simplify_jdata = simplify_jdata_arginfo() # directory of examples p_examples = Path(__file__).parent.parent / "examples" @@ -19,6 +23,7 @@ # tuple of example list input_files = ( (init_reaction_jdata, p_examples / "init" / "reaction.json"), + (simplify_jdata, p_examples / "simplify" / "qm7.json"), ) From 9437d077334a1e57c4bf9e0d6b30bb5c3da0a5bd Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 7 Jul 2022 18:28:16 -0400 Subject: [PATCH 2/8] `use_ele_temp` should be optional --- dpgen/generator/arginfo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpgen/generator/arginfo.py b/dpgen/generator/arginfo.py index a9f05d429..e923c6668 100644 --- a/dpgen/generator/arginfo.py +++ b/dpgen/generator/arginfo.py @@ -26,7 +26,7 @@ def basic_args() -> List[Argument]: return [ Argument("type_map", list, optional=False, doc=doc_type_map), Argument("mass_map", list, optional=False, doc=doc_mass_map), - Argument("use_ele_temp", int, optional=False, + Argument("use_ele_temp", int, optional=True, default=0, doc=doc_use_ele_temp), ] From 6d7bc790270afcf8131d1fb5037a29620cc17e4d Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 7 Jul 2022 18:35:18 -0400 Subject: [PATCH 3/8] `sys_format` should be optional --- dpgen/generator/arginfo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dpgen/generator/arginfo.py b/dpgen/generator/arginfo.py index e923c6668..35296f59b 100644 --- a/dpgen/generator/arginfo.py +++ b/dpgen/generator/arginfo.py @@ -34,7 +34,7 @@ def basic_args() -> List[Argument]: def data_args() -> List[Argument]: doc_init_data_prefix = 'Prefix of initial data directories.' doc_init_data_sys = 'Directories of initial data. You may use either absolute or relative path here. Systems will be detected recursively in the directories.' - doc_sys_format = 'Format of initial data. It will be vasp/poscar if not set.' + doc_sys_format = 'Format of initial data.' doc_init_batch_size = 'Each number is the batch_size of corresponding system for training in init_data_sys. One recommended rule for setting the sys_batch_size and init_batch_size is that batch_size mutiply number of atoms ot the stucture should be larger than 32. If set to auto, batch size will be 32 divided by number of atoms.' doc_sys_configs_prefix = 'Prefix of sys_configs.' doc_sys_configs = 'Containing directories of structures to be explored in iterations.Wildcard characters are supported here.' @@ -45,7 +45,7 @@ def data_args() -> List[Argument]: doc=doc_init_data_prefix), Argument("init_data_sys", list, optional=False, doc=doc_init_data_sys), - Argument("sys_format", str, optional=False, doc=doc_sys_format), + Argument("sys_format", str, optional=True, default='vasp/poscar', doc=doc_sys_format), Argument("init_batch_size", str, optional=True, doc=doc_init_batch_size), Argument("sys_configs_prefix", str, optional=True, From ede220f04608bf41b216a551866370f5cef12309 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 7 Jul 2022 18:45:00 -0400 Subject: [PATCH 4/8] `sys_configs` should be list --- dpgen/generator/arginfo.py | 2 +- examples/simplify/qm7.json | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dpgen/generator/arginfo.py b/dpgen/generator/arginfo.py index 35296f59b..8a097451a 100644 --- a/dpgen/generator/arginfo.py +++ b/dpgen/generator/arginfo.py @@ -50,7 +50,7 @@ def data_args() -> List[Argument]: doc=doc_init_batch_size), Argument("sys_configs_prefix", str, optional=True, doc=doc_sys_configs_prefix), - Argument("sys_configs", str, optional=False, doc=doc_sys_configs), + Argument("sys_configs", list, optional=False, doc=doc_sys_configs), Argument("sys_batch_size", list, optional=True, doc=doc_sys_batch_size), ] diff --git a/examples/simplify/qm7.json b/examples/simplify/qm7.json index 10c76d15d..131a903a6 100644 --- a/examples/simplify/qm7.json +++ b/examples/simplify/qm7.json @@ -16,6 +16,7 @@ "pick_data": "/scratch/jz748/simplify/qm7", "init_data_prefix": "", "init_data_sys": [], + "sys_configs": [null], "sys_batch_size": [ "auto" ], From b0b4c0488545879cfd84a1f8ea99b9c7a35a3de6 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 7 Jul 2022 18:46:38 -0400 Subject: [PATCH 5/8] `labeled` should be optional --- dpgen/simplify/arginfo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpgen/simplify/arginfo.py b/dpgen/simplify/arginfo.py index 190a392e9..c325c5628 100644 --- a/dpgen/simplify/arginfo.py +++ b/dpgen/simplify/arginfo.py @@ -27,7 +27,7 @@ def general_simplify_arginfo() -> Argument: doc_model_devi_f_trust_hi = "The higher bound of forces for the selection for the model deviation." return [ - Argument("labeled", bool, optional=False, default=False, doc=doc_labeled), + Argument("labeled", bool, optional=True, default=False, doc=doc_labeled), Argument("pick_data", str, doc=doc_pick_data), Argument("init_pick_number", int, doc=doc_init_pick_number), Argument("iter_pick_number", int, doc=doc_iter_pick_number), From 9a12a494774c0164afd58ca1eba7a0e97a1eba5a Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 7 Jul 2022 18:47:33 -0400 Subject: [PATCH 6/8] `use_clusters` and `cluster_cutoff` are optional --- dpgen/generator/arginfo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dpgen/generator/arginfo.py b/dpgen/generator/arginfo.py index 8a097451a..ced694fc8 100644 --- a/dpgen/generator/arginfo.py +++ b/dpgen/generator/arginfo.py @@ -207,9 +207,9 @@ def fp_style_gaussian_args() -> List[Argument]: doc_fp_params_gaussian = 'Parameters for Gaussian calculation.' return [ - Argument("use_clusters", bool, optional=False, doc=doc_use_clusters), + Argument("use_clusters", bool, optional=True, default=False, doc=doc_use_clusters), Argument("cluster_cutoff", float, - optional=False, doc=doc_cluster_cutoff), + optional=True, doc=doc_cluster_cutoff), Argument("fp_params", dict, args, [], optional=False, doc=doc_fp_params_gaussian), ] From 3c70c8b50ab965d1adf0d4177a21c2ce5c44d11c Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 7 Jul 2022 18:48:41 -0400 Subject: [PATCH 7/8] fix keyword option --- dpgen/generator/arginfo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpgen/generator/arginfo.py b/dpgen/generator/arginfo.py index ced694fc8..b215ef77f 100644 --- a/dpgen/generator/arginfo.py +++ b/dpgen/generator/arginfo.py @@ -195,7 +195,7 @@ def fp_style_gaussian_args() -> List[Argument]: doc_nproc = 'The number of processors for Gaussian input.' args = [ - Argument("doc_keywords", [str or list], + Argument("keywords", [str or list], optional=False, doc=doc_keywords), Argument("multiplicity", [int or str], optional=False, doc=doc_multiplicity), From 23e8a7676774df9d91fd9408e58e41da033695b2 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 7 Jul 2022 19:03:44 -0400 Subject: [PATCH 8/8] add to toc --- doc/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/index.rst b/doc/index.rst index acd30efbd..65161739d 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -53,6 +53,7 @@ DPGEN's documentation :maxdepth: 2 :caption: Simplify + simplify/simplify-jdata simplify/simplify-mdata