From 809867b1912718a396de467f786d6a53ee51f245 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 7 Jun 2022 23:16:37 -0400 Subject: [PATCH 1/9] generate run arg doc from scripts This commit generates run argument documentations from scripts at runtime, so we can work on the script to improve the docs. --- doc/.gitignore | 2 + doc/gen_arginfo.py | 11 +- doc/index.rst | 6 +- doc/{run-mdata.rst => run/mdata.rst} | 2 +- doc/run/param.rst | 443 +-------------------------- dpgen/generator/arginfo.py | 296 +++++++++++++++++- 6 files changed, 311 insertions(+), 449 deletions(-) create mode 100644 doc/.gitignore rename doc/{run-mdata.rst => run/mdata.rst} (90%) diff --git a/doc/.gitignore b/doc/.gitignore new file mode 100644 index 000000000..0f59d284d --- /dev/null +++ b/doc/.gitignore @@ -0,0 +1,2 @@ +run-mdata-auto.rst +run-jdata-auto.rst \ No newline at end of file diff --git a/doc/gen_arginfo.py b/doc/gen_arginfo.py index e51d44dc8..286f50665 100644 --- a/doc/gen_arginfo.py +++ b/doc/gen_arginfo.py @@ -1,5 +1,12 @@ -from dpgen.generator.arginfo import run_mdata_arginfo +from dpgen.generator.arginfo import ( + run_mdata_arginfo, + run_jdata_arginfo, +) -run_mdata_doc = run_mdata_arginfo().gen_doc() +run_mdata_doc = run_mdata_arginfo().gen_doc(make_anchor=True) with open('run-mdata-auto.rst', 'w') as f: f.write(run_mdata_doc) + +run_jdata_doc = run_jdata_arginfo().gen_doc(make_anchor=True) +with open('run-jdata-auto.rst', 'w') as f: + f.write(run_jdata_doc) diff --git a/doc/index.rst b/doc/index.rst index 341ce5d79..9fa88431d 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -24,7 +24,7 @@ DPGEN's documentation run/run-process.rst run/param.rst - run-mdata.rst + run/mdata.rst .. _init:: @@ -54,14 +54,14 @@ DPGEN's documentation :caption: Tutorial :glob: - toymodels/* + Tutorials .. _Contribution: .. toctree:: :maxdepth: 2 - :caption: Contribution Guild + :caption: Contribution Guide README.md api/api diff --git a/doc/run-mdata.rst b/doc/run/mdata.rst similarity index 90% rename from doc/run-mdata.rst rename to doc/run/mdata.rst index 4f92fc4fc..de1a3c107 100644 --- a/doc/run-mdata.rst +++ b/doc/run/mdata.rst @@ -3,4 +3,4 @@ dpgen run machine parameters .. note:: One can load, modify, and export the input file by using our effective web-based tool `DP-GUI `_. All parameters below can be set in DP-GUI. By clicking "SAVE JSON", one can download the input file. -.. include:: run-mdata-auto.rst +.. include:: ../run-mdata-auto.rst diff --git a/doc/run/param.rst b/doc/run/param.rst index 749c4e956..2c3d817ee 100644 --- a/doc/run/param.rst +++ b/doc/run/param.rst @@ -2,445 +2,4 @@ dpgen run param parameters ============================= -type_map: - | type: ``list`` - | argument path: ``type_map`` - - Atom types. - -mass_map: - | type: ``list`` - | argument path: ``mass_map`` - - Standard atom weights. - -use_ele_temp: - | type: ``int`` - | argument path: ``use_ele_temp`` - - Currently only support fp_style vasp. - - - 0: no electron temperature. - - - 1: eletron temperature as frame parameter. - - - 2: electron temperature as atom parameter. - -init_data_prefix: - | type: ``str``, optional - | argument path: ``init_data_prefix`` - - Prefix of initial data directories. - -init_data_prefix: - | type: ``list`` - | argument path: ``init_data_prefix`` - - Directories of initial data. You may use either absolute or relative path here. - -sys_format: - | type: ``str`` - | argument path: ``sys_format`` - - Format of initial data. It will be vasp/poscar if not set. - -init_multi_systems: - | type: ``bool``, optional - | argument path: ``init_multi_systems`` - - If set to true, init_data_sys directories should contain sub-directories of various systems. DP-GEN will regard all of these sub-directories as inital data systems. - -init_batch_size: - | type: ``str``, optional - | argument path: ``init_batch_size`` - - Each number is the batch_size of corresponding system for training in init_data_sys. One recommended rule for setting the sys_batch_size and init_batch_size is that batch_size mutiply number of atoms ot the stucture should be larger than 32. If set to auto, batch size will be 32 divided by number of atoms. - -sys_configs_prefix: - | type: ``str``, optional - | argument path: ``sys_configs_prefix`` - - Prefix of sys_configs. - -sys_configs: - | type: ``str`` - | argument path: ``sys_configs`` - - Containing directories of structures to be explored in iterations.Wildcard characters are supported here. - -sys_batch_size: - | type: ``list``, optional - | argument path: ``sys_batch_size`` - - Each number is the batch_size for training of corresponding system in sys_configs. If set to auto, batch size will be 32 divided by number of atoms. - -numb_models: - | type: ``int`` - | argument path: ``numb_models`` - - Number of models to be trained in 00.train. 4 is recommend. - -training_iter0_model_path: - | type: ``list``, optional - | argument path: ``training_iter0_model_path`` - - The model used to init the first iter training. Number of element should be equal to numb_models. - -training_init_model: - | type: ``bool``, optional - | argument path: ``training_init_model`` - - Iteration > 0, the model parameters will be initilized from the model trained at the previous iteration. Iteration == 0, the model parameters will be initialized from training_iter0_model_path. - -default_training_param: - | type: ``dict`` - | argument path: ``default_training_param`` - - Training parameters for deepmd-kit in 00.train. You can find instructions from here: (https://github.com/deepmodeling/deepmd-kit). - -dp_compress: - | type: ``bool``, optional, default: ``False`` - | argument path: ``dp_compress`` - - Use dp compress to compress the model. - -model_devi_dt: - | type: ``float`` - | argument path: ``model_devi_dt`` - - Timestep for MD. 0.002 is recommend. - -model_devi_skip: - | type: ``int`` - | argument path: ``model_devi_skip`` - - Number of structures skipped for fp in each MD. - -model_devi_f_trust_lo: - | type: ``list`` | ``float`` - | argument path: ``model_devi_f_trust_lo`` - - Lower bound of forces for the selection. If list, should be set for each index in sys_configs, respectively. - -model_devi_f_trust_hi: - | type: ``list`` | ``float`` - | argument path: ``model_devi_f_trust_hi`` - - Upper bound of forces for the selection. If list, should be set for each index in sys_configs, respectively. - -model_devi_v_trust_lo: - | type: ``list`` | ``float`` - | argument path: ``model_devi_v_trust_lo`` - - Lower bound of virial for the selection. If list, should be set for each index in sys_configs, respectively. Should be used with DeePMD-kit v2.x. - -model_devi_v_trust_hi: - | type: ``list`` | ``float`` - | argument path: ``model_devi_v_trust_hi`` - - Upper bound of virial for the selection. If list, should be set for each index in sys_configs, respectively. Should be used with DeePMD-kit v2.x. - -model_devi_adapt_trust_lo: - | type: ``bool``, optional - | argument path: ``model_devi_adapt_trust_lo`` - - Adaptively determines the lower trust levels of force and virial. This option should be used together with model_devi_numb_candi_f, model_devi_numb_candi_v and optionally with model_devi_perc_candi_f and model_devi_perc_candi_v. dpgen will make two sets: - - - 1. From the frames with force model deviation lower than model_devi_f_trust_hi, select max(model_devi_numb_candi_f, model_devi_perc_candi_f*n_frames) frames with largest force model deviation. - - - 2. From the frames with virial model deviation lower than model_devi_v_trust_hi, select max(model_devi_numb_candi_v, model_devi_perc_candi_v*n_frames) frames with largest virial model deviation. - - The union of the two sets is made as candidate dataset. - -model_devi_numb_candi_f: - | type: ``int``, optional - | argument path: ``model_devi_numb_candi_f`` - - See model_devi_adapt_trust_lo. - -model_devi_numb_candi_v: - | type: ``int``, optional - | argument path: ``model_devi_numb_candi_v`` - - See model_devi_adapt_trust_lo. - -model_devi_perc_candi_f: - | type: ``float``, optional - | argument path: ``model_devi_perc_candi_f`` - - See model_devi_adapt_trust_lo. - -model_devi_perc_candi_v: - | type: ``float``, optional - | argument path: ``model_devi_perc_candi_v`` - - See model_devi_adapt_trust_lo. - -model_devi_f_avg_relative: - | type: ``bool``, optional - | argument path: ``model_devi_f_avg_relative`` - - Normalized the force model deviations by the RMS force magnitude along the trajectory. This key should not be used with use_relative. - -model_devi_clean_traj: - | type: ``bool`` | ``int`` - | argument path: ``model_devi_clean_traj`` - - If type of model_devi_clean_traj is bool type then it denote whether to clean traj folders in MD since they are too large. If it is Int type, then the most recent n iterations of traj folders will be retained, others will be removed. - -model_devi_nopbc: - | type: ``bool`` - | argument path: ``model_devi_nopbc`` - - Assume open boundary condition in MD simulations. - -model_devi_activation_func: - | type: ``list``, optional - | argument path: ``model_devi_activation_func`` - - Set activation functions for models, length of the list should be the same as numb_models, and two elements in the list of string respectively assign activation functions to the embedding and fitting nets within each model. Backward compatibility: the orginal "list of String" format is still supported, where embedding and fitting nets of one model use the same activation function, and the length of the list should be the same as numb_models. - -model_devi_jobs: - | type: ``dict`` | ``list`` - | argument path: ``model_devi_jobs`` - - Settings for exploration in 01.model_devi. Each dict in the list corresponds to one iteration. The index of model_devi_jobs exactly accord with index of iterations - - sys_idx: - | type: ``list`` - | argument path: ``model_devi_jobs/sys_idx`` - - Systems to be selected as the initial structure of MD and be explored. The index corresponds exactly to the sys_configs. - - temps: - | type: ``list`` - | argument path: ``model_devi_jobs/temps`` - - Temperature (K) in MD. - - press: - | type: ``list`` - | argument path: ``model_devi_jobs/press`` - - Pressure (Bar) in MD. - - trj_freq: - | type: ``int`` - | argument path: ``model_devi_jobs/trj_freq`` - - Frequecy of trajectory saved in MD. - - nsteps: - | type: ``int`` - | argument path: ``model_devi_jobs/nsteps`` - - Running steps of MD. - - ensembles: - | type: ``str`` - | argument path: ``model_devi_jobs/ensembles`` - - Determining which ensemble used in MD, options include “npt” and “nvt”. - - neidelay: - | type: ``int``, optional - | argument path: ``model_devi_jobs/neidelay`` - - Delay building until this many steps since last build. - - taut: - | type: ``float`` | ``str``, optional - | argument path: ``model_devi_jobs/taut`` - - Coupling time of thermostat (ps). - - taup: - | type: ``float`` | ``str``, optional - | argument path: ``model_devi_jobs/taup`` - - Coupling time of barostat (ps). - -fp_style: - | type: ``dict`` - | argument path: ``fp_style`` - - Software for First Principles. Options include “vasp”, “pwscf”, “siesta” and “gaussian” up to now. - - - Depending on the value of *fp_style*, different sub args are accepted. - - fp_style: - | type: ``str`` (flag key) - | argument path: ``fp_style/fp_style`` - | possible choices: vasp, gaussian, siesta, cp2k - - The code used for fp tasks. - - - When *fp_style* is set to ``vasp``: - - fp_pp_path: - | type: ``str`` - | argument path: ``fp_style[vasp]/fp_pp_path`` - - Directory of psuedo-potential file to be used for 02.fp exists. - - fp_pp_files: - | type: ``list`` - | argument path: ``fp_style[vasp]/fp_pp_files`` - - Psuedo-potential file to be used for 02.fp. Note that the order of elements should correspond to the order in type_map. - - fp_incar: - | type: ``str`` - | argument path: ``fp_style[vasp]/fp_incar`` - - Input file for VASP. INCAR must specify KSPACING and KGAMMA. - - fp_aniso_kspacing: - | type: ``list`` - | argument path: ``fp_style[vasp]/fp_aniso_kspacing`` - - Set anisotropic kspacing. Usually useful for 1-D or 2-D materials. Only support VASP. If it is setting the KSPACING key in INCAR will be ignored. - - cvasp: - | type: ``bool`` - | argument path: ``fp_style[vasp]/cvasp`` - - If cvasp is true, DP-GEN will use Custodian to help control VASP calculation. - - - When *fp_style* is set to ``gaussian``: - - use_clusters: - | type: ``bool`` - | argument path: ``fp_style[gaussian]/use_clusters`` - - If set to true, clusters will be taken instead of the whole system. This option does not work with DeePMD-kit 0.x. - - cluster_cutoff: - | type: ``float`` - | argument path: ``fp_style[gaussian]/cluster_cutoff`` - - The cutoff radius of clusters if use_clusters is set to true. - - fp_params: - | type: ``dict`` - | argument path: ``fp_style[gaussian]/fp_params`` - - Parameters for Gaussian calculation. - - doc_keywords: - | type: ``str`` | ``list`` - | argument path: ``fp_style[gaussian]/fp_params/doc_keywords`` - - Keywords for Gaussian input. - - multiplicity: - | type: ``int`` | ``str`` - | argument path: ``fp_style[gaussian]/fp_params/multiplicity`` - - Spin multiplicity for Gaussian input. If set to auto, the spin multiplicity will be detected automatically. If set to frag, the "fragment=N" method will be used. - - nproc: - | type: ``int`` - | argument path: ``fp_style[gaussian]/fp_params/nproc`` - - The number of processors for Gaussian input. - - - When *fp_style* is set to ``siesta``: - - use_clusters: - | type: ``bool`` - | argument path: ``fp_style[siesta]/use_clusters`` - - If set to true, clusters will be taken instead of the whole system. This option does not work with DeePMD-kit 0.x. - - cluster_cutoff: - | type: ``float`` - | argument path: ``fp_style[siesta]/cluster_cutoff`` - - The cutoff radius of clusters if use_clusters is set to true. - - fp_params: - | type: ``dict`` - | argument path: ``fp_style[siesta]/fp_params`` - - Parameters for siesta calculation. - - ecut: - | type: ``int`` - | argument path: ``fp_style[siesta]/fp_params/ecut`` - - Define the plane wave cutoff for grid. - - ediff: - | type: ``float`` - | argument path: ``fp_style[siesta]/fp_params/ediff`` - - Tolerance of Density Matrix. - - kspacing: - | type: ``float`` - | argument path: ``fp_style[siesta]/fp_params/kspacing`` - - Sample factor in Brillouin zones. - - mixingweight: - | type: ``float`` - | argument path: ``fp_style[siesta]/fp_params/mixingweight`` - - Proportion a of output Density Matrix to be used for the input Density Matrix of next SCF cycle (linear mixing). - - NumberPulay: - | type: ``int`` - | argument path: ``fp_style[siesta]/fp_params/NumberPulay`` - - Controls the Pulay convergence accelerator. - - - When *fp_style* is set to ``cp2k``: - - user_fp_params: - | type: ``dict`` - | argument path: ``fp_style[cp2k]/user_fp_params`` - - Parameters for cp2k calculation. find detail in manual.cp2k.org. only the kind section must be set before use. we assume that you have basic knowledge for cp2k input. - - external_input_path: - | type: ``str`` - | argument path: ``fp_style[cp2k]/external_input_path`` - - Conflict with key:user_fp_params, use the template input provided by user, some rules should be followed, read the following text in detail. - -fp_task_max: - | type: ``int`` - | argument path: ``fp_task_max`` - - Maximum of structures to be calculated in 02.fp of each iteration. - -fp_task_min: - | type: ``int`` - | argument path: ``fp_task_min`` - - Minimum of structures to be calculated in 02.fp of each iteration. - -fp_accurate_threshold: - | type: ``float``, optional - | argument path: ``fp_accurate_threshold`` - - If the accurate ratio is larger than this number, no fp calculation will be performed, i.e. fp_task_max = 0. - -fp_accurate_soft_threshold: - | type: ``float``, optional - | argument path: ``fp_accurate_soft_threshold`` - - If the accurate ratio is between this number and fp_accurate_threshold, the fp_task_max linearly decays to zero. - -fp_cluster_vacuum: - | type: ``float``, optional - | argument path: ``fp_cluster_vacuum`` - - If the vacuum size is smaller than this value, this cluster will not be choosen for labeling. - +.. include:: ../run-jdata-auto.rst diff --git a/dpgen/generator/arginfo.py b/dpgen/generator/arginfo.py index f8815862d..8cc60ba14 100644 --- a/dpgen/generator/arginfo.py +++ b/dpgen/generator/arginfo.py @@ -1,7 +1,9 @@ -from dargs import Argument +from typing import List +from dargs import Argument, Variant from dpgen.dispatcher.Dispatcher import mdata_arginfo + def run_mdata_arginfo() -> Argument: """Generate arginfo for dpgen run mdata. @@ -23,3 +25,295 @@ def run_mdata_arginfo() -> Argument: doc=doc_mdata % task, )) return Argument("run_mdata", dict, sub_fields=sub_fields, doc=doc_run_mdata) + + +# basics +def basic_args() -> List[Argument]: + doc_type_map = 'Atom types.' + doc_mass_map = 'Standard atom weights.' + doc_use_ele_temp = 'Currently only support fp_style vasp. \n\n\ +- 0: no electron temperature. \n\n\ +- 1: eletron temperature as frame parameter. \n\n\ +- 2: electron temperature as atom parameter.' + + return [ + Argument("type_map", list, optional=False, doc=doc_type_map), + Argument("mass_map", list, optional=False, doc=doc_mass_map), + Argument("use_ele_temp", int, optional=False, + default=0, doc=doc_use_ele_temp), + ] + + +def data_args() -> List[Argument]: + doc_init_data_prefix = 'Prefix of initial data directories.' + doc_init_data_sys = 'Directories of initial data. You may use either absolute or relative path here.' + doc_sys_format = 'Format of initial data. It will be vasp/poscar if not set.' + doc_init_multi_systems = 'If set to true, init_data_sys directories should contain sub-directories of various systems. DP-GEN will regard all of these sub-directories as inital data systems.' + doc_init_batch_size = 'Each number is the batch_size of corresponding system for training in init_data_sys. One recommended rule for setting the sys_batch_size and init_batch_size is that batch_size mutiply number of atoms ot the stucture should be larger than 32. If set to auto, batch size will be 32 divided by number of atoms.' + doc_sys_configs_prefix = 'Prefix of sys_configs.' + doc_sys_configs = 'Containing directories of structures to be explored in iterations.Wildcard characters are supported here.' + doc_sys_batch_size = 'Each number is the batch_size for training of corresponding system in sys_configs. If set to auto, batch size will be 32 divided by number of atoms.' + + return [ + Argument("init_data_prefix", str, optional=True, + doc=doc_init_data_prefix), + Argument("init_data_sys", list, + optional=False, doc=doc_init_data_sys), + Argument("sys_format", str, optional=False, doc=doc_sys_format), + Argument("init_multi_systems", bool, optional=True, + doc=doc_init_multi_systems), + Argument("init_batch_size", str, optional=True, + doc=doc_init_batch_size), + Argument("sys_configs_prefix", str, optional=True, + doc=doc_sys_configs_prefix), + Argument("sys_configs", str, optional=False, doc=doc_sys_configs), + Argument("sys_batch_size", list, optional=True, + doc=doc_sys_batch_size), + ] + +# Training + + +def training_args() -> List[Argument]: + doc_numb_models = 'Number of models to be trained in 00.train. 4 is recommend.' + doc_training_iter0_model_path = 'The model used to init the first iter training. Number of element should be equal to numb_models.' + doc_training_init_model = 'Iteration > 0, the model parameters will be initilized from the model trained at the previous iteration. Iteration == 0, the model parameters will be initialized from training_iter0_model_path.' + doc_default_training_param = 'Training parameters for deepmd-kit in 00.train. You can find instructions from here: (https://github.com/deepmodeling/deepmd-kit).' + doc_dp_compress = 'Use dp compress to compress the model.' + + return [ + Argument("numb_models", int, optional=False, doc=doc_numb_models), + Argument("training_iter0_model_path", list, optional=True, + doc=doc_training_iter0_model_path), + Argument("training_init_model", bool, optional=True, + doc=doc_training_init_model), + Argument("default_training_param", dict, optional=False, + doc=doc_default_training_param), + Argument("dp_compress", bool, optional=True, + default=False, doc=doc_dp_compress), + ] + + +# Exploration +def model_devi_jobs_args() -> List[Argument]: + # this may be not correct + doc_sys_idx = 'Systems to be selected as the initial structure of MD and be explored. The index corresponds exactly to the sys_configs.' + doc_temps = 'Temperature (K) in MD.' + doc_press = 'Pressure (Bar) in MD.' + doc_trj_freq = 'Frequecy of trajectory saved in MD.' + doc_nsteps = 'Running steps of MD.' + doc_ensembles = 'Determining which ensemble used in MD, options include “npt” and “nvt”.' + doc_neidelay = 'delay building until this many steps since last build.' + doc_taut = 'Coupling time of thermostat (ps).' + doc_taup = 'Coupling time of barostat (ps).' + + args = [ + Argument("sys_idx", list, optional=False, doc=doc_sys_idx), + Argument("temps", list, optional=False, doc=doc_temps), + Argument("press", list, optional=False, doc=doc_press), + Argument("trj_freq", int, optional=False, doc=doc_trj_freq), + Argument("nsteps", int, optional=False, doc=doc_nsteps), + Argument("ensembles", str, optional=False, doc=doc_ensembles), + Argument("neidelay", int, optional=True, doc=doc_neidelay), + Argument("taut", float, optional=True, doc=doc_taut), + Argument("taup", float, optional=True, doc=doc_taup), + ] + + doc_model_devi_jobs = 'Settings for exploration in 01.model_devi. Each dict in the list corresponds to one iteration. The index of model_devi_jobs exactly accord with index of iterations' + return Argument("model_devi_jobs", list, args, [], doc=doc_model_devi_jobs) + + +def model_devi_args() -> List[Argument]: + doc_model_devi_dt = 'Timestep for MD. 0.002 is recommend.' + doc_model_devi_skip = 'Number of structures skipped for fp in each MD.' + doc_model_devi_f_trust_lo = 'Lower bound of forces for the selection. If list, should be set for each index in sys_configs, respectively.' + doc_model_devi_f_trust_hi = 'Upper bound of forces for the selection. If list, should be set for each index in sys_configs, respectively.' + doc_model_devi_v_trust_lo = 'Lower bound of virial for the selection. If list, should be set for each index in sys_configs, respectively. Should be used with DeePMD-kit v2.x.' + doc_model_devi_v_trust_hi = 'Upper bound of virial for the selection. If list, should be set for each index in sys_configs, respectively. Should be used with DeePMD-kit v2.x.' + doc_model_devi_adapt_trust_lo = 'Adaptively determines the lower trust levels of force and virial. This option should be used together with model_devi_numb_candi_f, model_devi_numb_candi_v and optionally with model_devi_perc_candi_f and model_devi_perc_candi_v. dpgen will make two sets:\n\n\ +- 1. From the frames with force model deviation lower than model_devi_f_trust_hi, select max(model_devi_numb_candi_f, model_devi_perc_candi_f*n_frames) frames with largest force model deviation. \n\n\ +- 2. From the frames with virial model deviation lower than model_devi_v_trust_hi, select max(model_devi_numb_candi_v, model_devi_perc_candi_v*n_frames) frames with largest virial model deviation. \n\n\ +The union of the two sets is made as candidate dataset.' + doc_model_devi_numb_candi_f = 'See model_devi_adapt_trust_lo.' + doc_model_devi_numb_candi_v = 'See model_devi_adapt_trust_lo.' + doc_model_devi_perc_candi_f = 'See model_devi_adapt_trust_lo.' + doc_model_devi_perc_candi_v = 'See model_devi_adapt_trust_lo.' + doc_model_devi_f_avg_relative = 'Normalized the force model deviations by the RMS force magnitude along the trajectory. This key should not be used with use_relative.' + doc_model_devi_clean_traj = 'If type of model_devi_clean_traj is bool type then it denote whether to clean traj folders in MD since they are too large. If it is Int type, then the most recent n iterations of traj folders will be retained, others will be removed.' + doc_model_devi_nopbc = 'Assume open boundary condition in MD simulations.' + doc_model_devi_activation_func = 'Set activation functions for models, length of the list should be the same as numb_models, and two elements in the list of string respectively assign activation functions to the embedding and fitting nets within each model. Backward compatibility: the orginal "list of String" format is still supported, where embedding and fitting nets of one model use the same activation function, and the length of the list should be the same as numb_models.' + + return [ + model_devi_jobs_args(), + Argument("model_devi_dt", float, + optional=False, doc=doc_model_devi_dt), + Argument("model_devi_skip", int, optional=False, + doc=doc_model_devi_skip), + Argument("model_devi_f_trust_lo", [ + float, list], optional=False, doc=doc_model_devi_f_trust_lo), + Argument("model_devi_f_trust_hi", [ + float, list], optional=False, doc=doc_model_devi_f_trust_hi), + Argument("model_devi_v_trust_lo", [ + float, list], optional=False, doc=doc_model_devi_v_trust_lo), + Argument("model_devi_v_trust_hi", [ + float, list], optional=False, doc=doc_model_devi_v_trust_hi), + Argument("model_devi_adapt_trust_lo", bool, optional=True, + doc=doc_model_devi_adapt_trust_lo), + Argument("model_devi_numb_candi_f", int, optional=True, + doc=doc_model_devi_numb_candi_f), + Argument("model_devi_numb_candi_v", int, optional=True, + doc=doc_model_devi_numb_candi_v), + Argument("model_devi_perc_candi_f", float, + optional=True, doc=doc_model_devi_perc_candi_f), + Argument("model_devi_perc_candi_v", float, + optional=True, doc=doc_model_devi_perc_candi_v), + Argument("model_devi_f_avg_relative", bool, optional=True, + doc=doc_model_devi_f_avg_relative), + Argument("model_devi_clean_traj", [ + bool, int], optional=False, doc=doc_model_devi_clean_traj), + Argument("model_devi_nopbc", bool, optional=False, + doc=doc_model_devi_nopbc), + Argument("model_devi_activation_func", list, optional=True, + doc=doc_model_devi_activation_func), + ] + + +# Labeling +# vasp +def fp_style_vasp_args() -> List[Argument]: + doc_fp_pp_path = 'Directory of psuedo-potential file to be used for 02.fp exists.' + doc_fp_pp_files = 'Psuedo-potential file to be used for 02.fp. Note that the order of elements should correspond to the order in type_map.' + doc_fp_incar = 'Input file for VASP. INCAR must specify KSPACING and KGAMMA.' + doc_fp_aniso_kspacing = 'Set anisotropic kspacing. Usually useful for 1-D or 2-D materials. Only support VASP. If it is setting the KSPACING key in INCAR will be ignored.' + doc_cvasp = 'If cvasp is true, DP-GEN will use Custodian to help control VASP calculation.' + + return [ + Argument("fp_pp_path", str, optional=False, doc=doc_fp_pp_path), + Argument("fp_pp_files", list, optional=False, doc=doc_fp_pp_files), + Argument("fp_incar", str, optional=False, doc=doc_fp_incar), + Argument("fp_aniso_kspacing", list, optional=False, + doc=doc_fp_aniso_kspacing), + Argument("cvasp", bool, optional=False, doc=doc_cvasp), + ] + + +# gaussian +def fp_style_gaussian_args() -> List[Argument]: + doc_keywords = 'Keywords for Gaussian input.' + doc_multiplicity = 'Spin multiplicity for Gaussian input. If set to auto, the spin multiplicity will be detected automatically. If set to frag, the "fragment=N" method will be used.' + doc_nproc = 'The number of processors for Gaussian input.' + + args = [ + Argument("doc_keywords", [str or list], + optional=False, doc=doc_keywords), + Argument("multiplicity", [int or str], + optional=False, doc=doc_multiplicity), + Argument("nproc", int, optional=False, doc=doc_nproc), + ] + + doc_use_clusters = 'If set to true, clusters will be taken instead of the whole system. This option does not work with DeePMD-kit 0.x.' + doc_cluster_cutoff = 'The cutoff radius of clusters if use_clusters is set to true.' + doc_fp_params_gaussian = 'Parameters for Gaussian calculation.' + + return [ + Argument("use_clusters", bool, optional=False, doc=doc_use_clusters), + Argument("cluster_cutoff", float, + optional=False, doc=doc_cluster_cutoff), + Argument("fp_params", dict, args, [], + optional=False, doc=doc_fp_params_gaussian), + ] + +# siesta +def fp_style_siesta_args() -> List[Argument]: + doc_ecut = 'Define the plane wave cutoff for grid.' + doc_ediff = 'Tolerance of Density Matrix.' + doc_kspacing = 'Sample factor in Brillouin zones.' + doc_mixingweight = 'Proportion a of output Density Matrix to be used for the input Density Matrix of next SCF cycle (linear mixing).' + doc_NumberPulay = 'Controls the Pulay convergence accelerator.' + + args = [ + Argument("ecut", int, optional=False, doc=doc_ecut), + Argument("ediff", float, optional=False, doc=doc_ediff), + Argument("kspacing", float, optional=False, doc=doc_kspacing), + Argument("mixingweight", float, optional=False, doc=doc_mixingweight), + Argument("NumberPulay", int, optional=False, doc=doc_NumberPulay), + ] + + doc_use_clusters = 'If set to true, clusters will be taken instead of the whole system. This option does not work with DeePMD-kit 0.x.' + doc_cluster_cutoff = 'The cutoff radius of clusters if use_clusters is set to true.' + doc_fp_params_siesta = 'Parameters for siesta calculation.' + + return [ + Argument("use_clusters", bool, optional=False, doc=doc_use_clusters), + Argument("cluster_cutoff", float, + optional=False, doc=doc_cluster_cutoff), + Argument("fp_params", dict, args, [], + optional=False, doc=doc_fp_params_siesta), + ] + +# cp2k + + +def fp_style_cp2k_args() -> List[Argument]: + doc_user_fp_params = 'Parameters for cp2k calculation. find detail in manual.cp2k.org. only the kind section must be set before use. we assume that you have basic knowledge for cp2k input.' + doc_external_input_path = 'Conflict with key:user_fp_params, use the template input provided by user, some rules should be followed, read the following text in detail.' + + return [ + Argument("user_fp_params", dict, optional=False, + doc=doc_user_fp_params), + Argument("external_input_path", str, optional=False, + doc=doc_external_input_path), + ] + + +def fp_style_variant_type_args() -> Variant: + doc_fp_style = 'The style of fp.' + + return Variant("fp_style", [Argument("vasp", dict, fp_style_vasp_args()), + Argument("gaussian", dict, + fp_style_gaussian_args()), + Argument("siesta", dict, + fp_style_siesta_args()), + Argument("cp2k", dict, fp_style_cp2k_args())], + optional=False, + doc=doc_fp_style) + + +def fp_args() -> List[Argument]: + doc_fp_style = 'Software for First Principles. Options include “vasp”, “pwscf”, “siesta” and “gaussian” up to now.' + doc_fp_task_max = 'Maximum of structures to be calculated in 02.fp of each iteration.' + doc_fp_task_min = 'Minimum of structures to be calculated in 02.fp of each iteration.' + doc_fp_accurate_threshold = 'If the accurate ratio is larger than this number, no fp calculation will be performed, i.e. fp_task_max = 0.' + doc_fp_accurate_soft_threshold = 'If the accurate ratio is between this number and fp_accurate_threshold, the fp_task_max linearly decays to zero.' + doc_fp_cluster_vacuum = 'If the vacuum size is smaller than this value, this cluster will not be choosen for labeling.' + + return [ + Argument('fp_style', dict, [], + [fp_style_variant_type_args()], + optional=False, + doc=doc_fp_style), + Argument("fp_task_max", int, optional=False, doc=doc_fp_task_max), + Argument("fp_task_min", int, optional=False, doc=doc_fp_task_min), + Argument("fp_accurate_threshold", float, + optional=True, doc=doc_fp_accurate_threshold), + Argument("fp_accurate_soft_threshold", float, + optional=True, doc=doc_fp_accurate_soft_threshold), + Argument("fp_cluster_vacuum", float, + optional=True, doc=doc_fp_cluster_vacuum), + ] + + +def run_jdata_arginfo() -> Argument: + """Argument information for dpgen run mdata. + + Returns + ------- + Argument + argument information + """ + doc_run_jdata = "param.json file" + return Argument("run_jdata", + dict, + sub_fields=basic_args() + data_args() + training_args() + model_devi_args() + fp_args(), + doc=doc_run_jdata) + From 31b5f780749fd20542ec68489299547c958158d0 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 10 Jun 2022 01:44:27 -0400 Subject: [PATCH 2/9] remove `init_multi_systems` see #730 --- dpgen/generator/arginfo.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dpgen/generator/arginfo.py b/dpgen/generator/arginfo.py index 8cc60ba14..f16c13488 100644 --- a/dpgen/generator/arginfo.py +++ b/dpgen/generator/arginfo.py @@ -46,9 +46,8 @@ def basic_args() -> List[Argument]: def data_args() -> List[Argument]: doc_init_data_prefix = 'Prefix of initial data directories.' - doc_init_data_sys = 'Directories of initial data. You may use either absolute or relative path here.' + doc_init_data_sys = 'Directories of initial data. You may use either absolute or relative path here. Systems will be detected recursively in the directories.' doc_sys_format = 'Format of initial data. It will be vasp/poscar if not set.' - doc_init_multi_systems = 'If set to true, init_data_sys directories should contain sub-directories of various systems. DP-GEN will regard all of these sub-directories as inital data systems.' doc_init_batch_size = 'Each number is the batch_size of corresponding system for training in init_data_sys. One recommended rule for setting the sys_batch_size and init_batch_size is that batch_size mutiply number of atoms ot the stucture should be larger than 32. If set to auto, batch size will be 32 divided by number of atoms.' doc_sys_configs_prefix = 'Prefix of sys_configs.' doc_sys_configs = 'Containing directories of structures to be explored in iterations.Wildcard characters are supported here.' @@ -60,8 +59,6 @@ def data_args() -> List[Argument]: Argument("init_data_sys", list, optional=False, doc=doc_init_data_sys), Argument("sys_format", str, optional=False, doc=doc_sys_format), - Argument("init_multi_systems", bool, optional=True, - doc=doc_init_multi_systems), Argument("init_batch_size", str, optional=True, doc=doc_init_batch_size), Argument("sys_configs_prefix", str, optional=True, From 7f5333b32b1545da6ddc75d9f086ed072f6b1d30 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 10 Jun 2022 17:29:25 -0400 Subject: [PATCH 3/9] set repeat=True for model_devi_jobs --- dpgen/generator/arginfo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpgen/generator/arginfo.py b/dpgen/generator/arginfo.py index f16c13488..5c253723d 100644 --- a/dpgen/generator/arginfo.py +++ b/dpgen/generator/arginfo.py @@ -117,7 +117,7 @@ def model_devi_jobs_args() -> List[Argument]: ] doc_model_devi_jobs = 'Settings for exploration in 01.model_devi. Each dict in the list corresponds to one iteration. The index of model_devi_jobs exactly accord with index of iterations' - return Argument("model_devi_jobs", list, args, [], doc=doc_model_devi_jobs) + return Argument("model_devi_jobs", list, args, [], repeat=True, doc=doc_model_devi_jobs) def model_devi_args() -> List[Argument]: From c215e32b275d40297e784f915f3b723542f6e245 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 11 Jun 2022 00:45:57 -0400 Subject: [PATCH 4/9] switch to dargs directive --- doc/gen_arginfo.py | 11 ++--------- doc/requirements.txt | 1 + doc/run/param.rst | 4 +++- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/doc/gen_arginfo.py b/doc/gen_arginfo.py index 286f50665..e51d44dc8 100644 --- a/doc/gen_arginfo.py +++ b/doc/gen_arginfo.py @@ -1,12 +1,5 @@ -from dpgen.generator.arginfo import ( - run_mdata_arginfo, - run_jdata_arginfo, -) +from dpgen.generator.arginfo import run_mdata_arginfo -run_mdata_doc = run_mdata_arginfo().gen_doc(make_anchor=True) +run_mdata_doc = run_mdata_arginfo().gen_doc() with open('run-mdata-auto.rst', 'w') as f: f.write(run_mdata_doc) - -run_jdata_doc = run_jdata_arginfo().gen_doc(make_anchor=True) -with open('run-jdata-auto.rst', 'w') as f: - f.write(run_jdata_doc) diff --git a/doc/requirements.txt b/doc/requirements.txt index 33ad28e39..c703322be 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -4,4 +4,5 @@ sphinx_rtd_theme sphinx_markdown_tables myst-parser deepmodeling_sphinx +dargs>=0.3.1 . diff --git a/doc/run/param.rst b/doc/run/param.rst index 2c3d817ee..592f8f31c 100644 --- a/doc/run/param.rst +++ b/doc/run/param.rst @@ -2,4 +2,6 @@ dpgen run param parameters ============================= -.. include:: ../run-jdata-auto.rst +.. dargs:: + :module: dpgen.generator.arginfo + :func: run_jdata_arginfo From 64050ed5a88c0cf63bab5e5ec506ddc6bda2d847 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 11 Jun 2022 00:48:24 -0400 Subject: [PATCH 5/9] switch for mdata --- doc/.gitignore | 2 -- doc/conf.py | 4 ---- doc/gen_arginfo.py | 5 ----- doc/run/mdata.rst | 4 +++- 4 files changed, 3 insertions(+), 12 deletions(-) delete mode 100644 doc/.gitignore delete mode 100644 doc/gen_arginfo.py diff --git a/doc/.gitignore b/doc/.gitignore deleted file mode 100644 index 0f59d284d..000000000 --- a/doc/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -run-mdata-auto.rst -run-jdata-auto.rst \ No newline at end of file diff --git a/doc/conf.py b/doc/conf.py index 9d5ecc006..b33f88b9a 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -105,9 +105,5 @@ def run_apidoc(_): main(['-M', '--tocfile', 'api', '-H', 'DP-GEN API', '-o', os.path.join(cur_dir, "api"), module, '--force']) -def generate_arginfo(app): - subprocess.check_output((sys.executable, "gen_arginfo.py"), universal_newlines=True) - def setup(app): app.connect('builder-inited', run_apidoc) - app.connect('builder-inited', generate_arginfo) diff --git a/doc/gen_arginfo.py b/doc/gen_arginfo.py deleted file mode 100644 index e51d44dc8..000000000 --- a/doc/gen_arginfo.py +++ /dev/null @@ -1,5 +0,0 @@ -from dpgen.generator.arginfo import run_mdata_arginfo - -run_mdata_doc = run_mdata_arginfo().gen_doc() -with open('run-mdata-auto.rst', 'w') as f: - f.write(run_mdata_doc) diff --git a/doc/run/mdata.rst b/doc/run/mdata.rst index de1a3c107..cad09bec8 100644 --- a/doc/run/mdata.rst +++ b/doc/run/mdata.rst @@ -3,4 +3,6 @@ dpgen run machine parameters .. note:: One can load, modify, and export the input file by using our effective web-based tool `DP-GUI `_. All parameters below can be set in DP-GUI. By clicking "SAVE JSON", one can download the input file. -.. include:: ../run-mdata-auto.rst +.. dargs:: + :module: dpgen.generator.arginfo + :func: run_mdata_arginfo From 82757c57deeea6af213c50d02b099b12e8c6203d Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 11 Jun 2022 00:54:38 -0400 Subject: [PATCH 6/9] add extension --- doc/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/conf.py b/doc/conf.py index b33f88b9a..3892320c0 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -40,6 +40,7 @@ extensions = [ 'deepmodeling_sphinx', + 'dargs.sphinx', "sphinx_rtd_theme", 'myst_parser', 'sphinx.ext.autosummary', From 23c7ea1308a6c5987dc0565cfe5875f05497438d Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 11 Jun 2022 04:00:12 -0400 Subject: [PATCH 7/9] the current model_devi_args is indeed lammps arguments --- dpgen/generator/arginfo.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/dpgen/generator/arginfo.py b/dpgen/generator/arginfo.py index 5c253723d..04dbba133 100644 --- a/dpgen/generator/arginfo.py +++ b/dpgen/generator/arginfo.py @@ -120,7 +120,7 @@ def model_devi_jobs_args() -> List[Argument]: return Argument("model_devi_jobs", list, args, [], repeat=True, doc=doc_model_devi_jobs) -def model_devi_args() -> List[Argument]: +def model_devi_lmp_args() -> List[Argument]: doc_model_devi_dt = 'Timestep for MD. 0.002 is recommend.' doc_model_devi_skip = 'Number of structures skipped for fp in each MD.' doc_model_devi_f_trust_lo = 'Lower bound of forces for the selection. If list, should be set for each index in sys_configs, respectively.' @@ -175,6 +175,13 @@ def model_devi_args() -> List[Argument]: ] +def model_devi_args() -> Variant: + doc_model_devi_engine = "Engine for the model deviation task." + return Variant("model_devi_engine", [ + Argument("lammps", dict, model_devi_lmp_args(), doc="LAMMPS"), + ], default="lammps", doc=doc_model_devi_engine) + + # Labeling # vasp def fp_style_vasp_args() -> List[Argument]: @@ -311,6 +318,7 @@ def run_jdata_arginfo() -> Argument: doc_run_jdata = "param.json file" return Argument("run_jdata", dict, - sub_fields=basic_args() + data_args() + training_args() + model_devi_args() + fp_args(), + sub_fields=basic_args() + data_args() + training_args() + fp_args(), + sub_variants=model_devi_args(), doc=doc_run_jdata) From 0470a7e6492f02bcb242d37e0276bde48445784b Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 11 Jun 2022 04:04:20 -0400 Subject: [PATCH 8/9] fix error --- dpgen/generator/arginfo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpgen/generator/arginfo.py b/dpgen/generator/arginfo.py index 04dbba133..3d2affeb3 100644 --- a/dpgen/generator/arginfo.py +++ b/dpgen/generator/arginfo.py @@ -179,7 +179,7 @@ def model_devi_args() -> Variant: doc_model_devi_engine = "Engine for the model deviation task." return Variant("model_devi_engine", [ Argument("lammps", dict, model_devi_lmp_args(), doc="LAMMPS"), - ], default="lammps", doc=doc_model_devi_engine) + ], default_tag="lammps", optional=True, doc=doc_model_devi_engine) # Labeling From cbb252f883e6611bf75b18604d81dbf87ccb2cd8 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 11 Jun 2022 04:15:50 -0400 Subject: [PATCH 9/9] fix error --- dpgen/generator/arginfo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dpgen/generator/arginfo.py b/dpgen/generator/arginfo.py index 3d2affeb3..01bf4af61 100644 --- a/dpgen/generator/arginfo.py +++ b/dpgen/generator/arginfo.py @@ -175,11 +175,11 @@ def model_devi_lmp_args() -> List[Argument]: ] -def model_devi_args() -> Variant: +def model_devi_args() -> List[Variant]: doc_model_devi_engine = "Engine for the model deviation task." - return Variant("model_devi_engine", [ + return [Variant("model_devi_engine", [ Argument("lammps", dict, model_devi_lmp_args(), doc="LAMMPS"), - ], default_tag="lammps", optional=True, doc=doc_model_devi_engine) + ], default_tag="lammps", optional=True, doc=doc_model_devi_engine)] # Labeling