diff --git a/doc/troubleshooting/common-errors.md b/doc/troubleshooting/common-errors.md index d919415ca..6c3dc7438 100644 --- a/doc/troubleshooting/common-errors.md +++ b/doc/troubleshooting/common-errors.md @@ -28,4 +28,4 @@ Some common reasons are as follows: 2. You may have something wrong in your input files, which causes the process to fail. ## RuntimeError: find too many unsuccessfully terminated jobs. -The ratio of failed jobs is larger than ratio_failure. You can set a high value for ratio_failure or check if there is something wrong with your input files. \ No newline at end of file +The ratio of failed jobs is larger than ratio_failure. You can set a high value for ratio_failure or check if there is something wrong with your input files. diff --git a/dpgen/generator/arginfo.py b/dpgen/generator/arginfo.py index 73bacc364..cbc6ec63c 100644 --- a/dpgen/generator/arginfo.py +++ b/dpgen/generator/arginfo.py @@ -278,6 +278,7 @@ def fp_style_gaussian_args() -> List[Argument]: doc_basis_set = 'Custom basis set.' doc_keywords_high_multiplicity = ('Keywords for points with multiple raicals. `multiplicity` should be `auto`. ' 'If not set, fallback to normal keywords.') + args = [ Argument("keywords", [str, list], @@ -302,6 +303,7 @@ def fp_style_gaussian_args() -> List[Argument]: 'hydrogen atom. When the outer atom is a hydrogen atom, the outer atom will be ' 'kept. In this case, other atoms out of the soft cutoff radius will be removed.') doc_fp_params_gaussian = 'Parameters for Gaussian calculation.' + doc_ratio_failed = 'Check the ratio of unsuccessfully terminated jobs. If too many FP tasks are not converged, RuntimeError will be raised.' return [ Argument("use_clusters", bool, optional=True, default=False, doc=doc_use_clusters), @@ -311,6 +313,8 @@ def fp_style_gaussian_args() -> List[Argument]: Argument("cluster_minify", bool, optional=True, default=False, doc=doc_cluster_minify), Argument("fp_params", dict, args, [], optional=False, doc=doc_fp_params_gaussian), + Argument("ratio_failed", float, optional=True, + doc=doc_ratio_failed), ] diff --git a/examples/run/dp2.x-lammps-gaussian/machine.json b/examples/run/dp2.x-lammps-gaussian/machine.json new file mode 100644 index 000000000..fc6b70431 --- /dev/null +++ b/examples/run/dp2.x-lammps-gaussian/machine.json @@ -0,0 +1,110 @@ +{ + "api_version": "1.0", + "deepmd_version": "2.1.0", + "train" : + { + "command": "dp", + "machine": { + "batch_type": "Lebesgue", + "context_type": "LebesgueContext", + "local_root" : "./", + "remote_profile":{ + "email": "huyongxin1015@163.com", + "password": "0415fnmsM.", + "program_id": 10330, + "keep_backup":true, + "input_data":{ + "job_type": "indicate", + "log_file": "00*/train.log", + "grouped":true, + "job_name": "dpgen_train_job", + "disk_size": 100, + "scass_type":"c4_m15_1 * NVIDIA T4", + "checkpoint_files":["00*/checkpoint","00*/model.ckpt*"], + "checkpoint_time":30, + "platform": "ali", + "image_name":"LBG_DeePMD-kit_2.1.0_v1", + "on_demand":0 + } + } + }, + "resources": { + "batch_type": "Lebesgue", + "number_node": 1, + "cpu_per_node": 4, + "gpu_per_node": 1, + "queue_name": "T4_4_15", + "group_size": 1 + } + }, + "model_devi": + { + "command": "lmp", + "machine": { + "batch_type": "Lebesgue", + "context_type": "LebesgueContext", + "local_root" : "./", + "remote_profile":{ + "email": "huyongxin1015@163.com", + "password": "0415fnmsM.", + "program_id": 10330, + "keep_backup":true, + "input_data":{ + "job_type": "indicate", + "log_file": "*/model_devi.log", + "grouped":true, + "job_name": "dpgen_model_devi_job", + "disk_size": 200, + "scass_type":"c4_m15_1 * NVIDIA T4", + "platform": "ali", + "image_name":"LBG_DeePMD-kit_2.1.0_v1", + "on_demand":0 + } + } + }, + "resources": { + "batch_type": "Lebesgue", + "number_node": 1, + "cpu_per_node": 4, + "gpu_per_node": 1, + "queue_name": "T4_4_15", + "group_size": 50 + } + }, + "fp": + { + "command": "g09 < input > output ||:", + "machine": { + "batch_type": "Lebesgue", + "context_type": "LebesgueContext", + "local_root" : "./", + "remote_profile":{ + "email": "huyongxin1015@163.com", + "password": "0415fnmsM.", + "program_id": 10330, + "input_data":{ + "api_version":2, + "job_type": "indicate", + "log_file": "task*/fp.log", + "grouped":true, + "job_name": "dpgen_fp_job", + "disk_size": 100, + "scass_type":"c16_m32_cpu", + "platform": "ali", + "image_name":"LBG_Gaussian_09_v2", + "on_demand":0 + } + } + }, + "resources": { + "batch_type": "Lebesgue", + "source_list": ["/root/g09.sh"], + "number_node": 1, + "cpu_per_node": 16, + "gpu_per_node": 0, + "queue_name": "CPU", + "group_size": 10 + } + } +} + diff --git a/examples/run/dp2.x-lammps-gaussian/param_C4H16N4_deepmd-kit-2.0.1.json b/examples/run/dp2.x-lammps-gaussian/param_C4H16N4_deepmd-kit-2.0.1.json new file mode 100644 index 000000000..a7bfd05dc --- /dev/null +++ b/examples/run/dp2.x-lammps-gaussian/param_C4H16N4_deepmd-kit-2.0.1.json @@ -0,0 +1,140 @@ +{ + "type_map": [ + "H", + "C", + "N" + ], + "mass_map": [ + 1, + 12, + 14 + ], + "init_data_prefix": "./data/deepmd/", + "init_data_sys": [ + "data.000","data.001","data.002","data.003","data.004","data.005" + ], + "init_batch_size": ["auto","auto","auto","auto","auto","auto"], + "sys_configs": [ + [ + "./data/md_sys/data.ch4n2" + ] + ], + "_comment": " that's all ", + "numb_models": 4, + "sys_format":"lammps/lmp", + "default_training_param": { + "model": { + "type_map": [ + "H", + "C", + "N" + ], + "descriptor": { + "type": "se_e2_a", + "sel": [ + 16, + 4, + 4 + ], + "rcut_smth": 0.5, + "rcut": 5.0, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": true, + "axis_neuron": 12, + "seed": 1 + }, + "fitting_net": { + "neuron": [ + 120, + 120, + 120 + ], + "resnet_dt": false, + "seed": 1 + } + }, + "learning_rate": { + "type": "exp", + "start_lr": 0.001, + "decay_steps": 200 + }, + "loss": { + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0.0, + "limit_pref_v": 0.0 + }, + "training": { + "_set_prefix": "set", + "stop_batch": 20000, + "_batch_size": 1, + "disp_file": "lcurve.out", + "disp_freq": 1000, + "numb_test": 4, + "save_freq": 1000, + "save_ckpt": "model.ckpt", + "disp_training": true, + "time_training": true, + "profiling": false, + "profiling_file": "timeline.json", + "_comment": "that's all" + } + }, + "model_devi_dt": 0.002, + "model_devi_skip": 0, + "model_devi_f_trust_lo": 0.02, + "model_devi_f_trust_hi": 1.95, + "model_devi_clean_traj": true, + "model_devi_jobs": [ + { + "sys_idx": [ + 0 + ], + "temps": [ + 100 + ], + "press": [ + 1 + ], + "trj_freq": 10, + "nsteps": 2000, + "ensemble": "nvt", + "_idx": "00" + }, + { + "sys_idx": [ + 0 + ], + "temps": [ + 100 + ], + "press": [ + 1 + ], + "trj_freq": 10, + "nsteps": 5000, + "ensemble": "nvt", + "_idx": "01" + } + + ], + "use_clusters": true, + "cluster_cutoff": 5.0, + "cluster_minify": true, + "use_relative": true, + "epsilon": 1.0, + "ratio_failed": 0.20, + "fp_style": "gaussian", + "shuffle_poscar": false, + "fp_task_max": 20, + "fp_task_min": 5, + "fp_params":{ + "keywords": "force B3LYP 6-31g(d,p) nosymm", + "nproc":2 , + "multiplicity": "auto" + } +} diff --git a/tests/test_check_examples.py b/tests/test_check_examples.py index fa2bdf7fa..9f97497fb 100644 --- a/tests/test_check_examples.py +++ b/tests/test_check_examples.py @@ -55,6 +55,7 @@ (run_jdata, p_examples / "run" / "deprecated" / "dp0.12-lammps-siesta" / "dp-lammps-siesta" / "CH4" / "param_CH4.json"), (run_jdata, p_examples / "run" / "deprecated" / "dp0.12-lammps-vasp" / "Al" / "param_al_all_gpu.json"), (run_jdata, p_examples / "run" / "deprecated" / "dp0.12-lammps-vasp" / "CH4" / "param_CH4.json"), + (run_jdata, p_examples / "run" / "dp2.x-lammps-gaussian" / "param_C4H16N4_deepmd-kit-2.0.1.json"), # machines #(run_mdata, p_examples / "machine" / "DeePMD-kit-2.x" / "lebesgue_v2_machine.json"), #(run_mdata, p_examples / "machine" / "DeePMD-kit-1.x" / "machine-ali.json"), @@ -72,6 +73,7 @@ (run_mdata, p_examples / "CH4-refact-dpdispatcher" / "machine-dpcloudserver.json"), (run_mdata, p_examples / "run" / "dp2.x-lammps-ABACUS-lcao" / "fcc-al" / "machine.json"), (run_mdata, p_examples / "run" / "dp2.x-lammps-ABACUS-pw" / "fcc-al" / "machine.json"), + (run_mdata, p_examples / "run" / "dp2.x-lammps-gaussian" / "machine.json"), #(run_mdata, p_examples / "run" / "dp2.x-gromacs-gaussian" / "machine.json"), (simplify_mdata, p_examples / "simplify-MAPbI3-scan-lebesgue" / "simplify_example" / "machine.json"), )