diff --git a/dpdata/fhi_aims/output.py b/dpdata/fhi_aims/output.py index 4ee819c3b..1a1b2c579 100755 --- a/dpdata/fhi_aims/output.py +++ b/dpdata/fhi_aims/output.py @@ -1,5 +1,6 @@ import numpy as np import re +import warnings latt_patt="\|\s+([0-9]{1,}[.][0-9]*)\s+([0-9]{1,}[.][0-9]*)\s+([0-9]{1,}[.][0-9]*)" pos_patt_first="\|\s+[0-9]{1,}[:]\s\w+\s(\w+)(\s.*[-]?[0-9]{1,}[.][0-9]*)(\s+[-]?[0-9]{1,}[.][0-9]*)(\s+[-]?[0-9]{1,}[.][0-9]*)" @@ -63,7 +64,7 @@ def get_fhi_aims_block(fp) : return blk return blk -def get_frames (fname, md=True, begin = 0, step = 1) : +def get_frames (fname, md=True, begin = 0, step = 1, convergence_check=True) : fp = open(fname) blk = get_fhi_aims_block(fp) ret = get_info(blk, type_idx_zero = True) @@ -78,6 +79,7 @@ def get_frames (fname, md=True, begin = 0, step = 1) : all_virials = [] cc = 0 + rec_failed = [] while len(blk) > 0 : if debug: with open(str(cc),'w') as f: @@ -87,9 +89,9 @@ def get_frames (fname, md=True, begin = 0, step = 1) : coord, _cell, energy, force, virial, is_converge = analyze_block(blk, first_blk=True, md=md) else: coord, _cell, energy, force, virial, is_converge = analyze_block(blk, first_blk=False) - if is_converge : - if len(coord) == 0: - break + if len(coord) == 0: + break + if is_converge or not convergence_check: all_coords.append(coord) if _cell: @@ -101,9 +103,16 @@ def get_frames (fname, md=True, begin = 0, step = 1) : all_forces.append(force) if virial is not None : all_virials.append(virial) + if not is_converge: + rec_failed.append(cc+1) + blk = get_fhi_aims_block(fp) cc += 1 + if len(rec_failed) > 0 : + prt = "so they are not collected." if convergence_check else "but they are still collected due to the requirement for ignoring convergence checks." 
+ warnings.warn(f"The following structures were unconverged: {rec_failed}; "+prt) + if len(all_virials) == 0 : all_virials = None else : diff --git a/dpdata/plugins/fhi_aims.py b/dpdata/plugins/fhi_aims.py index 96e000c6b..b1805c4ef 100644 --- a/dpdata/plugins/fhi_aims.py +++ b/dpdata/plugins/fhi_aims.py @@ -4,7 +4,7 @@ @Format.register("fhi_aims/md") @Format.register("fhi_aims/output") class FhiMDFormat(Format): - def from_labeled_system(self, file_name, md=True, begin = 0, step = 1, **kwargs): + def from_labeled_system(self, file_name, md=True, begin = 0, step = 1, convergence_check=True, **kwargs): data = {} data['atom_names'], \ data['atom_numbs'], \ @@ -14,7 +14,7 @@ def from_labeled_system(self, file_name, md=True, begin = 0, step = 1, **kwargs) data['energies'], \ data['forces'], \ tmp_virial, \ - = dpdata.fhi_aims.output.get_frames(file_name, md = md, begin = begin, step = step) + = dpdata.fhi_aims.output.get_frames(file_name, md = md, begin = begin, step = step, convergence_check=convergence_check) if tmp_virial is not None : data['virials'] = tmp_virial return data diff --git a/dpdata/plugins/pwmat.py b/dpdata/plugins/pwmat.py index 7756e0c5c..3365806e5 100644 --- a/dpdata/plugins/pwmat.py +++ b/dpdata/plugins/pwmat.py @@ -11,7 +11,7 @@ @Format.register("pwmat/output") class PwmatOutputFormat(Format): @Format.post("rot_lower_triangular") - def from_labeled_system(self, file_name, begin=0, step=1, **kwargs): + def from_labeled_system(self, file_name, begin=0, step=1, convergence_check=True, **kwargs): data = {} data['atom_names'], \ data['atom_numbs'], \ @@ -21,7 +21,7 @@ def from_labeled_system(self, file_name, begin=0, step=1, **kwargs): data['energies'], \ data['forces'], \ tmp_virial \ - = dpdata.pwmat.movement.get_frames(file_name, begin=begin, step=step) + = dpdata.pwmat.movement.get_frames(file_name, begin=begin, step=step, convergence_check=convergence_check) if tmp_virial is not None: data['virials'] = tmp_virial # scale virial to the unit of eV 
diff --git a/dpdata/plugins/vasp.py b/dpdata/plugins/vasp.py index 0e0475151..07ec34f17 100644 --- a/dpdata/plugins/vasp.py +++ b/dpdata/plugins/vasp.py @@ -54,7 +54,7 @@ def to_system(self, data, frame_idx=0, **kwargs): @Format.register("vasp/outcar") class VASPOutcarFormat(Format): @Format.post("rot_lower_triangular") - def from_labeled_system(self, file_name, begin=0, step=1, **kwargs): + def from_labeled_system(self, file_name, begin=0, step=1, convergence_check=True, **kwargs): data = {} ml = kwargs.get("ml", False) data['atom_names'], \ @@ -65,7 +65,7 @@ def from_labeled_system(self, file_name, begin=0, step=1, **kwargs): data['energies'], \ data['forces'], \ tmp_virial, \ - = dpdata.vasp.outcar.get_frames(file_name, begin=begin, step=step, ml=ml) + = dpdata.vasp.outcar.get_frames(file_name, begin=begin, step=step, ml=ml, convergence_check=convergence_check) if tmp_virial is not None: data['virials'] = tmp_virial # scale virial to the unit of eV diff --git a/dpdata/pwmat/movement.py b/dpdata/pwmat/movement.py index 121f38855..c39950f0a 100644 --- a/dpdata/pwmat/movement.py +++ b/dpdata/pwmat/movement.py @@ -1,5 +1,6 @@ import numpy as np from ..periodic_table import ELEMENTS +import warnings def system_info (lines, type_idx_zero = False) : atom_names = [] @@ -49,7 +50,7 @@ def get_movement_block(fp) : return blk # we assume that the force is printed ... 
-def get_frames (fname, begin = 0, step = 1) : +def get_frames (fname, begin = 0, step = 1, convergence_check=True) : fp = open(fname) blk = get_movement_block(fp) @@ -64,20 +65,28 @@ def get_frames (fname, begin = 0, step = 1) : all_virials = [] cc = 0 + rec_failed = [] while len(blk) > 0 : if cc >= begin and (cc - begin) % step == 0 : coord, cell, energy, force, virial, is_converge = analyze_block(blk, ntot, nelm) - if is_converge : - if len(coord) == 0: - break + if len(coord) == 0: + break + if is_converge or not convergence_check: all_coords.append(coord) all_cells.append(cell) all_energies.append(energy) all_forces.append(force) if virial is not None : all_virials.append(virial) + if not is_converge: + rec_failed.append(cc+1) + blk = get_movement_block(fp) cc += 1 + + if len(rec_failed) > 0 : + prt = "so they are not collected." if convergence_check else "but they are still collected due to the requirement for ignoring convergence checks." + warnings.warn(f"The following structures were unconverged: {rec_failed}; "+prt) if len(all_virials) == 0 : all_virials = None diff --git a/dpdata/system.py b/dpdata/system.py index 1dbbd2118..0600f5634 100644 --- a/dpdata/system.py +++ b/dpdata/system.py @@ -176,6 +176,7 @@ def __init__ (self, begin = 0, step = 1, data = None, + convergence_check = True, **kwargs) : """ Constructor @@ -192,14 +193,49 @@ def __init__ (self, - ``deepmd/raw``: deepmd-kit raw - ``deepmd/npy``: deepmd-kit compressed format (numpy binary) - ``vasp/poscar``: vasp POSCAR + - ``vasp/contcar``: vasp contcar + - ``vasp/string``: vasp string + - ``vasp/outcar``: vasp outcar + - ``vasp/xml``: vasp xml - ``qe/cp/traj``: Quantum Espresso CP trajectory files. should have: file_name+'.in' and file_name+'.pos' - ``qe/pw/scf``: Quantum Espresso PW single point calculations. Both input and output files are required. If file_name is a string, it denotes the output file name. Input file name is obtained by replacing 'out' by 'in' from file_name. 
Or file_name is a list, with the first element being the input file name and the second element being the output filename. - ``abacus/scf``: ABACUS pw/lcao scf. The directory containing INPUT file is required. - ``abacus/md``: ABACUS pw/lcao MD. The directory containing INPUT file is required. - - ``abacus/relax``: ABACUS pw/lcao relax or cell-relax. The directory containing INPUT file is required. + - ``abacus/relax``: ABACUS pw/lcao relax or cell-relax. The directory containing INPUT file is required. + - ``abacus/stru``: abacus stru + - ``abacus/lcao/scf``: abacus lcao scf + - ``abacus/pw/scf``: abacus pw scf + - ``abacus/lcao/md``: abacus lcao md + - ``abacus/pw/md``: abacus pw md + - ``abacus/lcao/relax``: abacus lcao relax + - ``abacus/pw/relax``: abacus pw relax - ``siesta/output``: siesta SCF output file - ``siesta/aimd_output``: siesta aimd output file - ``pwmat/atom.config``: pwmat atom.config + - ``pwmat/movement``: pwmat movement + - ``pwmat/output``: pwmat output + - ``pwmat/mlmd``: pwmat mlmd + - ``pwmat/final.config``: pwmat final.config + - ``quip/gap/xyz_file``: quip gap xyz_file + - ``quip/gap/xyz``: quip gap xyz + - ``fhi_aims/output``: fhi_aims output + - ``fhi_aims/md``: fhi_aims md + - ``fhi_aims/scf``: fhi_aims scf + - ``pymatgen/structure``: pymatgen structure + - ``pymatgen/molecule``: pymatgen molecule + - ``pymatgen/computedstructureentry``: pymatgen computedstructureentry + - ``amber/md``: amber md + - ``sqm/out``: sqm out + - ``sqm/in``: sqm in + - ``ase/structure``: ase structure + - ``gaussian/log``: gaussian log + - ``gaussian/md``: gaussian md + - ``gaussian/gjf``: gaussian gjf + - ``deepmd/comp``: deepmd comp + - ``deepmd/hdf5``: deepmd hdf5 + - ``gromacs/gro``: gromacs gro + - ``cp2k/aimd_output``: cp2k aimd_output + - ``cp2k/output``: cp2k output type_map : list of str Needed by formats lammps/lmp and lammps/dump. Maps atom type to name. The atom with type `ii` is mapped to `type_map[ii]`. 
If not provided the atom names are assigned to `'Type_1'`, `'Type_2'`, `'Type_3'`... @@ -208,7 +244,9 @@ def __init__ (self, step : int The number of skipped frames when loading MD trajectory. data : dict - The raw data of System class. + The raw data of System class. + convergence_check : bool + Whether to skip frames that the parser reports as unconverged (used by formats such as ``vasp/outcar``, ``pwmat/output`` and ``fhi_aims/output``). If False, unconverged frames are kept; in both cases a warning lists them. """ self.data = {} self.data['atom_numbs'] = [] @@ -224,7 +262,7 @@ def __init__ (self, return if file_name is None : return - self.from_fmt(file_name, fmt, type_map=type_map, begin= begin, step=step, **kwargs) + self.from_fmt(file_name, fmt, type_map=type_map, begin= begin, step=step, convergence_check=convergence_check, **kwargs) if type_map is not None: self.apply_type_map(type_map) diff --git a/dpdata/vasp/outcar.py b/dpdata/vasp/outcar.py index cae1dd158..3e32a1461 100644 --- a/dpdata/vasp/outcar.py +++ b/dpdata/vasp/outcar.py @@ -1,5 +1,6 @@ import numpy as np import re +import warnings def system_info(lines, type_idx_zero = False): atom_names = [] @@ -52,7 +53,7 @@ def get_outcar_block(fp, ml = False): return blk # we assume that the force is printed ... -def get_frames(fname, begin = 0, step = 1, ml = False): +def get_frames(fname, begin = 0, step = 1, ml = False, convergence_check=True): fp = open(fname) blk = get_outcar_block(fp) @@ -66,22 +67,29 @@ def get_frames(fname, begin = 0, step = 1, ml = False): all_virials = [] cc = 0 + rec_failed = [] while len(blk) > 0 : if cc >= begin and (cc - begin) % step == 0 : coord, cell, energy, force, virial, is_converge = analyze_block(blk, ntot, nelm, ml) - if is_converge : - if len(coord) == 0: - break + if len(coord) == 0: + break + if is_converge or not convergence_check: all_coords.append(coord) all_cells.append(cell) all_energies.append(energy) all_forces.append(force) if virial is not None : all_virials.append(virial) + if not is_converge: + rec_failed.append(cc+1) blk = get_outcar_block(fp, ml) cc += 1 - + + if len(rec_failed) > 0 : + prt = "so they are not collected." 
if convergence_check else "but they are still collected due to the requirement for ignoring convergence checks." + warnings.warn(f"The following structures were unconverged: {rec_failed}; "+prt) + if len(all_virials) == 0 : all_virials = None else :