diff --git a/.github/ISSUE_TEMPLATE/request-for-help.md b/.github/ISSUE_TEMPLATE/request-for-help.md index 397d2b02a..ad05a34d3 100644 --- a/.github/ISSUE_TEMPLATE/request-for-help.md +++ b/.github/ISSUE_TEMPLATE/request-for-help.md @@ -13,7 +13,7 @@ Before asking questions, you can search the previous issues or discussions check the [README](https://github.com/deepmodeling/dpdata/#readme). -Please **do not** post requests for help (e.g. with installing or using dpdata) here. +Please **do not** post requests for help (e.g. with installing or using dpdata) here. Instead go to [discussions](https://github.com/deepmodeling/dpdata/discussions). This issue tracker is for tracking dpdata development related issues only. diff --git a/.github/workflows/pub-pypi.yml b/.github/workflows/pub-pypi.yml index a276f85be..24f99f7f9 100644 --- a/.github/workflows/pub-pypi.yml +++ b/.github/workflows/pub-pypi.yml @@ -36,4 +36,3 @@ jobs: uses: pypa/gh-action-pypi-publish@master with: password: ${{ secrets.PYPI_API_TOKEN }} - diff --git a/.github/workflows/test_import.yml b/.github/workflows/test_import.yml index 34bc23be7..b04d05cde 100644 --- a/.github/workflows/test_import.yml +++ b/.github/workflows/test_import.yml @@ -15,4 +15,3 @@ jobs: architecture: 'x64' - run: python -m pip install . 
- run: python -c 'import dpdata' - diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..83769dfbf --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,25 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + # there are many log files in tests + # TODO: separate py files and log files + - id: trailing-whitespace + exclude: "^tests/.*$" + - id: end-of-file-fixer + exclude: "^tests/.*$" + - id: check-yaml + - id: check-json + - id: check-added-large-files + - id: check-merge-conflict + - id: check-symlinks + - id: check-toml +# Python +- repo: https://github.com/psf/black + rev: 22.12.0 + hooks: + - id: black-jupyter +ci: + autoupdate_branch: devel diff --git a/README.md b/README.md index c93221ad8..9c9fe53be 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ dpdata only works with python 3.7 or above. # Installation -One can download the source code of dpdata by +One can download the source code of dpdata by ```bash git clone https://github.com/deepmodeling/dpdata.git dpdata ``` @@ -25,10 +25,10 @@ This section gives some examples on how dpdata works. Firstly one needs to impor ```python import dpdata ``` -The typicall workflow of `dpdata` is +The typical workflow of `dpdata` is 1. Load data from vasp or lammps or deepmd-kit data files. -2. Manipulate data +2. Manipulate data 3. Dump data to in a desired format @@ -41,9 +41,9 @@ or let dpdata infer the format (`vasp/poscar`) of the file from the file name ex d_poscar = dpdata.System('my.POSCAR') ``` The number of atoms, atom types, coordinates are loaded from the `POSCAR` and stored to a data `System` called `d_poscar`. -A data `System` (a concept used by [deepmd-kit](https://github.com/deepmodeling/deepmd-kit)) contains frames that has the same number of atoms of the same type. 
The order of the atoms should be consistent among the frames in one `System`. +A data `System` (a concept used by [deepmd-kit](https://github.com/deepmodeling/deepmd-kit)) contains frames that has the same number of atoms of the same type. The order of the atoms should be consistent among the frames in one `System`. It is noted that `POSCAR` only contains one frame. -If the multiple frames stored in, for example, a `OUTCAR` is wanted, +If the multiple frames stored in, for example, a `OUTCAR` is wanted, ```python d_outcar = dpdata.LabeledSystem('OUTCAR') ``` @@ -53,9 +53,9 @@ The `System` or `LabeledSystem` can be constructed from the following file forma | Software| format | multi frames | labeled | class | format key | | ------- | :--- | :---: | :---: | :--- | :--- | -| vasp | poscar | False | False | System | 'vasp/poscar' | -| vasp | outcar | True | True | LabeledSystem | 'vasp/outcar' | -| vasp | xml | True | True | LabeledSystem | 'vasp/xml' | +| vasp | poscar | False | False | System | 'vasp/poscar' | +| vasp | outcar | True | True | LabeledSystem | 'vasp/outcar' | +| vasp | xml | True | True | LabeledSystem | 'vasp/xml' | | lammps | lmp | False | False | System | 'lammps/lmp' | | lammps | dump | True | False | System | 'lammps/dump' | | deepmd | raw | True | False | System | 'deepmd/raw' | @@ -89,7 +89,7 @@ The `System` or `LabeledSystem` can be constructed from the following file forma The Class `dpdata.MultiSystems` can read data from a dir which may contains many files of different systems, or from single xyz file which contains different systems. -Use `dpdata.MultiSystems.from_dir` to read from a directory, `dpdata.MultiSystems` will walk in the directory +Use `dpdata.MultiSystems.from_dir` to read from a directory, `dpdata.MultiSystems` will walk in the directory Recursively and find all file with specific file_name. Supports all the file formats that `dpdata.LabeledSystem` supports. Use `dpdata.MultiSystems.from_file` to read from single file. 
Single-file support is available for the `quip/gap/xyz` and `ase/structure` formats. @@ -148,7 +148,7 @@ coords = d_outcar['coords'] ``` Available properties are (nframe: number of frames in the system, natoms: total number of atoms in the system) -| key | type | dimension | are labels | description +| key | type | dimension | are labels | description | --- | --- | --- | --- | --- | 'atom_names' | list of str | ntypes | False | The name of each atom type | 'atom_numbs' | list of int | ntypes | False | The number of atoms of each atom type @@ -186,7 +186,7 @@ dpdata.LabeledSystem('OUTCAR').sub_system([0,-1]).to('deepmd/raw', 'dpmd_raw') by which only the first and last frames are dumped to `dpmd_raw`. -## replicate +## replicate dpdata will create a super cell of the current atom configuration. ```python dpdata.System('./POSCAR').replicate((1,2,3,) ) @@ -197,9 +197,9 @@ tuple(1,2,3) means don't copy atom configuration in x direction, make 2 copys in ## perturb By the following example, each frame of the original system (`dpdata.System('./POSCAR')`) is perturbed to generate three new frames. For each frame, the cell is perturbed by 5% and the atom positions are perturbed by 0.6 Angstrom. `atom_pert_style` indicates that the perturbation to the atom positions is subject to normal distribution. Other available options to `atom_pert_style` are`uniform` (uniform in a ball), and `const` (uniform on a sphere). ```python -perturbed_system = dpdata.System('./POSCAR').perturb(pert_num=3, - cell_pert_fraction=0.05, - atom_pert_distance=0.6, +perturbed_system = dpdata.System('./POSCAR').perturb(pert_num=3, + cell_pert_fraction=0.05, + atom_pert_distance=0.6, atom_pert_style='normal') print(perturbed_system.data) ``` @@ -213,7 +213,7 @@ s.to_vasp_poscar('POSCAR.P42nmc.replace') ``` # BondOrderSystem -A new class `BondOrderSystem` which inherits from class `System` is introduced in dpdata. 
This new class contains information of chemical bonds and formal charges (stored in `BondOrderSystem.data['bonds']`, `BondOrderSystem.data['formal_charges']`). Now BondOrderSystem can only read from .mol/.sdf formats, because of its dependency on rdkit (which means rdkit must be installed if you want to use this function). Other formats, such as pdb, must be converted to .mol/.sdf format (maybe with software like open babel). +A new class `BondOrderSystem` which inherits from class `System` is introduced in dpdata. This new class contains information of chemical bonds and formal charges (stored in `BondOrderSystem.data['bonds']`, `BondOrderSystem.data['formal_charges']`). Now BondOrderSystem can only read from .mol/.sdf formats, because of its dependency on rdkit (which means rdkit must be installed if you want to use this function). Other formats, such as pdb, must be converted to .mol/.sdf format (maybe with software like open babel). ```python import dpdata system_1 = dpdata.BondOrderSystem("tests/bond_order/CH3OH.mol", fmt="mol") # read from .mol file @@ -242,7 +242,7 @@ According to our test, our sanitization procedure can successfully read 4852 sma ```python import dpdata - + for sdf_file in glob.glob("bond_order/refined-set-ligands/obabel/*sdf"): syst = dpdata.BondOrderSystem(sdf_file, sanitize_level='high', verbose=False) ``` diff --git a/docs/Makefile b/docs/Makefile index 1c9a12fe3..5970ce25e 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -17,4 +17,4 @@ help: # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/conf.py b/docs/conf.py index 7931ff700..eabf1c845 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,19 +16,20 @@ import sys import subprocess as sp from datetime import date -sys.path.insert(0, os.path.abspath('..')) + +sys.path.insert(0, os.path.abspath("..")) # -- Project information ----------------------------------------------------- -project = 'dpdata' -copyright = '2019-%d, DeepModeling ' % date.today().year -author = 'Han Wang' +project = "dpdata" +copyright = "2019-%d, DeepModeling " % date.today().year +author = "Han Wang" # The short X.Y version -version = '0.0' +version = "0.0" # The full version, including alpha/beta/rc tags -release = '0.0.0-rc' +release = "0.0.0-rc" # -- General configuration --------------------------------------------------- @@ -41,27 +42,27 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'deepmodeling_sphinx', - 'sphinx_rtd_theme', - 'sphinx.ext.mathjax', - 'sphinx.ext.viewcode', - 'sphinx.ext.intersphinx', - 'numpydoc', - 'm2r2', - 'sphinxarg.ext', + "deepmodeling_sphinx", + "sphinx_rtd_theme", + "sphinx.ext.mathjax", + "sphinx.ext.viewcode", + "sphinx.ext.intersphinx", + "numpydoc", + "m2r2", + "sphinxarg.ext", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = ['.rst', '.md'] +source_suffix = [".rst", ".md"] # The master toctree document. -master_doc = 'index' +master_doc = "index" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
@@ -73,10 +74,10 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path . -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # -- Options for HTML output ------------------------------------------------- @@ -84,7 +85,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -95,7 +96,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -#html_static_path = ['_static'] +# html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. @@ -111,7 +112,7 @@ # -- Options for HTMLHelp output --------------------------------------------- # Output file base name for HTML help builder. -htmlhelp_basename = 'dpdatadoc' +htmlhelp_basename = "dpdatadoc" # -- Options for LaTeX output ------------------------------------------------ @@ -120,15 +121,12 @@ # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -138,8 +136,7 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). 
latex_documents = [ - (master_doc, 'dpdata.tex', 'dpdata Documentation', - 'Han Wang', 'manual'), + (master_doc, "dpdata.tex", "dpdata Documentation", "Han Wang", "manual"), ] @@ -147,10 +144,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'dpdata', 'dpdata Documentation', - [author], 1) -] +man_pages = [(master_doc, "dpdata", "dpdata Documentation", [author], 1)] # -- Options for Texinfo output ---------------------------------------------- @@ -159,26 +153,47 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'dpdata', 'dpdata Documentation', - author, 'dpdata', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "dpdata", + "dpdata Documentation", + author, + "dpdata", + "One line description of project.", + "Miscellaneous", + ), ] # -- Extension configuration ------------------------------------------------- def run_apidoc(_): from sphinx.ext.apidoc import main - sys.path.append(os.path.join(os.path.dirname(__file__), '..')) + + sys.path.append(os.path.join(os.path.dirname(__file__), "..")) cur_dir = os.path.abspath(os.path.dirname(__file__)) module = os.path.join(cur_dir, "..", "dpdata") - main(['-M', '--tocfile', 'api', '-H', 'API documentation', '-o', os.path.join(cur_dir, "api"), module, '--force']) + main( + [ + "-M", + "--tocfile", + "api", + "-H", + "API documentation", + "-o", + os.path.join(cur_dir, "api"), + module, + "--force", + ] + ) + def run_formats(_): sp.check_output([sys.executable, "make_format.py"]) + def setup(app): - app.connect('builder-inited', run_apidoc) - app.connect('builder-inited', run_formats) + app.connect("builder-inited", run_apidoc) + app.connect("builder-inited", run_formats) intersphinx_mapping = { diff --git a/docs/credits.rst b/docs/credits.rst index a72b83e5a..54fd98842 100644 --- a/docs/credits.rst +++ b/docs/credits.rst @@ 
-1,4 +1,4 @@ Authors ======= -.. git-shortlog-authors:: \ No newline at end of file +.. git-shortlog-authors:: diff --git a/docs/formats.rst b/docs/formats.rst index 1920a8489..c0ff5b8f9 100644 --- a/docs/formats.rst +++ b/docs/formats.rst @@ -6,4 +6,3 @@ dpdata supports the following formats: .. csv-table:: Supported Formats :file: formats.csv :header-rows: 1 - diff --git a/docs/make_format.py b/docs/make_format.py index 61fc10a46..ae8002c13 100644 --- a/docs/make_format.py +++ b/docs/make_format.py @@ -13,16 +13,17 @@ def get_formats() -> dict: formats[ff].append(kk) return formats + def detect_overridden(cls: Format, method: str) -> bool: """Check whether a method is override - + Parameters ---------- cls : Format a format method : str method name - + Returns ------- bool @@ -30,38 +31,45 @@ def detect_overridden(cls: Format, method: str) -> bool: """ return method in cls.__dict__ + def get_cls_link(cls: object) -> str: """Returns class link. - + Parameters ---------- cls : object the class - + Returns ------- str the link of a class """ - return ':class:`%s <%s>`' % (cls.__name__, ".".join([cls.__module__, cls.__name__])) + return ":class:`%s <%s>`" % (cls.__name__, ".".join([cls.__module__, cls.__name__])) + def check_supported(fmt: Format): methods = set() for mtd in [ - 'from_system', 'to_system', - 'from_labeled_system', 'to_labeled_system', - 'from_bond_order_system', 'to_bond_order_system', - 'from_multi_systems', 'to_multi_systems', - ]: + "from_system", + "to_system", + "from_labeled_system", + "to_labeled_system", + "from_bond_order_system", + "to_bond_order_system", + "from_multi_systems", + "to_multi_systems", + ]: if detect_overridden(fmt, mtd): methods.add(mtd) - if mtd == 'to_system': - methods.add('to_labeled_system') + if mtd == "to_system": + methods.add("to_labeled_system") if fmt.MultiMode != fmt.MultiModes.NotImplemented: - methods.add('from_multi_systems') - methods.add('to_multi_systems') + methods.add("from_multi_systems") + 
methods.add("to_multi_systems") return methods + method_links = { "from_system": ":func:`System() `", "to_system": ":func:`System.to() `", @@ -75,16 +83,22 @@ def check_supported(fmt: Format): if __name__ == "__main__": formats = get_formats() - with open('formats.csv', 'w', newline='') as csvfile: + with open("formats.csv", "w", newline="") as csvfile: fieldnames = [ - 'Class', 'Alias', 'Supported Functions', - ] + "Class", + "Alias", + "Supported Functions", + ] writer = csv.DictWriter(csvfile, fieldnames=fieldnames) writer.writeheader() for kk, vv in formats.items(): - writer.writerow({ - 'Class': get_cls_link(kk), - 'Alias': '\n'.join(('``%s``' % vvv for vvv in vv)), - 'Supported Functions': '\n'.join(method_links[mtd] for mtd in check_supported(kk)), - }) + writer.writerow( + { + "Class": get_cls_link(kk), + "Alias": "\n".join(("``%s``" % vvv for vvv in vv)), + "Supported Functions": "\n".join( + method_links[mtd] for mtd in check_supported(kk) + ), + } + ) diff --git a/dpdata/__about__.py b/dpdata/__about__.py index 2c6b9a137..d5cfca647 100644 --- a/dpdata/__about__.py +++ b/dpdata/__about__.py @@ -1 +1 @@ -__version__ = 'unknown' +__version__ = "unknown" diff --git a/dpdata/__init__.py b/dpdata/__init__.py index 001777a30..f426b7903 100644 --- a/dpdata/__init__.py +++ b/dpdata/__init__.py @@ -14,11 +14,10 @@ try: # prevent conflict with dpdata.rdkit import rdkit as _ + USE_RDKIT = True except ModuleNotFoundError: USE_RDKIT = False if USE_RDKIT: from .bond_order_system import BondOrderSystem - - diff --git a/dpdata/abacus/md.py b/dpdata/abacus/md.py index 253e48a2f..be6bee47d 100644 --- a/dpdata/abacus/md.py +++ b/dpdata/abacus/md.py @@ -1,7 +1,15 @@ from ast import dump -import os,sys +import os, sys import numpy as np -from .scf import ry2ev, bohr2ang, kbar2evperang3, get_block, get_geometry_in, get_cell, get_coords +from .scf import ( + ry2ev, + bohr2ang, + kbar2evperang3, + get_block, + get_geometry_in, + get_cell, + get_coords, +) import re import 
warnings @@ -9,28 +17,31 @@ # The atomic coordinates are read in from generated files in OUT.XXXX. # Energies, forces # IMPORTANT: the program defaultly takes STRU input file as standard cell information, -# therefore the direct and cartesan coordinates read could be different from the ones in +# therefore the direct and cartesan coordinates read could be different from the ones in # the output cif files!!! # It is highly recommanded to use ORTHOGANAL coordinates in STRU file if you wish to get -# same coordinates in both dpdata and output cif files. +# same coordinates in both dpdata and output cif files. + def get_path_out(fname, inlines): # This function is different from the same-name function in scf.py. # This function returns OUT.XXXX's base directory. path_out = os.path.join(fname, "OUT.ABACUS/") for line in inlines: - if len(line)>0 and "suffix" in line and "suffix"==line.split()[0]: - suffix = line.split()[1] - path_out = os.path.join(fname, "OUT.%s/" % suffix) - break + if len(line) > 0 and "suffix" in line and "suffix" == line.split()[0]: + suffix = line.split()[1] + path_out = os.path.join(fname, "OUT.%s/" % suffix) + break return path_out + def get_coord_dump_freq(inlines): for line in inlines: - if len(line)>0 and "md_dumpfreq" in line and "md_dumpfreq" == line.split()[0]: + if len(line) > 0 and "md_dumpfreq" in line and "md_dumpfreq" == line.split()[0]: return int(line.split()[1]) return 1 + def get_coords_from_dump(dumplines, natoms): nlines = len(dumplines) total_natoms = sum(natoms) @@ -38,13 +49,18 @@ def get_coords_from_dump(dumplines, natoms): if "VIRIAL" in dumplines[6]: calc_stress = True else: - assert("POSITIONS" in dumplines[6] and "FORCE" in dumplines[6]), "keywords 'POSITIONS' and 'FORCE' cannot be found in the 6th line. Please check." + assert ( + "POSITIONS" in dumplines[6] and "FORCE" in dumplines[6] + ), "keywords 'POSITIONS' and 'FORCE' cannot be found in the 6th line. Please check." 
nframes_dump = -1 if calc_stress: - nframes_dump = int(nlines/(total_natoms + 13)) + nframes_dump = int(nlines / (total_natoms + 13)) else: - nframes_dump = int(nlines/(total_natoms + 9)) - assert(nframes_dump > 0), "Number of lines in MD_dump file = %d. Number of atoms = %d. The MD_dump file is incomplete."%(nlines, total_natoms) + nframes_dump = int(nlines / (total_natoms + 9)) + assert nframes_dump > 0, ( + "Number of lines in MD_dump file = %d. Number of atoms = %d. The MD_dump file is incomplete." + % (nlines, total_natoms) + ) cells = np.zeros([nframes_dump, 3, 3]) stresses = np.zeros([nframes_dump, 3, 3]) forces = np.zeros([nframes_dump, total_natoms, 3]) @@ -53,102 +69,154 @@ def get_coords_from_dump(dumplines, natoms): for iline in range(nlines): if "MDSTEP" in dumplines[iline]: # read in LATTICE_CONSTANT - celldm = float(dumplines[iline+1].split(" ")[-1]) + celldm = float(dumplines[iline + 1].split(" ")[-1]) # read in LATTICE_VECTORS for ix in range(3): - cells[iframe, ix] = np.array([float(i) for i in re.split('\s+', dumplines[iline+3+ix])[-3:]]) * celldm + cells[iframe, ix] = ( + np.array( + [ + float(i) + for i in re.split("\s+", dumplines[iline + 3 + ix])[-3:] + ] + ) + * celldm + ) if calc_stress: - stresses[iframe, ix] = np.array([float(i) for i in re.split('\s+', dumplines[iline+7+ix])[-3:]]) + stresses[iframe, ix] = np.array( + [ + float(i) + for i in re.split("\s+", dumplines[iline + 7 + ix])[-3:] + ] + ) for iat in range(total_natoms): if calc_stress: - coords[iframe, iat] = np.array([float(i) for i in re.split('\s+', dumplines[iline+11+iat])[-6:-3]])*celldm - forces[iframe, iat] = np.array([float(i) for i in re.split('\s+', dumplines[iline+11+iat])[-3:]]) + coords[iframe, iat] = ( + np.array( + [ + float(i) + for i in re.split("\s+", dumplines[iline + 11 + iat])[ + -6:-3 + ] + ] + ) + * celldm + ) + forces[iframe, iat] = np.array( + [ + float(i) + for i in re.split("\s+", dumplines[iline + 11 + iat])[-3:] + ] + ) else: - coords[iframe, iat] = 
np.array([float(i) for i in re.split('\s+', dumplines[iline+7+iat])[-6:-3]])*celldm - forces[iframe, iat] = np.array([float(i) for i in re.split('\s+', dumplines[iline+7+iat])[-3:]]) + coords[iframe, iat] = ( + np.array( + [ + float(i) + for i in re.split("\s+", dumplines[iline + 7 + iat])[ + -6:-3 + ] + ] + ) + * celldm + ) + forces[iframe, iat] = np.array( + [ + float(i) + for i in re.split("\s+", dumplines[iline + 7 + iat])[-3:] + ] + ) iframe += 1 - assert(iframe == nframes_dump), "iframe=%d, nframe_dump=%d. Number of frames does not match number of lines in MD_dump."%(iframe, nframes_dump) + assert iframe == nframes_dump, ( + "iframe=%d, nframe_dump=%d. Number of frames does not match number of lines in MD_dump." + % (iframe, nframes_dump) + ) cells *= bohr2ang coords *= bohr2ang stresses *= kbar2evperang3 return coords, cells, forces, stresses + def get_energy(outlines, ndump, dump_freq): energy = [] nenergy = 0 for line_idx, line in enumerate(outlines): if "final etot is" in line: - if nenergy%dump_freq == 0: + if nenergy % dump_freq == 0: energy.append(float(line.split()[-2])) - nenergy+=1 + nenergy += 1 elif "!! convergence has not been achieved" in line: - if nenergy%dump_freq == 0: + if nenergy % dump_freq == 0: energy.append(np.nan) - nenergy+=1 - assert(ndump == len(energy)), "Number of total energies in running_md.log = %d. Number of frames in MD_dump = %d. Please check."%(len(energy), ndump) + nenergy += 1 + assert ndump == len(energy), ( + "Number of total energies in running_md.log = %d. Number of frames in MD_dump = %d. Please check." 
+ % (len(energy), ndump) + ) energy = np.array(energy) return energy -def get_frame (fname): +def get_frame(fname): if type(fname) == str: - # if the input parameter is only one string, it is assumed that it is the + # if the input parameter is only one string, it is assumed that it is the # base directory containing INPUT file; path_in = os.path.join(fname, "INPUT") else: - raise RuntimeError('invalid input') - with open(path_in, 'r') as fp: - inlines = fp.read().split('\n') - geometry_path_in = get_geometry_in(fname, inlines) # base dir of STRU - path_out = get_path_out(fname, inlines) + raise RuntimeError("invalid input") + with open(path_in, "r") as fp: + inlines = fp.read().split("\n") + geometry_path_in = get_geometry_in(fname, inlines) # base dir of STRU + path_out = get_path_out(fname, inlines) - with open(geometry_path_in, 'r') as fp: - geometry_inlines = fp.read().split('\n') - celldm, cell = get_cell(geometry_inlines) - atom_names, natoms, types, coords = get_coords(celldm, cell, geometry_inlines, inlines) + with open(geometry_path_in, "r") as fp: + geometry_inlines = fp.read().split("\n") + celldm, cell = get_cell(geometry_inlines) + atom_names, natoms, types, coords = get_coords( + celldm, cell, geometry_inlines, inlines + ) # This coords is not to be used. 
- dump_freq = get_coord_dump_freq(inlines = inlines) - #ndump = int(os.popen("ls -l %s | grep 'md_pos_' | wc -l" %path_out).readlines()[0]) + dump_freq = get_coord_dump_freq(inlines=inlines) + # ndump = int(os.popen("ls -l %s | grep 'md_pos_' | wc -l" %path_out).readlines()[0]) # number of dumped geometry files - #coords = get_coords_from_cif(ndump, dump_freq, atom_names, natoms, types, path_out, cell) - with open(os.path.join(path_out, "MD_dump"), 'r') as fp: - dumplines = fp.read().split('\n') + # coords = get_coords_from_cif(ndump, dump_freq, atom_names, natoms, types, path_out, cell) + with open(os.path.join(path_out, "MD_dump"), "r") as fp: + dumplines = fp.read().split("\n") coords, cells, force, stress = get_coords_from_dump(dumplines, natoms) ndump = np.shape(coords)[0] - with open(os.path.join(path_out, "running_md.log"), 'r') as fp: - outlines = fp.read().split('\n') + with open(os.path.join(path_out, "running_md.log"), "r") as fp: + outlines = fp.read().split("\n") energy = get_energy(outlines, ndump, dump_freq) - unconv_stru = '' - for i,iene in enumerate(energy): + unconv_stru = "" + for i, iene in enumerate(energy): if np.isnan(iene): - coords = np.delete(coords,i-ndump,axis=0) - cells = np.delete(cells,i-ndump,axis=0) - force = np.delete(force,i-ndump,axis=0) - stress = np.delete(stress,i-ndump,axis=0) - energy = np.delete(energy,i-ndump,axis=0) + coords = np.delete(coords, i - ndump, axis=0) + cells = np.delete(cells, i - ndump, axis=0) + force = np.delete(force, i - ndump, axis=0) + stress = np.delete(stress, i - ndump, axis=0) + energy = np.delete(energy, i - ndump, axis=0) unconv_stru += "%d " % i ndump = len(energy) - if unconv_stru != '': - warnings.warn(f"Structure %s are unconverged and not collected!" % unconv_stru) + if unconv_stru != "": + warnings.warn(f"Structure %s are unconverged and not collected!" 
% unconv_stru) for iframe in range(ndump): stress[iframe] *= np.linalg.det(cells[iframe, :, :].reshape([3, 3])) if np.sum(np.abs(stress[0])) < 1e-10: stress = None data = {} - data['atom_names'] = atom_names - data['atom_numbs'] = natoms - data['atom_types'] = types - data['cells'] = cells - #for idx in range(ndump): + data["atom_names"] = atom_names + data["atom_numbs"] = natoms + data["atom_types"] = types + data["cells"] = cells + # for idx in range(ndump): # data['cells'][:, :, :] = cell - data['coords'] = coords - data['energies'] = energy - data['forces'] = force - data['virials'] = stress - if type(data['virials']) != np.ndarray: - del data['virials'] - data['orig'] = np.zeros(3) + data["coords"] = coords + data["energies"] = energy + data["forces"] = force + data["virials"] = stress + if type(data["virials"]) != np.ndarray: + del data["virials"] + data["orig"] = np.zeros(3) return data diff --git a/dpdata/abacus/relax.py b/dpdata/abacus/relax.py index 88ea81531..6b5521749 100644 --- a/dpdata/abacus/relax.py +++ b/dpdata/abacus/relax.py @@ -1,22 +1,24 @@ -import os,sys +import os, sys import numpy as np from .scf import bohr2ang, kbar2evperang3, get_geometry_in, get_cell, get_coords -# Read in geometries from an ABACUS RELAX(CELL-RELAX) trajectory in OUT.XXXX/runnning_relax/cell-relax.log. +# Read in geometries from an ABACUS RELAX(CELL-RELAX) trajectory in OUT.XXXX/runnning_relax/cell-relax.log. 
+ def get_log_file(fname, inlines): suffix = "ABACUS" calculation = "scf" for line in inlines: - if "suffix" in line and "suffix"==line.split()[0]: - suffix = line.split()[1] + if "suffix" in line and "suffix" == line.split()[0]: + suffix = line.split()[1] elif "calculation" in line and "calculation" == line.split()[0]: calculation = line.split()[1] - logf = os.path.join(fname, "OUT.%s/running_%s.log"%(suffix,calculation)) + logf = os.path.join(fname, "OUT.%s/running_%s.log" % (suffix, calculation)) return logf -def get_coords_from_log(loglines,natoms): - ''' + +def get_coords_from_log(loglines, natoms): + """ NOTICE: unit of coords and cells is Angstrom order: coordinate @@ -24,116 +26,134 @@ def get_coords_from_log(loglines,natoms): energy (no output, if SCF is not converged) force (no output, if cal_force is not setted or abnormal ending) stress (no output, if set cal_stress is not setted or abnormal ending) - ''' + """ natoms_log = 0 for line in loglines: if line[13:41] == "number of atom for this type": natoms_log += int(line.split()[-1]) - assert(natoms_log>0 and natoms_log == natoms),"ERROR: detected atom number in log file is %d" % natoms + assert natoms_log > 0 and natoms_log == natoms, ( + "ERROR: detected atom number in log file is %d" % natoms + ) energy = [] cells = [] coords = [] force = [] stress = [] - coord_direct = [] #if the coordinate is direct type or not + coord_direct = [] # if the coordinate is direct type or not for i in range(len(loglines)): line = loglines[i] - if line[18:41] == "lattice constant (Bohr)": + if line[18:41] == "lattice constant (Bohr)": a0 = float(line.split()[-1]) - elif len(loglines[i].split()) >=2 and loglines[i].split()[1] == 'COORDINATES': - #read coordinate information + elif len(loglines[i].split()) >= 2 and loglines[i].split()[1] == "COORDINATES": + # read coordinate information coords.append([]) direct_coord = False - if loglines[i].split()[0] == 'DIRECT': + if loglines[i].split()[0] == "DIRECT": 
coord_direct.append(True) - for k in range(2,2+natoms): - coords[-1].append(list(map(lambda x: float(x),loglines[i+k].split()[1:4]))) - elif loglines[i].split()[0] == 'CARTESIAN': + for k in range(2, 2 + natoms): + coords[-1].append( + list(map(lambda x: float(x), loglines[i + k].split()[1:4])) + ) + elif loglines[i].split()[0] == "CARTESIAN": coord_direct.append(False) - for k in range(2,2+natoms): - coords[-1].append(list(map(lambda x: float(x)*a0,loglines[i+k].split()[1:4]))) + for k in range(2, 2 + natoms): + coords[-1].append( + list(map(lambda x: float(x) * a0, loglines[i + k].split()[1:4])) + ) else: - assert(False),"Unrecongnized coordinate type, %s, line:%d" % (loglines[i].split()[0],i) - - elif loglines[i][1:56] == "Lattice vectors: (Cartesian coordinate: in unit of a_0)": - #add the cell information for previous structures + assert False, "Unrecongnized coordinate type, %s, line:%d" % ( + loglines[i].split()[0], + i, + ) + + elif ( + loglines[i][1:56] + == "Lattice vectors: (Cartesian coordinate: in unit of a_0)" + ): + # add the cell information for previous structures while len(cells) < len(coords) - 1: - cells.append(cells[-1]) - #get current cell information + cells.append(cells[-1]) + # get current cell information cells.append([]) - for k in range(1,4): - cells[-1].append(list(map(lambda x:float(x)*a0,loglines[i+k].split()[0:3]))) + for k in range(1, 4): + cells[-1].append( + list(map(lambda x: float(x) * a0, loglines[i + k].split()[0:3])) + ) elif line[1:14] == "final etot is": - #add the energy for previous structures whose SCF is not converged + # add the energy for previous structures whose SCF is not converged while len(energy) < len(coords) - 1: energy.append(np.nan) - #get the energy of current structure + # get the energy of current structure energy.append(float(line.split()[-2])) - + elif line[4:15] == "TOTAL-FORCE": force.append([]) - for j in range(5,5+natoms): - force[-1].append(list(map(lambda x:float(x),loglines[i+j].split()[1:4]))) + 
for j in range(5, 5 + natoms): + force[-1].append( + list(map(lambda x: float(x), loglines[i + j].split()[1:4])) + ) elif line[1:13] == "TOTAL-STRESS": stress.append([]) - for j in range(4,7): - stress[-1].append(list(map(lambda x:float(x),loglines[i+j].split()[0:3]))) + for j in range(4, 7): + stress[-1].append( + list(map(lambda x: float(x), loglines[i + j].split()[0:3])) + ) - #delete last structures which has no energy + # delete last structures which has no energy while len(energy) < len(coords): del coords[-1] del coord_direct[-1] - - #add cells for last structures whose cell is not changed + + # add cells for last structures whose cell is not changed while len(cells) < len(coords): cells.append(cells[-1]) - - #only keep structures that have all of coord, force and stress + + # only keep structures that have all of coord, force and stress if len(stress) == 0 and len(force) == 0: minl = len(coords) elif len(stress) == 0: - minl = min(len(coords),len(force)) + minl = min(len(coords), len(force)) force = force[:minl] elif len(force) == 0: - minl = min(len(coords),len(stress)) + minl = min(len(coords), len(stress)) stress = stress[:minl] else: - minl = min(len(coords),len(force),len(stress)) + minl = min(len(coords), len(force), len(stress)) force = force[:minl] stress = stress[:minl] - + coords = coords[:minl] energy = energy[:minl] cells = cells[:minl] - - #delete structures whose energy is np.nan + + # delete structures whose energy is np.nan for i in range(minl): - if np.isnan(energy[i-minl]): - del energy[i-minl] - del coords[i-minl] - del cells[i-minl] - del coord_direct[i-minl] + if np.isnan(energy[i - minl]): + del energy[i - minl] + del coords[i - minl] + del cells[i - minl] + del coord_direct[i - minl] if len(force) > 0: - del force[i-minl] + del force[i - minl] if len(stress) > 0: - del stress[i-minl] - + del stress[i - minl] + energy = np.array(energy) cells = np.array(cells) coords = np.array(coords) stress = np.array(stress) force = np.array(force) 
- #transfer direct coordinate to cartessian type + # transfer direct coordinate to cartessian type for i in range(len(coords)): if coord_direct[i]: coords[i] = coords[i].dot(cells[i]) - #transfer bohrium to angstrom + # transfer bohrium to angstrom cells *= bohr2ang coords *= bohr2ang @@ -142,41 +162,48 @@ def get_coords_from_log(loglines,natoms): volume = np.linalg.det(cells[i, :, :].reshape([3, 3])) virial[i] = stress[i] * kbar2evperang3 * volume - return energy,cells,coords,force,stress,virial + return energy, cells, coords, force, stress, virial + -def get_frame (fname): +def get_frame(fname): if type(fname) == str: - # if the input parameter is only one string, it is assumed that it is the + # if the input parameter is only one string, it is assumed that it is the # base directory containing INPUT file; path_in = os.path.join(fname, "INPUT") else: - raise RuntimeError('invalid input') - with open(path_in, 'r') as fp: - inlines = fp.read().split('\n') - geometry_path_in = get_geometry_in(fname, inlines) # base dir of STRU - with open(geometry_path_in, 'r') as fp: - geometry_inlines = fp.read().split('\n') - celldm, cell = get_cell(geometry_inlines) - atom_names, natoms, types, coord_tmp = get_coords(celldm, cell, geometry_inlines, inlines) - - logf = get_log_file(fname, inlines) - assert(os.path.isfile(logf)),"Error: can not find %s" % logf - with open(logf) as f1: lines = f1.readlines() + raise RuntimeError("invalid input") + with open(path_in, "r") as fp: + inlines = fp.read().split("\n") + geometry_path_in = get_geometry_in(fname, inlines) # base dir of STRU + with open(geometry_path_in, "r") as fp: + geometry_inlines = fp.read().split("\n") + celldm, cell = get_cell(geometry_inlines) + atom_names, natoms, types, coord_tmp = get_coords( + celldm, cell, geometry_inlines, inlines + ) + + logf = get_log_file(fname, inlines) + assert os.path.isfile(logf), "Error: can not find %s" % logf + with open(logf) as f1: + lines = f1.readlines() atomnumber = 0 - for i in 
natoms: atomnumber += i - energy,cells,coords,force,stress,virial = get_coords_from_log(lines,atomnumber) + for i in natoms: + atomnumber += i + energy, cells, coords, force, stress, virial = get_coords_from_log( + lines, atomnumber + ) data = {} - data['atom_names'] = atom_names - data['atom_numbs'] = natoms - data['atom_types'] = types - data['cells'] = cells - data['coords'] = coords - data['energies'] = energy - data['forces'] = force - data['virials'] = virial - data['stress'] = stress - data['orig'] = np.zeros(3) + data["atom_names"] = atom_names + data["atom_numbs"] = natoms + data["atom_types"] = types + data["cells"] = cells + data["coords"] = coords + data["energies"] = energy + data["forces"] = force + data["virials"] = virial + data["stress"] = stress + data["orig"] = np.zeros(3) return data diff --git a/dpdata/abacus/scf.py b/dpdata/abacus/scf.py index f3f80d483..94f4bf7b4 100644 --- a/dpdata/abacus/scf.py +++ b/dpdata/abacus/scf.py @@ -1,24 +1,27 @@ -import os,sys +import os, sys import numpy as np from ..unit import EnergyConversion, PressureConversion, LengthConversion import re + bohr2ang = LengthConversion("bohr", "angstrom").value() ry2ev = EnergyConversion("rydberg", "eV").value() kbar2evperang3 = PressureConversion("kbar", "eV/angstrom^3").value() + def CheckFile(ifile): if not os.path.isfile(ifile): print("Can not find file %s" % ifile) return False return True -def get_block (lines, keyword, skip = 0, nlines = None): + +def get_block(lines, keyword, skip=0, nlines=None): ret = [] found = False if not nlines: nlines = 1e6 - for idx,ii in enumerate(lines) : - if keyword in ii : + for idx, ii in enumerate(lines): + if keyword in ii: found = True blk_idx = idx + 1 + skip line_idx = 0 @@ -26,7 +29,7 @@ def get_block (lines, keyword, skip = 0, nlines = None): blk_idx += 1 while line_idx < nlines and blk_idx != len(lines): if len(re.split("\s+", lines[blk_idx])) == 0 or lines[blk_idx] == "": - blk_idx+=1 + blk_idx += 1 continue 
ret.append(lines[blk_idx]) blk_idx += 1 @@ -36,94 +39,107 @@ def get_block (lines, keyword, skip = 0, nlines = None): return None return ret + def get_geometry_in(fname, inlines): geometry_path_in = os.path.join(fname, "STRU") for line in inlines: - if "stru_file" in line and "stru_file"==line.split()[0]: - atom_file = line.split()[1] - geometry_path_in = os.path.join(fname, atom_file) - break + if "stru_file" in line and "stru_file" == line.split()[0]: + atom_file = line.split()[1] + geometry_path_in = os.path.join(fname, atom_file) + break return geometry_path_in + def get_path_out(fname, inlines): path_out = os.path.join(fname, "OUT.ABACUS/running_scf.log") for line in inlines: - if "suffix" in line and "suffix"==line.split()[0]: - suffix = line.split()[1] - path_out = os.path.join(fname, "OUT.%s/running_scf.log" % suffix) - break + if "suffix" in line and "suffix" == line.split()[0]: + suffix = line.split()[1] + path_out = os.path.join(fname, "OUT.%s/running_scf.log" % suffix) + break return path_out + def get_cell(geometry_inlines): - cell_lines = get_block(geometry_inlines, "LATTICE_VECTORS", skip = 0, nlines = 3) + cell_lines = get_block(geometry_inlines, "LATTICE_VECTORS", skip=0, nlines=3) celldm_lines = get_block(geometry_inlines, "LATTICE_CONSTANT", skip=0, nlines=1) - celldm = float(celldm_lines[0].split()[0]) * bohr2ang # lattice const is in Bohr + celldm = float(celldm_lines[0].split()[0]) * bohr2ang # lattice const is in Bohr cell = [] for ii in range(3): cell.append([float(jj) for jj in cell_lines[ii].split()[0:3]]) - cell = celldm*np.array(cell) + cell = celldm * np.array(cell) return celldm, cell + def get_coords(celldm, cell, geometry_inlines, inlines=None): coords_lines = get_block(geometry_inlines, "ATOMIC_POSITIONS", skip=0) # assuming that ATOMIC_POSITIONS is at the bottom of the STRU file - coord_type = coords_lines[0].split()[0].lower() # cartisan or direct - atom_names = [] # element abbr in periodic table - atom_types = [] # index of 
atom_names of each atom in the geometry - atom_numbs = [] # of atoms for each element - coords = [] # coordinations of atoms + coord_type = coords_lines[0].split()[0].lower() # cartisan or direct + atom_names = [] # element abbr in periodic table + atom_types = [] # index of atom_names of each atom in the geometry + atom_numbs = [] # of atoms for each element + coords = [] # coordinations of atoms ntype = get_nele_from_stru(geometry_inlines) - line_idx = 1 # starting line of first element + line_idx = 1 # starting line of first element for it in range(ntype): atom_names.append(coords_lines[line_idx].split()[0]) - line_idx+=2 + line_idx += 2 atom_numbs.append(int(coords_lines[line_idx].split()[0])) - line_idx+=1 + line_idx += 1 for iline in range(atom_numbs[it]): xyz = np.array([float(xx) for xx in coords_lines[line_idx].split()[0:3]]) if coord_type == "cartesian": - xyz = xyz*celldm + xyz = xyz * celldm elif coord_type == "direct": tmp = np.matmul(xyz, cell) xyz = tmp else: print("coord_type = %s" % coord_type) - raise RuntimeError("Input coordination type is invalid.\n Only direct and cartesian are accepted.") + raise RuntimeError( + "Input coordination type is invalid.\n Only direct and cartesian are accepted." + ) coords.append(xyz) atom_types.append(it) line_idx += 1 - coords = np.array(coords) # need transformation!!! + coords = np.array(coords) # need transformation!!! atom_types = np.array(atom_types) return atom_names, atom_numbs, atom_types, coords + def get_energy(outlines): Etot = None for line in outlines: if "!FINAL_ETOT_IS" in line: - Etot = float(line.split()[1]) # in eV + Etot = float(line.split()[1]) # in eV break if not Etot: - return Etot,False + return Etot, False for line in outlines: if "convergence has NOT been achieved!" 
in line: - return Etot,False - return Etot,True + return Etot, False + return Etot, True -def get_force (outlines, natoms): + +def get_force(outlines, natoms): force = [] - force_inlines = get_block (outlines, "TOTAL-FORCE (eV/Angstrom)", skip = 4, nlines=np.sum(natoms)) + force_inlines = get_block( + outlines, "TOTAL-FORCE (eV/Angstrom)", skip=4, nlines=np.sum(natoms) + ) if force_inlines is None: - print("TOTAL-FORCE (eV/Angstrom) is not found in OUT.XXX/running_scf.log. May be you haven't set 'cal_force 1' in the INPUT.") + print( + "TOTAL-FORCE (eV/Angstrom) is not found in OUT.XXX/running_scf.log. May be you haven't set 'cal_force 1' in the INPUT." + ) return [[]] for line in force_inlines: force.append([float(f) for f in line.split()[1:4]]) force = np.array(force) return force + def get_stress(outlines): stress = [] - stress_inlines = get_block(outlines, "TOTAL-STRESS (KBAR)", skip = 3, nlines=3) + stress_inlines = get_block(outlines, "TOTAL-STRESS (KBAR)", skip=3, nlines=3) if stress_inlines is None: return None for line in stress_inlines: @@ -132,60 +148,63 @@ def get_stress(outlines): return stress +def get_frame(fname): + data = { + "atom_names": [], + "atom_numbs": [], + "atom_types": [], + "cells": [], + "coords": [], + "energies": [], + "forces": [], + } -def get_frame (fname): - data = {'atom_names':[],\ - 'atom_numbs':[],\ - 'atom_types':[],\ - 'cells':[],\ - 'coords':[],\ - 'energies':[],\ - 'forces':[]} - if type(fname) == str: - # if the input parameter is only one string, it is assumed that it is the + # if the input parameter is only one string, it is assumed that it is the # base directory containing INPUT file; path_in = os.path.join(fname, "INPUT") else: - raise RuntimeError('invalid input') - + raise RuntimeError("invalid input") + if not CheckFile(path_in): return data - - with open(path_in, 'r') as fp: - inlines = fp.read().split('\n') - - geometry_path_in = get_geometry_in(fname, inlines) - path_out = get_path_out(fname, inlines) + + with 
open(path_in, "r") as fp: + inlines = fp.read().split("\n") + + geometry_path_in = get_geometry_in(fname, inlines) + path_out = get_path_out(fname, inlines) if not (CheckFile(geometry_path_in) and CheckFile(path_out)): return data - - with open(geometry_path_in, 'r') as fp: - geometry_inlines = fp.read().split('\n') - with open(path_out, 'r') as fp: - outlines = fp.read().split('\n') - - celldm, cell = get_cell(geometry_inlines) - atom_names, natoms, types, coords = get_coords(celldm, cell, geometry_inlines, inlines) - data['atom_names'] = atom_names - data['atom_numbs'] = natoms - data['atom_types'] = types - - energy,converge = get_energy(outlines) + + with open(geometry_path_in, "r") as fp: + geometry_inlines = fp.read().split("\n") + with open(path_out, "r") as fp: + outlines = fp.read().split("\n") + + celldm, cell = get_cell(geometry_inlines) + atom_names, natoms, types, coords = get_coords( + celldm, cell, geometry_inlines, inlines + ) + data["atom_names"] = atom_names + data["atom_numbs"] = natoms + data["atom_types"] = types + + energy, converge = get_energy(outlines) if not converge: return data - force = get_force (outlines, natoms) - stress = get_stress(outlines) + force = get_force(outlines, natoms) + stress = get_stress(outlines) if stress is not None: - stress *= np.abs(np.linalg.det(cell)) - - data['cells'] = cell[np.newaxis, :, :] - data['coords'] = coords[np.newaxis, :, :] - data['energies'] = np.array(energy)[np.newaxis] - data['forces'] = force[np.newaxis, :, :] + stress *= np.abs(np.linalg.det(cell)) + + data["cells"] = cell[np.newaxis, :, :] + data["coords"] = coords[np.newaxis, :, :] + data["energies"] = np.array(energy)[np.newaxis] + data["forces"] = force[np.newaxis, :, :] if stress is not None: - data['virials'] = stress[np.newaxis, :, :] - data['orig'] = np.zeros(3) + data["virials"] = stress[np.newaxis, :, :] + data["orig"] = np.zeros(3) # print("atom_names = ", data['atom_names']) # print("natoms = ", data['atom_numbs']) # print("types 
= ", data['atom_types']) @@ -196,8 +215,16 @@ def get_frame (fname): # print("virial = ", data['virials']) return data + def get_nele_from_stru(geometry_inlines): - key_words_list = ["ATOMIC_SPECIES", "NUMERICAL_ORBITAL", "LATTICE_CONSTANT", "LATTICE_VECTORS", "ATOMIC_POSITIONS", "NUMERICAL_DESCRIPTOR"] + key_words_list = [ + "ATOMIC_SPECIES", + "NUMERICAL_ORBITAL", + "LATTICE_CONSTANT", + "LATTICE_VECTORS", + "ATOMIC_POSITIONS", + "NUMERICAL_DESCRIPTOR", + ] keyword_sequence = [] keyword_line_index = [] atom_names = [] @@ -210,86 +237,107 @@ def get_nele_from_stru(geometry_inlines): if keyword in line and keyword == line.split()[0]: keyword_sequence.append(keyword) keyword_line_index.append(iline) - assert(len(keyword_line_index) == len(keyword_sequence)) - assert(len(keyword_sequence) > 0) + assert len(keyword_line_index) == len(keyword_sequence) + assert len(keyword_sequence) > 0 keyword_line_index.append(len(geometry_inlines)) nele = 0 for idx, keyword in enumerate(keyword_sequence): if keyword == "ATOMIC_SPECIES": - for iline in range(keyword_line_index[idx]+1, keyword_line_index[idx+1]): + for iline in range( + keyword_line_index[idx] + 1, keyword_line_index[idx + 1] + ): if len(re.split("\s+", geometry_inlines[iline])) >= 3: nele += 1 return nele + def get_frame_from_stru(fname): - assert(type(fname) == str) - with open(fname, 'r') as fp: - geometry_inlines = fp.read().split('\n') + assert type(fname) == str + with open(fname, "r") as fp: + geometry_inlines = fp.read().split("\n") nele = get_nele_from_stru(geometry_inlines) - inlines = ["ntype %d" %nele] + inlines = ["ntype %d" % nele] celldm, cell = get_cell(geometry_inlines) - atom_names, natoms, types, coords = get_coords(celldm, cell, geometry_inlines, inlines) + atom_names, natoms, types, coords = get_coords( + celldm, cell, geometry_inlines, inlines + ) data = {} - data['atom_names'] = atom_names - data['atom_numbs'] = natoms - data['atom_types'] = types - data['cells'] = cell[np.newaxis, :, :] - 
data['coords'] = coords[np.newaxis, :, :] - data['orig'] = np.zeros(3) + data["atom_names"] = atom_names + data["atom_numbs"] = natoms + data["atom_types"] = types + data["cells"] = cell[np.newaxis, :, :] + data["coords"] = coords[np.newaxis, :, :] + data["orig"] = np.zeros(3) return data -def make_unlabeled_stru(data, frame_idx, pp_file=None, numerical_orbital=None, numerical_descriptor=None, mass=None): + +def make_unlabeled_stru( + data, + frame_idx, + pp_file=None, + numerical_orbital=None, + numerical_descriptor=None, + mass=None, +): out = "ATOMIC_SPECIES\n" - for iele in range(len(data['atom_names'])): - out += data['atom_names'][iele] + " " + for iele in range(len(data["atom_names"])): + out += data["atom_names"][iele] + " " if mass is not None: - out += "%.3f "%mass[iele] + out += "%.3f " % mass[iele] else: out += "1 " if pp_file is not None: - out += "%s\n"%pp_file[iele] + out += "%s\n" % pp_file[iele] else: out += "\n" out += "\n" if numerical_orbital is not None: - assert(len(numerical_orbital) == len(data['atom_names'])) + assert len(numerical_orbital) == len(data["atom_names"]) out += "NUMERICAL_ORBITAL\n" for iele in range(len(numerical_orbital)): - out += "%s\n"%numerical_orbital[iele] + out += "%s\n" % numerical_orbital[iele] out += "\n" if numerical_descriptor is not None: - assert(type(numerical_descriptor) == str) - out += "NUMERICAL_DESCRIPTOR\n%s\n"%numerical_descriptor + assert type(numerical_descriptor) == str + out += "NUMERICAL_DESCRIPTOR\n%s\n" % numerical_descriptor out += "\n" - + out += "LATTICE_CONSTANT\n" - out += str(1/bohr2ang) + "\n\n" + out += str(1 / bohr2ang) + "\n\n" out += "LATTICE_VECTORS\n" for ix in range(3): for iy in range(3): - out += str(data['cells'][frame_idx][ix][iy]) + " " + out += str(data["cells"][frame_idx][ix][iy]) + " " out += "\n" out += "\n" out += "ATOMIC_POSITIONS\n" out += "Cartesian # Cartesian(Unit is LATTICE_CONSTANT)\n" - #ret += "\n" + # ret += "\n" natom_tot = 0 - for iele in 
range(len(data['atom_names'])): - out += data['atom_names'][iele] + "\n" + for iele in range(len(data["atom_names"])): + out += data["atom_names"][iele] + "\n" out += "0.0\n" - out += str(data['atom_numbs'][iele]) + "\n" - for iatom in range(data['atom_numbs'][iele]): - out += "%.12f %.12f %.12f %d %d %d\n" % (data['coords'][frame_idx][natom_tot, 0], data['coords'][frame_idx][natom_tot, 1], data['coords'][frame_idx][natom_tot, 2], 1, 1, 1) + out += str(data["atom_numbs"][iele]) + "\n" + for iatom in range(data["atom_numbs"][iele]): + out += "%.12f %.12f %.12f %d %d %d\n" % ( + data["coords"][frame_idx][natom_tot, 0], + data["coords"][frame_idx][natom_tot, 1], + data["coords"][frame_idx][natom_tot, 2], + 1, + 1, + 1, + ) natom_tot += 1 - assert(natom_tot == sum(data['atom_numbs'])) + assert natom_tot == sum(data["atom_numbs"]) return out -#if __name__ == "__main__": + +# if __name__ == "__main__": # path = "/home/lrx/work/12_ABACUS_dpgen_interface/dpdata/dpdata/tests/abacus.scf" # data = get_frame(path) diff --git a/dpdata/amber/__init__.py b/dpdata/amber/__init__.py index 8b1378917..e69de29bb 100644 --- a/dpdata/amber/__init__.py +++ b/dpdata/amber/__init__.py @@ -1 +0,0 @@ - diff --git a/dpdata/amber/mask.py b/dpdata/amber/mask.py index 99a7d3d3e..f44ee047e 100644 --- a/dpdata/amber/mask.py +++ b/dpdata/amber/mask.py @@ -4,9 +4,10 @@ except ImportError: pass + def pick_by_amber_mask(param, maskstr, coords=None): """Pick atoms by amber masks - + Parameters ---------- param: str or parmed.Structure @@ -22,10 +23,13 @@ def pick_by_amber_mask(param, maskstr, coords=None): sele = [] if len(maskstr) > 0: newmaskstr = maskstr.replace("@0", "!@*") - sele = [parm.atoms[i].idx for i in parmed.amber.mask.AmberMask( - parm, newmaskstr).Selected()] + sele = [ + parm.atoms[i].idx + for i in parmed.amber.mask.AmberMask(parm, newmaskstr).Selected() + ] return sele + def load_param_file(param_file): if isinstance(param_file, str): return parmed.load_file(param_file) diff --git 
a/dpdata/amber/md.py b/dpdata/amber/md.py index 64227a6a6..0d178104a 100644 --- a/dpdata/amber/md.py +++ b/dpdata/amber/md.py @@ -7,15 +7,21 @@ from ..periodic_table import ELEMENTS kcalmol2eV = EnergyConversion("kcal_mol", "eV").value() -symbols = ['X'] + ELEMENTS +symbols = ["X"] + ELEMENTS energy_convert = kcalmol2eV force_convert = energy_convert -def read_amber_traj(parm7_file, nc_file, mdfrc_file=None, mden_file = None, mdout_file = None, - use_element_symbols=None, labeled=True, - ): +def read_amber_traj( + parm7_file, + nc_file, + mdfrc_file=None, + mden_file=None, + mdout_file=None, + use_element_symbols=None, + labeled=True, +): """The amber trajectory includes: * nc, NetCDF format, stores coordinates * mdfrc, NetCDF format, stores forces @@ -42,10 +48,12 @@ def read_amber_traj(parm7_file, nc_file, mdfrc_file=None, mden_file = None, mdou for line in f: if line.startswith("%FLAG"): flag_atom_type = line.startswith("%FLAG AMBER_ATOM_TYPE") - flag_atom_numb = (use_element_symbols is not None) and line.startswith("%FLAG ATOMIC_NUMBER") + flag_atom_numb = (use_element_symbols is not None) and line.startswith( + "%FLAG ATOMIC_NUMBER" + ) elif flag_atom_type or flag_atom_numb: if line.startswith("%FORMAT"): - fmt = re.findall(r'\d+', line) + fmt = re.findall(r"\d+", line) fmt0 = int(fmt[0]) fmt1 = int(fmt[1]) else: @@ -58,18 +66,18 @@ def read_amber_traj(parm7_file, nc_file, mdfrc_file=None, mden_file = None, mdou if flag_atom_type: amber_types.append(content) elif flag_atom_numb: - atomic_number.append(int(content)) + atomic_number.append(int(content)) if use_element_symbols is not None: if isinstance(use_element_symbols, str): use_element_symbols = pick_by_amber_mask(parm7_file, use_element_symbols) for ii in use_element_symbols: amber_types[ii] = symbols[atomic_number[ii]] - with netcdf.netcdf_file(nc_file, 'r') as f: + with netcdf.netcdf_file(nc_file, "r") as f: coords = np.array(f.variables["coordinates"][:]) cell_lengths = 
np.array(f.variables["cell_lengths"][:]) cell_angles = np.array(f.variables["cell_angles"][:]) - if np.all(cell_angles > 89.99 ) and np.all(cell_angles < 90.01): + if np.all(cell_angles > 89.99) and np.all(cell_angles < 90.01): # only support 90 # TODO: support other angles shape = cell_lengths.shape @@ -80,7 +88,7 @@ def read_amber_traj(parm7_file, nc_file, mdfrc_file=None, mden_file = None, mdou raise RuntimeError("Unsupported cells") if labeled: - with netcdf.netcdf_file(mdfrc_file, 'r') as f: + with netcdf.netcdf_file(mdfrc_file, "r") as f: forces = np.array(f.variables["forces"][:]) # load energy from mden_file or mdout_file @@ -101,17 +109,18 @@ def read_amber_traj(parm7_file, nc_file, mdfrc_file=None, mden_file = None, mdou else: raise RuntimeError("Please provide one of mden_file and mdout_file") - atom_names, atom_types, atom_numbs = np.unique(amber_types, return_inverse=True, return_counts=True) + atom_names, atom_types, atom_numbs = np.unique( + amber_types, return_inverse=True, return_counts=True + ) data = {} - data['atom_names'] = list(atom_names) - data['atom_numbs'] = list(atom_numbs) - data['atom_types'] = atom_types + data["atom_names"] = list(atom_names) + data["atom_numbs"] = list(atom_numbs) + data["atom_types"] = atom_types if labeled: - data['forces'] = forces * force_convert - data['energies'] = np.array(energies) * energy_convert - data['coords'] = coords - data['cells'] = cells - data['orig'] = np.array([0, 0, 0]) + data["forces"] = forces * force_convert + data["energies"] = np.array(energies) * energy_convert + data["coords"] = coords + data["cells"] = cells + data["orig"] = np.array([0, 0, 0]) return data - diff --git a/dpdata/amber/sqm.py b/dpdata/amber/sqm.py index 7080e43ee..7826d2018 100644 --- a/dpdata/amber/sqm.py +++ b/dpdata/amber/sqm.py @@ -10,10 +10,11 @@ READ_COORDS = 6 READ_FORCES = 7 + def parse_sqm_out(fname): - ''' - Read atom symbols, charges and coordinates from ambertools sqm.out file - ''' + """ + Read atom symbols, 
charges and coordinates from ambertools sqm.out file + """ atom_symbols = [] coords = [] charges = [] @@ -55,36 +56,41 @@ def parse_sqm_out(fname): forces.append([float(ll[-60:-40]), float(ll[-40:-20]), float(ll[-20:])]) if len(forces) == len(charges): flag = START - + data = {} - atom_names, data['atom_types'], atom_numbs = np.unique(atom_symbols, return_inverse=True, return_counts=True) - data['charges'] = np.array(charges) - data['atom_names'] = list(atom_names) - data['atom_numbs'] = list(atom_numbs) - data['orig'] = np.array([0, 0, 0]) - data['cells'] = np.array([[[100., 0., 0.], [0., 100., 0.], [0., 0., 100.]]]) - data['nopbc'] = True - data['coords'] = np.array([coords]) + atom_names, data["atom_types"], atom_numbs = np.unique( + atom_symbols, return_inverse=True, return_counts=True + ) + data["charges"] = np.array(charges) + data["atom_names"] = list(atom_names) + data["atom_numbs"] = list(atom_numbs) + data["orig"] = np.array([0, 0, 0]) + data["cells"] = np.array( + [[[100.0, 0.0, 0.0], [0.0, 100.0, 0.0], [0.0, 0.0, 100.0]]] + ) + data["nopbc"] = True + data["coords"] = np.array([coords]) energies = np.array(energies) forces = -np.array([forces], dtype=np.float64) * kcal2ev if len(forces) > 0: - data['energies'] = energies - data['forces'] = forces - + data["energies"] = energies + data["forces"] = forces + return data + def make_sqm_in(data, fname=None, frame_idx=0, **kwargs): - symbols = [data['atom_names'][ii] for ii in data['atom_types']] + symbols = [data["atom_names"][ii] for ii in data["atom_types"]] atomic_numbers = [ELEMENTS.index(ss) + 1 for ss in symbols] charge = kwargs.get("charge", 0) # multiplicity mult = kwargs.get("mult", 1) - if mult != 1 : + if mult != 1: raise RuntimeError("Multiplicity is not 1, which is not supported by sqm") - maxcyc = kwargs.get("maxcyc", 0) # 0 represents a single-point calculation + maxcyc = kwargs.get("maxcyc", 0) # 0 represents a single-point calculation theory = kwargs.get("qm_theory", "DFTB3") ret = "Run 
semi-emperical minimization\n" ret += " &qmmm\n" @@ -93,15 +99,15 @@ def make_sqm_in(data, fname=None, frame_idx=0, **kwargs): ret += f" maxcyc={maxcyc}\n" ret += " verbosity=4\n" ret += " /\n" - for ii in range(len(data['atom_types'])): + for ii in range(len(data["atom_types"])): ret += "{:>4s}{:>6s}{:>16s}{:>16s}{:>16s}\n".format( str(atomic_numbers[ii]), str(symbols[ii]), f"{data['coords'][frame_idx][ii, 0]:.6f}", f"{data['coords'][frame_idx][ii, 1]:.6f}", - f"{data['coords'][frame_idx][ii, 2]:.6f}" + f"{data['coords'][frame_idx][ii, 2]:.6f}", ) if fname is not None: - with open(fname, 'w') as fp: + with open(fname, "w") as fp: fp.write(ret) return ret diff --git a/dpdata/ase_calculator.py b/dpdata/ase_calculator.py index df89d05cb..ea5e50094 100644 --- a/dpdata/ase_calculator.py +++ b/dpdata/ase_calculator.py @@ -1,7 +1,9 @@ from typing import List, Optional, TYPE_CHECKING from ase.calculators.calculator import ( - Calculator, all_changes, PropertyNotImplementedError + Calculator, + all_changes, + PropertyNotImplementedError, ) import dpdata @@ -12,7 +14,7 @@ class DPDataCalculator(Calculator): - """Implementation of ASE deepmd calculator based on a driver. + """Implementation of ASE deepmd calculator based on a driver. 
Parameters ---------- @@ -21,14 +23,9 @@ class DPDataCalculator(Calculator): """ name = "dpdata" - implemented_properties = [ - "energy", "free_energy", "forces", "virial", "stress"] + implemented_properties = ["energy", "free_energy", "forces", "virial", "stress"] - def __init__( - self, - driver: Driver, - **kwargs - ) -> None: + def __init__(self, driver: Driver, **kwargs) -> None: Calculator.__init__(self, label=Driver.__name__, **kwargs) self.driver = driver @@ -56,21 +53,24 @@ def calculate( system = dpdata.System(self.atoms, fmt="ase/structure") data = system.predict(driver=self.driver).data - self.results['energy'] = data['energies'][0] + self.results["energy"] = data["energies"][0] # see https://gitlab.com/ase/ase/-/merge_requests/2485 - self.results['free_energy'] = data['energies'][0] - self.results['forces'] = data['forces'][0] - if 'virials' in data: - self.results['virial'] = data['virials'][0].reshape(3, 3) + self.results["free_energy"] = data["energies"][0] + self.results["forces"] = data["forces"][0] + if "virials" in data: + self.results["virial"] = data["virials"][0].reshape(3, 3) # convert virial into stress for lattice relaxation if "stress" in properties: if sum(atoms.get_pbc()) > 0: # the usual convention (tensile stress is positive) # stress = -virial / volume - stress = -0.5 * (data['virials'][0].copy() + data['virials'][0].copy().T) / \ - atoms.get_volume() + stress = ( + -0.5 + * (data["virials"][0].copy() + data["virials"][0].copy().T) + / atoms.get_volume() + ) # Voigt notation - self.results['stress'] = stress.flat[[0, 4, 8, 5, 2, 1]] + self.results["stress"] = stress.flat[[0, 4, 8, 5, 2, 1]] else: raise PropertyNotImplementedError diff --git a/dpdata/bond_order_system.py b/dpdata/bond_order_system.py index f2a66fd2a..6cb834d47 100644 --- a/dpdata/bond_order_system.py +++ b/dpdata/bond_order_system.py @@ -6,38 +6,42 @@ from dpdata.rdkit.sanitize import Sanitizer, SanitizeError from copy import deepcopy from rdkit.Chem import Conformer 
+ # import dpdata.rdkit.mol2 - + class BondOrderSystem(System): - ''' + """ The system with chemical bond and formal charges information For example, a labeled methane system named `d_example` has one molecule (5 atoms, 4 bonds) and `n_frames` frames. The bond order and formal charge information can be accessed by - `d_example['bonds']` : a numpy array of size 4 x 3, and the first column represents the index of begin atom, - the second column represents the index of end atom, + the second column represents the index of end atom, the third columen represents the bond order: 1 - single bond, 2 - double bond, 3 - triple bond, 1.5 - aromatic bond - `d_example['formal_charges']` : a numpy array of size 5 x 1 - ''' + """ + DTYPES = System.DTYPES + ( DataType("bonds", np.ndarray, (Axis.NBONDS, 3)), DataType("formal_charges", np.ndarray, (Axis.NATOMS,)), ) - def __init__(self, - file_name = None, - fmt = 'auto', - type_map = None, - begin = 0, - step = 1, - data = None, - rdkit_mol = None, - sanitize_level = "medium", - raise_errors = True, - verbose = False, - **kwargs): + def __init__( + self, + file_name=None, + fmt="auto", + type_map=None, + begin=0, + step=1, + data=None, + rdkit_mol=None, + sanitize_level="medium", + raise_errors=True, + verbose=False, + **kwargs, + ): """ Constructor @@ -76,12 +80,9 @@ def __init__(self, mol = dpdata.rdkit.utils.system_data_to_mol(data) self.from_rdkit_mol(mol) if file_name: - self.from_fmt(file_name, - fmt, - type_map=type_map, - begin=begin, - step=step, - **kwargs) + self.from_fmt( + file_name, fmt, type_map=type_map, begin=begin, step=step, **kwargs + ) elif rdkit_mol: self.from_rdkit_mol(rdkit_mol) else: @@ -94,7 +95,7 @@ def __init__(self, def from_fmt_obj(self, fmtobj, file_name, **kwargs): mol = fmtobj.from_bond_order_system(file_name, **kwargs) self.from_rdkit_mol(mol) - if hasattr(fmtobj.from_bond_order_system, 'post_func'): + if hasattr(fmtobj.from_bond_order_system, "post_func"): for post_f in 
fmtobj.from_bond_order_system.post_func: self.post_funcs.get_plugin(post_f)(self) return self @@ -109,9 +110,9 @@ def to_fmt_obj(self, fmtobj, *args, **kwargs): return fmtobj.to_bond_order_system(self.data, self.rdkit_mol, *args, **kwargs) def __str__(self): - ''' - A brief summary of the system - ''' + """ + A brief summary of the system + """ ret = "Data Summary" ret += "\nBondOrder System" ret += "\n-------------------" @@ -120,47 +121,49 @@ def __str__(self): ret += f"\nBond Numbers : {self.get_nbonds()}" ret += "\nElement List :" ret += "\n-------------------" - ret += "\n"+" ".join(map(str,self.get_atom_names())) - ret += "\n"+" ".join(map(str,self.get_atom_numbs())) + ret += "\n" + " ".join(map(str, self.get_atom_names())) + ret += "\n" + " ".join(map(str, self.get_atom_numbs())) return ret def get_nbonds(self): - ''' - Return the number of bonds - ''' - return len(self.data['bonds']) - + """ + Return the number of bonds + """ + return len(self.data["bonds"]) + def get_charge(self): - ''' - Return the total formal charge of the moleclue - ''' - return sum(self.data['formal_charges']) - + """ + Return the total formal charge of the moleclue + """ + return sum(self.data["formal_charges"]) + def get_mol(self): - ''' - Return the rdkit.Mol object - ''' + """ + Return the rdkit.Mol object + """ return self.rdkit_mol - + def get_bond_order(self, begin_atom_idx, end_atom_idx): - ''' - Return the bond order between given atoms - ''' - return self.data['bond_dict'][f'{int(begin_atom_idx)}-{int(end_atom_idx)}'] - + """ + Return the bond order between given atoms + """ + return self.data["bond_dict"][f"{int(begin_atom_idx)}-{int(end_atom_idx)}"] + def get_formal_charges(self): - ''' - Return the formal charges on each atom - ''' - return self.data['formal_charges'] - + """ + Return the formal charges on each atom + """ + return self.data["formal_charges"] + def copy(self): new_mol = deepcopy(self.rdkit_mol) - self.__class__(data=deepcopy(self.data), - 
rdkit_mol=new_mol) - + self.__class__(data=deepcopy(self.data), rdkit_mol=new_mol) + def __add__(self, other): - raise NotImplementedError("magic method '+' has not been implemented on BondOrderSystem") + raise NotImplementedError( + "magic method '+' has not been implemented on BondOrderSystem" + ) + # ''' # magic method "+" operation # ''' @@ -173,10 +176,12 @@ def __add__(self, other): # raise RuntimeError(f"Unsupported data structure: {type(other)}") def from_rdkit_mol(self, rdkit_mol): - ''' - Initialize from a rdkit.Chem.rdchem.Mol object - ''' + """ + Initialize from a rdkit.Chem.rdchem.Mol object + """ rdkit_mol = self.sanitizer.sanitize(rdkit_mol) self.data = dpdata.rdkit.utils.mol_to_system_data(rdkit_mol) - self.data['bond_dict'] = dict([(f'{int(bond[0])}-{int(bond[1])}', bond[2]) for bond in self.data['bonds']]) + self.data["bond_dict"] = dict( + [(f"{int(bond[0])}-{int(bond[1])}", bond[2]) for bond in self.data["bonds"]] + ) self.rdkit_mol = rdkit_mol diff --git a/dpdata/cli.py b/dpdata/cli.py index 2b33f9590..88e49ba5d 100644 --- a/dpdata/cli.py +++ b/dpdata/cli.py @@ -21,13 +21,24 @@ def dpdata_parser() -> argparse.ArgumentParser: parser.add_argument("from_file", type=str, help="read data from a file") parser.add_argument("--to_file", "-O", type=str, help="dump data to a file") - parser.add_argument("--from_format", "-i", type=str, default="auto", help="the format of from_file") + parser.add_argument( + "--from_format", "-i", type=str, default="auto", help="the format of from_file" + ) parser.add_argument("--to_format", "-o", type=str, help="the format of to_file") - parser.add_argument("--no-labeled", "-n", action="store_true", help="labels aren't provided") - parser.add_argument("--multi", "-m", action="store_true", help="the system contains multiple directories") + parser.add_argument( + "--no-labeled", "-n", action="store_true", help="labels aren't provided" + ) + parser.add_argument( + "--multi", + "-m", + action="store_true", + help="the system 
contains multiple directories", + ) parser.add_argument("--type-map", "-t", type=str, nargs="+", help="type map") - parser.add_argument('--version', action='version', version='dpdata v%s' % __version__) + parser.add_argument( + "--version", action="version", version="dpdata v%s" % __version__ + ) return parser @@ -45,17 +56,19 @@ def dpdata_cli(): convert(**vars(parsed_args)) -def convert(*, - from_file: str, - from_format: str = "auto", - to_file: Optional[str] = None, - to_format: Optional[str] = None, - no_labeled: bool = False, - multi: bool = False, - type_map: Optional[list] = None, - **kwargs): +def convert( + *, + from_file: str, + from_format: str = "auto", + to_file: Optional[str] = None, + to_format: Optional[str] = None, + no_labeled: bool = False, + multi: bool = False, + type_map: Optional[list] = None, + **kwargs +): """Convert files from one format to another one. - + Parameters ---------- from_file : str @@ -74,7 +87,9 @@ def convert(*, type map """ if multi: - s = MultiSystems.from_file(from_file, fmt=from_format, type_map=type_map, labeled=not no_labeled) + s = MultiSystems.from_file( + from_file, fmt=from_format, type_map=type_map, labeled=not no_labeled + ) elif not no_labeled: s = LabeledSystem(from_file, fmt=from_format, type_map=type_map) else: diff --git a/dpdata/cp2k/cell.py b/dpdata/cp2k/cell.py index 017986ec1..3fd8b6c5e 100644 --- a/dpdata/cp2k/cell.py +++ b/dpdata/cp2k/cell.py @@ -1,42 +1,54 @@ - #%% import numpy as np from collections import OrderedDict import re -def cell_to_low_triangle(A,B,C,alpha,beta,gamma): + +def cell_to_low_triangle(A, B, C, alpha, beta, gamma): """ - Convert cell to low triangle matrix. + Convert cell to low triangle matrix. - Parameters - ---------- - A : float - cell length A - B : float - cell length B - C : float - cell length C - alpha : float - radian. The angle between vector B and vector C. - beta : float - radian. The angle between vector A and vector C. - gamma : float - radian. 
The angle between vector B and vector C. - - Returns - ------- - cell : list - The cell matrix used by dpdata in low triangle form. + Parameters + ---------- + A : float + cell length A + B : float + cell length B + C : float + cell length C + alpha : float + radian. The angle between vector B and vector C. + beta : float + radian. The angle between vector A and vector C. + gamma : float + radian. The angle between vector B and vector C. + + Returns + ------- + cell : list + The cell matrix used by dpdata in low triangle form. """ - if not np.pi*5/180 0.2: raise RuntimeError("A=={}, must be greater than 0.2".format(A)) if not B > 0.2: @@ -47,15 +59,17 @@ def cell_to_low_triangle(A,B,C,alpha,beta,gamma): lx = A xy = B * np.cos(gamma) xz = C * np.cos(beta) - ly = B* np.sin(gamma) + ly = B * np.sin(gamma) if not ly > 0.1: - raise RuntimeError("ly:=B* np.sin(gamma)=={}, must be greater than 0.1",format(ly)) - yz = (B*C*np.cos(alpha)-xy*xz)/ly - if not C**2-xz**2-yz**2 > 0.01: - raise RuntimeError("lz^2:=C**2-xz**2-yz**2=={}, must be greater than 0.01",format(C**2-xz**2-yz**2)) - lz = np.sqrt(C**2-xz**2-yz**2) - cell = np.asarray([[lx, 0 , 0], - [xy, ly, 0 ], - [xz, yz, lz]]).astype('float32') + raise RuntimeError( + "ly:=B* np.sin(gamma)=={}, must be greater than 0.1", format(ly) + ) + yz = (B * C * np.cos(alpha) - xy * xz) / ly + if not C**2 - xz**2 - yz**2 > 0.01: + raise RuntimeError( + "lz^2:=C**2-xz**2-yz**2=={}, must be greater than 0.01", + format(C**2 - xz**2 - yz**2), + ) + lz = np.sqrt(C**2 - xz**2 - yz**2) + cell = np.asarray([[lx, 0, 0], [xy, ly, 0], [xz, yz, lz]]).astype("float32") return cell - diff --git a/dpdata/cp2k/output.py b/dpdata/cp2k/output.py index 2f5e9cc61..965d06567 100644 --- a/dpdata/cp2k/output.py +++ b/dpdata/cp2k/output.py @@ -5,33 +5,40 @@ from scipy.constants import R from .cell import cell_to_low_triangle -from ..unit import EnergyConversion, LengthConversion, ForceConversion, PressureConversion +from ..unit import ( + 
EnergyConversion, + LengthConversion, + ForceConversion, + PressureConversion, +) #%% AU_TO_ANG = LengthConversion("bohr", "angstrom").value() AU_TO_EV = EnergyConversion("hartree", "eV").value() AU_TO_EV_EVERY_ANG = ForceConversion("hartree/bohr", "eV/angstrom").value() -delimiter_patterns=[] -delimiter_p1 = re.compile(r'^ \* GO CP2K GO! \*+') -delimiter_p2 = re.compile(r'^ \*+') +delimiter_patterns = [] +delimiter_p1 = re.compile(r"^ \* GO CP2K GO! \*+") +delimiter_p2 = re.compile(r"^ \*+") delimiter_patterns.append(delimiter_p1) delimiter_patterns.append(delimiter_p2) avail_patterns = [] -avail_patterns.append(re.compile(r'^ INITIAL POTENTIAL ENERGY')) -avail_patterns.append(re.compile(r'^ ENSEMBLE TYPE')) +avail_patterns.append(re.compile(r"^ INITIAL POTENTIAL ENERGY")) +avail_patterns.append(re.compile(r"^ ENSEMBLE TYPE")) + class Cp2kSystems(object): """ deal with cp2k outputfile """ + def __init__(self, log_file_name, xyz_file_name, restart=False): - self.log_file_object = open(log_file_name, 'r') - self.xyz_file_object = open(xyz_file_name, 'r') + self.log_file_object = open(log_file_name, "r") + self.xyz_file_object = open(xyz_file_name, "r") self.log_block_generator = self.get_log_block_generator() self.xyz_block_generator = self.get_xyz_block_generator() self.restart_flag = restart - self.cell=None - self.print_level=None + self.cell = None + self.print_level = None self.atomic_kinds = None if self.restart_flag: @@ -47,15 +54,19 @@ def __iter__(self): def __next__(self): info_dict = {} log_info_dict = self.handle_single_log_frame(next(self.log_block_generator)) - #print(log_info_dict) + # print(log_info_dict) xyz_info_dict = self.handle_single_xyz_frame(next(self.xyz_block_generator)) - #eq1 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_numbs'], xyz_info_dict['atom_numbs'])] - #eq2 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_names'], xyz_info_dict['atom_names'])] - #eq3 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_types'], 
xyz_info_dict['atom_types'])] - #assert all(eq1), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True') - #assert all(eq2), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True') - #assert all(eq3), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True') - assert log_info_dict['energies']==xyz_info_dict['energies'], (log_info_dict['energies'], xyz_info_dict['energies'],'There may be errors in the file') + # eq1 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_numbs'], xyz_info_dict['atom_numbs'])] + # eq2 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_names'], xyz_info_dict['atom_names'])] + # eq3 = [v1==v2 for v1,v2 in zip(log_info_dict['atom_types'], xyz_info_dict['atom_types'])] + # assert all(eq1), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True') + # assert all(eq2), (log_info_dict,xyz_info_dict,'There may be errors in the file. If it is a restart task; use restart=True') + # assert all(eq3), (log_info_dict,xyz_info_dict,'There may be errors in the file. 
If it is a restart task; use restart=True') + assert log_info_dict["energies"] == xyz_info_dict["energies"], ( + log_info_dict["energies"], + xyz_info_dict["energies"], + "There may be errors in the file", + ) info_dict.update(log_info_dict) info_dict.update(xyz_info_dict) return info_dict @@ -80,10 +91,10 @@ def get_log_block_generator(self): else: break if delimiter_flag is True: - raise RuntimeError('This file lacks some content, please check') + raise RuntimeError("This file lacks some content, please check") def get_xyz_block_generator(self): - p3 = re.compile(r'^\s*(\d+)\s*') + p3 = re.compile(r"^\s*(\d+)\s*") while True: line = self.xyz_file_object.readline() if not line: @@ -92,122 +103,172 @@ def get_xyz_block_generator(self): atom_num = int(p3.match(line).group(1)) lines = [] lines.append(line) - for ii in range(atom_num+1): + for ii in range(atom_num + 1): lines.append(self.xyz_file_object.readline()) if not lines[-1]: - raise RuntimeError("this xyz file may lack of lines, should be {};lines:{}".format(atom_num+2, lines)) + raise RuntimeError( + "this xyz file may lack of lines, should be {};lines:{}".format( + atom_num + 2, lines + ) + ) yield lines def handle_single_log_frame(self, lines): - info_dict={} - energy_pattern_1 = re.compile(r' INITIAL POTENTIAL ENERGY\[hartree\]\s+=\s+(?P\S+)') + info_dict = {} + energy_pattern_1 = re.compile( + r" INITIAL POTENTIAL ENERGY\[hartree\]\s+=\s+(?P\S+)" + ) # CONSERVED QUANTITY [hartree] = -0.279168013085E+04 - energy_pattern_2 = re.compile(r' POTENTIAL ENERGY\[hartree\]\s+=\s+(?P\S+)') - energy=None - cell_length_pattern = re.compile(r' (INITIAL ){0,1}CELL LNTHS\[bohr\]\s+=\s+(?P\S+)\s+(?P\S+)\s+(?P\S+)') - cell_angle_pattern = re.compile(r' (INITIAL ){0,1}CELL ANGLS\[deg\]\s+=\s+(?P\S+)\s+(?P\S+)\s+(?P\S+)') - cell_A, cell_B, cell_C = (0,0,0,) - cell_alpha, cell_beta, cell_gamma=(0,0,0,) - cell_a_pattern = re.compile(r' CELL\| Vector a \[angstrom\]:\s+(?P\S+)\s+(?P\S+)\s+(?P\S+)') - cell_b_pattern = 
re.compile(r' CELL\| Vector b \[angstrom\]:\s+(?P\S+)\s+(?P\S+)\s+(?P\S+)') - cell_c_pattern = re.compile(r' CELL\| Vector c \[angstrom\]:\s+(?P\S+)\s+(?P\S+)\s+(?P\S+)') - force_start_pattern = re.compile(r' ATOMIC FORCES in') - force_flag=False - force_end_pattern = re.compile(r' SUM OF ATOMIC FORCES') - force_lines= [] - cell_flag=0 - print_level_pattern = re.compile(r' GLOBAL\| Global print level\s+(?P\S+)') + energy_pattern_2 = re.compile( + r" POTENTIAL ENERGY\[hartree\]\s+=\s+(?P\S+)" + ) + energy = None + cell_length_pattern = re.compile( + r" (INITIAL ){0,1}CELL LNTHS\[bohr\]\s+=\s+(?P\S+)\s+(?P\S+)\s+(?P\S+)" + ) + cell_angle_pattern = re.compile( + r" (INITIAL ){0,1}CELL ANGLS\[deg\]\s+=\s+(?P\S+)\s+(?P\S+)\s+(?P\S+)" + ) + cell_A, cell_B, cell_C = ( + 0, + 0, + 0, + ) + cell_alpha, cell_beta, cell_gamma = ( + 0, + 0, + 0, + ) + cell_a_pattern = re.compile( + r" CELL\| Vector a \[angstrom\]:\s+(?P\S+)\s+(?P\S+)\s+(?P\S+)" + ) + cell_b_pattern = re.compile( + r" CELL\| Vector b \[angstrom\]:\s+(?P\S+)\s+(?P\S+)\s+(?P\S+)" + ) + cell_c_pattern = re.compile( + r" CELL\| Vector c \[angstrom\]:\s+(?P\S+)\s+(?P\S+)\s+(?P\S+)" + ) + force_start_pattern = re.compile(r" ATOMIC FORCES in") + force_flag = False + force_end_pattern = re.compile(r" SUM OF ATOMIC FORCES") + force_lines = [] + cell_flag = 0 + print_level_pattern = re.compile( + r" GLOBAL\| Global print level\s+(?P\S+)" + ) print_level_flag = 0 - atomic_kinds_pattern = re.compile(r'\s+\d+\. Atomic kind:\s+(?P\S+)') - atomic_kinds = [] - stress_sign = 'STRESS' + atomic_kinds_pattern = re.compile(r"\s+\d+\. 
Atomic kind:\s+(?P\S+)") + atomic_kinds = [] + stress_sign = "STRESS" stress_flag = 0 stress = [] for line in lines: - if stress_flag == 3 : - if (line == '\n') : + if stress_flag == 3: + if line == "\n": stress_flag = 0 - else : + else: stress.append(line.split()[1:4]) - if stress_flag == 2 : + if stress_flag == 2: stress_flag = 3 - if stress_flag == 1 : + if stress_flag == 1: stress_flag = 2 - if (stress_sign in line): + if stress_sign in line: stress_flag = 1 if force_start_pattern.match(line): - force_flag=True + force_flag = True if force_end_pattern.match(line): - assert force_flag is True, (force_flag,'there may be errors in this file ') - force_flag=False + assert force_flag is True, ( + force_flag, + "there may be errors in this file ", + ) + force_flag = False if force_flag is True: - force_lines.append(line) + force_lines.append(line) if energy_pattern_1.match(line): - energy = float(energy_pattern_1.match(line).groupdict()['number']) * AU_TO_EV - #print('1to', energy) + energy = ( + float(energy_pattern_1.match(line).groupdict()["number"]) * AU_TO_EV + ) + # print('1to', energy) if energy_pattern_2.match(line): - energy = float(energy_pattern_2.match(line).groupdict()['number']) * AU_TO_EV + energy = ( + float(energy_pattern_2.match(line).groupdict()["number"]) * AU_TO_EV + ) if cell_length_pattern.match(line): - cell_A = float(cell_length_pattern.match(line).groupdict()['A']) * AU_TO_ANG - cell_B = float(cell_length_pattern.match(line).groupdict()['B']) * AU_TO_ANG - cell_C = float(cell_length_pattern.match(line).groupdict()['C']) * AU_TO_ANG - cell_flag+=1 + cell_A = ( + float(cell_length_pattern.match(line).groupdict()["A"]) * AU_TO_ANG + ) + cell_B = ( + float(cell_length_pattern.match(line).groupdict()["B"]) * AU_TO_ANG + ) + cell_C = ( + float(cell_length_pattern.match(line).groupdict()["C"]) * AU_TO_ANG + ) + cell_flag += 1 if cell_angle_pattern.match(line): - cell_alpha = np.deg2rad(float(cell_angle_pattern.match(line).groupdict()['alpha'])) - 
cell_beta = np.deg2rad(float(cell_angle_pattern.match(line).groupdict()['beta'])) - cell_gamma = np.deg2rad(float(cell_angle_pattern.match(line).groupdict()['gamma'])) - cell_flag+=1 + cell_alpha = np.deg2rad( + float(cell_angle_pattern.match(line).groupdict()["alpha"]) + ) + cell_beta = np.deg2rad( + float(cell_angle_pattern.match(line).groupdict()["beta"]) + ) + cell_gamma = np.deg2rad( + float(cell_angle_pattern.match(line).groupdict()["gamma"]) + ) + cell_flag += 1 if print_level_pattern.match(line): - print_level = print_level_pattern.match(line).groupdict()['print_level'] + print_level = print_level_pattern.match(line).groupdict()["print_level"] print_level_flag += 1 if cell_a_pattern.match(line): - cell_ax = float(cell_a_pattern.match(line).groupdict()['ax']) - cell_ay = float(cell_a_pattern.match(line).groupdict()['ay']) - cell_az = float(cell_a_pattern.match(line).groupdict()['az']) - cell_flag+=1 + cell_ax = float(cell_a_pattern.match(line).groupdict()["ax"]) + cell_ay = float(cell_a_pattern.match(line).groupdict()["ay"]) + cell_az = float(cell_a_pattern.match(line).groupdict()["az"]) + cell_flag += 1 if cell_b_pattern.match(line): - cell_bx = float(cell_b_pattern.match(line).groupdict()['bx']) - cell_by = float(cell_b_pattern.match(line).groupdict()['by']) - cell_bz = float(cell_b_pattern.match(line).groupdict()['bz']) - cell_flag+=1 + cell_bx = float(cell_b_pattern.match(line).groupdict()["bx"]) + cell_by = float(cell_b_pattern.match(line).groupdict()["by"]) + cell_bz = float(cell_b_pattern.match(line).groupdict()["bz"]) + cell_flag += 1 if cell_c_pattern.match(line): - cell_cx = float(cell_c_pattern.match(line).groupdict()['cx']) - cell_cy = float(cell_c_pattern.match(line).groupdict()['cy']) - cell_cz = float(cell_c_pattern.match(line).groupdict()['cz']) - cell_flag+=1 + cell_cx = float(cell_c_pattern.match(line).groupdict()["cx"]) + cell_cy = float(cell_c_pattern.match(line).groupdict()["cy"]) + cell_cz = 
float(cell_c_pattern.match(line).groupdict()["cz"]) + cell_flag += 1 if atomic_kinds_pattern.match(line): - akind = atomic_kinds_pattern.match(line).groupdict()['akind'] + akind = atomic_kinds_pattern.match(line).groupdict()["akind"] atomic_kinds.append(akind) if print_level_flag == 1: self.print_level = print_level - if print_level == 'LOW': - raise RuntimeError("please provide cp2k output with higher print level(at least MEDIUM)") - + if print_level == "LOW": + raise RuntimeError( + "please provide cp2k output with higher print level(at least MEDIUM)" + ) if cell_flag == 2: - self.cell = cell_to_low_triangle(cell_A,cell_B,cell_C, - cell_alpha,cell_beta,cell_gamma) + self.cell = cell_to_low_triangle( + cell_A, cell_B, cell_C, cell_alpha, cell_beta, cell_gamma + ) elif cell_flag == 5: self.cell = np.asarray( [ [cell_ax, cell_ay, cell_az], - [cell_bx, cell_by, cell_bz], - [cell_cx, cell_cy, cell_cz]] - ).astype('float32') + [cell_bx, cell_by, cell_bz], + [cell_cx, cell_cy, cell_cz], + ] + ).astype("float32") if atomic_kinds: self.atomic_kinds = atomic_kinds - #print(self.atomic_kinds) - # lx = cell_A - # xy = cell_B * np.cos(cell_gamma) - # xz = cell_C * np.cos(cell_beta) - # ly = cell_B* np.sin(cell_gamma) - # yz = (cell_B*cell_C*np.cos(cell_alpha)-xy*xz)/ly - # lz = np.sqrt(cell_C**2-xz**2-yz**2) - # self.cell = [[lx, 0 , 0], - # [xy, ly, 0 ], - # [xz, yz, lz]] + # print(self.atomic_kinds) + # lx = cell_A + # xy = cell_B * np.cos(cell_gamma) + # xz = cell_C * np.cos(cell_beta) + # ly = cell_B* np.sin(cell_gamma) + # yz = (cell_B*cell_C*np.cos(cell_alpha)-xy*xz)/ly + # lz = np.sqrt(cell_C**2-xz**2-yz**2) + # self.cell = [[lx, 0 , 0], + # [xy, ly, 0 ], + # [xz, yz, lz]] element_index = -1 element_dict = OrderedDict() @@ -215,58 +276,66 @@ def handle_single_log_frame(self, lines): forces_list = [] for line in force_lines[3:]: line_list = line.split() - #print(line_list) + # print(line_list) if element_dict.get(line_list[1]): - element_dict[line_list[1]][1]+=1 + 
element_dict[line_list[1]][1] += 1 else: - element_index +=1 - element_dict[line_list[1]]=[element_index,1] + element_index += 1 + element_dict[line_list[1]] = [element_index, 1] atom_types_idx_list.append(element_dict[line_list[1]][0]) - forces_list.append([float(line_list[3])*AU_TO_EV_EVERY_ANG, - float(line_list[4])*AU_TO_EV_EVERY_ANG, - float(line_list[5])*AU_TO_EV_EVERY_ANG]) - #print(atom_types_idx_list) - #atom_names=list(element_dict.keys()) - atom_names=self.atomic_kinds - atom_numbs=[] + forces_list.append( + [ + float(line_list[3]) * AU_TO_EV_EVERY_ANG, + float(line_list[4]) * AU_TO_EV_EVERY_ANG, + float(line_list[5]) * AU_TO_EV_EVERY_ANG, + ] + ) + # print(atom_types_idx_list) + # atom_names=list(element_dict.keys()) + atom_names = self.atomic_kinds + atom_numbs = [] GPa = PressureConversion("eV/angstrom^3", "GPa").value() if stress: stress = np.array(stress) - stress = stress.astype('float32') + stress = stress.astype("float32") stress = stress[np.newaxis, :, :] # stress to virial conversion, default unit in cp2k is GPa # note the stress is virial = stress * volume - virial = stress * np.linalg.det(self.cell)/GPa + virial = stress * np.linalg.det(self.cell) / GPa virial = virial.squeeze() else: virial = None for ii in element_dict.keys(): atom_numbs.append(element_dict[ii][1]) - #print(atom_numbs) - info_dict['atom_names'] = atom_names - info_dict['atom_numbs'] = atom_numbs - info_dict['atom_types'] = np.asarray(atom_types_idx_list) - info_dict['print_level'] = self.print_level - info_dict['cells'] = np.asarray([self.cell]).astype('float32') - info_dict['energies'] = np.asarray([energy]).astype('float32') - info_dict['forces'] = np.asarray([forces_list]).astype('float32') - if(virial is not None ): - info_dict['virials'] = np.asarray([virial]).astype('float32') + # print(atom_numbs) + info_dict["atom_names"] = atom_names + info_dict["atom_numbs"] = atom_numbs + info_dict["atom_types"] = np.asarray(atom_types_idx_list) + info_dict["print_level"] = 
self.print_level + info_dict["cells"] = np.asarray([self.cell]).astype("float32") + info_dict["energies"] = np.asarray([energy]).astype("float32") + info_dict["forces"] = np.asarray([forces_list]).astype("float32") + if virial is not None: + info_dict["virials"] = np.asarray([virial]).astype("float32") return info_dict def handle_single_xyz_frame(self, lines): info_dict = {} - atom_num = int(lines[0].strip('\n').strip()) + atom_num = int(lines[0].strip("\n").strip()) if len(lines) != atom_num + 2: - raise RuntimeError("format error, atom_num=={}, {}!=atom_num+2".format(atom_num, len(lines))) - data_format_line = lines[1].strip('\n').strip()+str(' ') - prop_pattern = re.compile(r'(?P\w+)\s*=\s*(?P.*?)[, ]') + raise RuntimeError( + "format error, atom_num=={}, {}!=atom_num+2".format( + atom_num, len(lines) + ) + ) + data_format_line = lines[1].strip("\n").strip() + str(" ") + prop_pattern = re.compile(r"(?P\w+)\s*=\s*(?P.*?)[, ]") prop_dict = dict(prop_pattern.findall(data_format_line)) - energy=0 - if prop_dict.get('E'): - energy = float(prop_dict.get('E')) * AU_TO_EV + energy = 0 + if prop_dict.get("E"): + energy = float(prop_dict.get("E")) * AU_TO_EV # info_dict['energies'] = np.array([prop_dict['E']]).astype('float32') element_index = -1 @@ -276,32 +345,34 @@ def handle_single_xyz_frame(self, lines): for line in lines[2:]: line_list = line.split() if element_dict.get(line_list[0]): - element_dict[line_list[0]][1]+=1 + element_dict[line_list[0]][1] += 1 else: - element_index +=1 - element_dict[line_list[0]]=[element_index,1] + element_index += 1 + element_dict[line_list[0]] = [element_index, 1] atom_types_list.append(element_dict[line_list[0]][0]) # coords_list.append([float(line_list[1])*AU_TO_ANG, # float(line_list[2])*AU_TO_ANG, # float(line_list[3])*AU_TO_ANG]) - coords_list.append([float(line_list[1]), - float(line_list[2]), - float(line_list[3])]) - atom_names=list(element_dict.keys()) - atom_numbs=[] + coords_list.append( + [float(line_list[1]), 
float(line_list[2]), float(line_list[3])] + ) + atom_names = list(element_dict.keys()) + atom_numbs = [] for ii in atom_names: atom_numbs.append(element_dict[ii][1]) - #info_dict['atom_names'] = atom_names - #info_dict['atom_numbs'] = atom_numbs - #info_dict['atom_types'] = np.asarray(atom_types_list) - info_dict['coords'] = np.asarray([coords_list]).astype('float32') - info_dict['energies'] = np.array([energy]).astype('float32') - info_dict['orig'] = np.zeros(3) + # info_dict['atom_names'] = atom_names + # info_dict['atom_numbs'] = atom_numbs + # info_dict['atom_types'] = np.asarray(atom_types_list) + info_dict["coords"] = np.asarray([coords_list]).astype("float32") + info_dict["energies"] = np.array([energy]).astype("float32") + info_dict["orig"] = np.zeros(3) return info_dict + #%% -def get_frames (fname) : + +def get_frames(fname): coord_flag = False force_flag = False stress_flag = False @@ -315,75 +386,73 @@ def get_frames (fname) : force = [] stress = [] - fp = open(fname) # check if output is converged, if not, return sys = 0 content = fp.read() - count = content.count('SCF run converged') + count = content.count("SCF run converged") if count == 0: return [], [], [], [], [], [], [], None - # search duplicated header + # search duplicated header fp.seek(0) header_idx = [] - for idx, ii in enumerate(fp) : - if 'Multiplication driver' in ii : + for idx, ii in enumerate(fp): + if "Multiplication driver" in ii: header_idx.append(idx) # parse from last header fp.seek(0) - for idx, ii in enumerate(fp) : - if idx > header_idx[-1] : - if 'CELL| Vector' in ii: + for idx, ii in enumerate(fp): + if idx > header_idx[-1]: + if "CELL| Vector" in ii: cell.append(ii.split()[4:7]) - if 'Atomic kind:' in ii: + if "Atomic kind:" in ii: atom_symbol_list.append(ii.split()[3]) - if 'Atom Kind Element' in ii : + if "Atom Kind Element" in ii: coord_flag = True coord_idx = idx - + # get the coord block info - if coord_flag : - if (idx > coord_idx + 1) : - if (ii == '\n') : + if 
coord_flag: + if idx > coord_idx + 1: + if ii == "\n": coord_flag = False - else : + else: coord.append(ii.split()[4:7]) atom_symbol_idx_list.append(ii.split()[1]) - if 'ENERGY|' in ii : - energy = (ii.split()[8]) - if ' Atom Kind ' in ii : + if "ENERGY|" in ii: + energy = ii.split()[8] + if " Atom Kind " in ii: force_flag = True force_idx = idx - if force_flag : - if (idx > force_idx) : - if 'SUM OF ATOMIC FORCES' in ii : + if force_flag: + if idx > force_idx: + if "SUM OF ATOMIC FORCES" in ii: force_flag = False - else : + else: force.append(ii.split()[3:6]) # add reading stress tensor - if 'STRESS TENSOR [GPa' in ii : + if "STRESS TENSOR [GPa" in ii: stress_flag = True stress_idx = idx - if stress_flag : - if (idx > stress_idx + 2): - if (ii == '\n') : + if stress_flag: + if idx > stress_idx + 2: + if ii == "\n": stress_flag = False - else : + else: stress.append(ii.split()[1:4]) - fp.close() - assert(coord), "cannot find coords" - assert(energy), "cannot find energies" - assert(force), "cannot find forces" + assert coord, "cannot find coords" + assert energy, "cannot find energies" + assert force, "cannot find forces" - #conver to float array and add extra dimension for nframes + # conver to float array and add extra dimension for nframes cell = np.array(cell) - cell = cell.astype('float32') + cell = cell.astype("float32") cell = cell[np.newaxis, :, :] coord = np.array(coord) - coord = coord.astype('float32') + coord = coord.astype("float32") coord = coord[np.newaxis, :, :] atom_symbol_idx_list = np.array(atom_symbol_idx_list) atom_symbol_idx_list = atom_symbol_idx_list.astype(int) @@ -391,17 +460,17 @@ def get_frames (fname) : atom_symbol_list = np.array(atom_symbol_list) atom_symbol_list = atom_symbol_list[atom_symbol_idx_list] force = np.array(force) - force = force.astype('float32') + force = force.astype("float32") force = force[np.newaxis, :, :] # virial is not necessary if stress: stress = np.array(stress) - stress = stress.astype('float32') + stress = 
stress.astype("float32") stress = stress[np.newaxis, :, :] # stress to virial conversion, default unit in cp2k is GPa # note the stress is virial = stress * volume - virial = stress * np.linalg.det(cell[0])/GPa + virial = stress * np.linalg.det(cell[0]) / GPa else: virial = None @@ -409,18 +478,17 @@ def get_frames (fname) : force = force * eV / angstrom # energy unit conversion, default unit in cp2k is hartree energy = float(energy) * eV - energy = np.array(energy).astype('float32') + energy = np.array(energy).astype("float32") energy = energy[np.newaxis] - tmp_names, symbol_idx = np.unique(atom_symbol_list, return_index=True) atom_types = [] atom_numbs = [] - #preserve the atom_name order + # preserve the atom_name order atom_names = atom_symbol_list[np.sort(symbol_idx)] for jj in atom_symbol_list: for idx, ii in enumerate(atom_names): - if (jj == ii) : + if jj == ii: atom_types.append(idx) for idx in range(len(atom_names)): atom_numbs.append(atom_types.count(idx)) @@ -430,6 +498,4 @@ def get_frames (fname) : return list(atom_names), atom_numbs, atom_types, cell, coord, energy, force, virial - - # %% diff --git a/dpdata/deepmd/comp.py b/dpdata/deepmd/comp.py index 3ac239b05..a5eb73349 100644 --- a/dpdata/deepmd/comp.py +++ b/dpdata/deepmd/comp.py @@ -1,117 +1,124 @@ -import os,glob,shutil +import os, glob, shutil import numpy as np from .raw import load_type -def _cond_load_data(fname) : + +def _cond_load_data(fname): tmp = None - if os.path.isfile(fname) : + if os.path.isfile(fname): tmp = np.load(fname) return tmp -def _load_set(folder, nopbc: bool) : - coords = np.load(os.path.join(folder, 'coord.npy')) + +def _load_set(folder, nopbc: bool): + coords = np.load(os.path.join(folder, "coord.npy")) if nopbc: - cells = np.zeros((coords.shape[0], 3,3)) + cells = np.zeros((coords.shape[0], 3, 3)) else: - cells = np.load(os.path.join(folder, 'box.npy')) - eners = _cond_load_data(os.path.join(folder, 'energy.npy')) - forces = _cond_load_data(os.path.join(folder, 
'force.npy')) - virs = _cond_load_data(os.path.join(folder, 'virial.npy')) + cells = np.load(os.path.join(folder, "box.npy")) + eners = _cond_load_data(os.path.join(folder, "energy.npy")) + forces = _cond_load_data(os.path.join(folder, "force.npy")) + virs = _cond_load_data(os.path.join(folder, "virial.npy")) return cells, coords, eners, forces, virs -def to_system_data(folder, - type_map = None, - labels = True) : + +def to_system_data(folder, type_map=None, labels=True): # data is empty - data = load_type(folder, type_map = type_map) - data['orig'] = np.zeros([3]) + data = load_type(folder, type_map=type_map) + data["orig"] = np.zeros([3]) if os.path.isfile(os.path.join(folder, "nopbc")): - data['nopbc'] = True - sets = sorted(glob.glob(os.path.join(folder, 'set.*'))) + data["nopbc"] = True + sets = sorted(glob.glob(os.path.join(folder, "set.*"))) all_cells = [] all_coords = [] all_eners = [] all_forces = [] all_virs = [] - for ii in sets : - cells, coords, eners, forces, virs = _load_set(ii, data.get('nopbc', False)) - nframes = np.reshape(cells, [-1,3,3]).shape[0] - all_cells.append(np.reshape(cells, [nframes,3,3])) - all_coords.append(np.reshape(coords, [nframes,-1,3])) + for ii in sets: + cells, coords, eners, forces, virs = _load_set(ii, data.get("nopbc", False)) + nframes = np.reshape(cells, [-1, 3, 3]).shape[0] + all_cells.append(np.reshape(cells, [nframes, 3, 3])) + all_coords.append(np.reshape(coords, [nframes, -1, 3])) if eners is not None: eners = np.reshape(eners, [nframes]) if labels: if eners is not None and eners.size > 0: all_eners.append(np.reshape(eners, [nframes])) if forces is not None and forces.size > 0: - all_forces.append(np.reshape(forces, [nframes,-1,3])) + all_forces.append(np.reshape(forces, [nframes, -1, 3])) if virs is not None and virs.size > 0: - all_virs.append(np.reshape(virs, [nframes,3,3])) - data['cells'] = np.concatenate(all_cells, axis = 0) - data['coords'] = np.concatenate(all_coords, axis = 0) - if len(all_eners) > 0 : - 
data['energies'] = np.concatenate(all_eners, axis = 0) - if len(all_forces) > 0 : - data['forces'] = np.concatenate(all_forces, axis = 0) + all_virs.append(np.reshape(virs, [nframes, 3, 3])) + data["cells"] = np.concatenate(all_cells, axis=0) + data["coords"] = np.concatenate(all_coords, axis=0) + if len(all_eners) > 0: + data["energies"] = np.concatenate(all_eners, axis=0) + if len(all_forces) > 0: + data["forces"] = np.concatenate(all_forces, axis=0) if len(all_virs) > 0: - data['virials'] = np.concatenate(all_virs, axis = 0) + data["virials"] = np.concatenate(all_virs, axis=0) return data -def dump(folder, - data, - set_size = 5000, - comp_prec = np.float32, - remove_sets = True) : - os.makedirs(folder, exist_ok = True) - sets = sorted(glob.glob(os.path.join(folder, 'set.*'))) +def dump(folder, data, set_size=5000, comp_prec=np.float32, remove_sets=True): + os.makedirs(folder, exist_ok=True) + sets = sorted(glob.glob(os.path.join(folder, "set.*"))) if len(sets) > 0: - if remove_sets : - for ii in sets : + if remove_sets: + for ii in sets: shutil.rmtree(ii) - else : - raise RuntimeError('found ' + str(sets) + ' in ' + folder + 'not a clean deepmd raw dir. please firstly clean set.* then try compress') - # dump raw - np.savetxt(os.path.join(folder, 'type.raw'), data['atom_types'], fmt = '%d') - np.savetxt(os.path.join(folder, 'type_map.raw'), data['atom_names'], fmt = '%s') + else: + raise RuntimeError( + "found " + + str(sets) + + " in " + + folder + + "not a clean deepmd raw dir. 
please firstly clean set.* then try compress" + ) + # dump raw + np.savetxt(os.path.join(folder, "type.raw"), data["atom_types"], fmt="%d") + np.savetxt(os.path.join(folder, "type_map.raw"), data["atom_names"], fmt="%s") # BondOrder System if "bonds" in data: - np.savetxt(os.path.join(folder, "bonds.raw"), data['bonds'], header="begin_atom, end_atom, bond_order") + np.savetxt( + os.path.join(folder, "bonds.raw"), + data["bonds"], + header="begin_atom, end_atom, bond_order", + ) if "formal_charges" in data: - np.savetxt(os.path.join(folder, "formal_charges.raw"), data['formal_charges']) + np.savetxt(os.path.join(folder, "formal_charges.raw"), data["formal_charges"]) # reshape frame properties and convert prec - nframes = data['cells'].shape[0] - cells = np.reshape(data['cells'], [nframes, 9]).astype(comp_prec) - coords = np.reshape(data['coords'], [nframes, -1]).astype(comp_prec) + nframes = data["cells"].shape[0] + cells = np.reshape(data["cells"], [nframes, 9]).astype(comp_prec) + coords = np.reshape(data["coords"], [nframes, -1]).astype(comp_prec) eners = None forces = None virials = None - if 'energies' in data: - eners = np.reshape(data['energies'], [nframes ]).astype(comp_prec) - if 'forces' in data: - forces = np.reshape(data['forces'], [nframes, -1]).astype(comp_prec) - if 'virials' in data : - virials = np.reshape(data['virials'], [nframes, 9]).astype(comp_prec) - if 'atom_pref' in data: - atom_pref = np.reshape(data['atom_pref'], [nframes, -1]).astype(comp_prec) + if "energies" in data: + eners = np.reshape(data["energies"], [nframes]).astype(comp_prec) + if "forces" in data: + forces = np.reshape(data["forces"], [nframes, -1]).astype(comp_prec) + if "virials" in data: + virials = np.reshape(data["virials"], [nframes, 9]).astype(comp_prec) + if "atom_pref" in data: + atom_pref = np.reshape(data["atom_pref"], [nframes, -1]).astype(comp_prec) # dump frame properties: cell, coord, energy, force and virial nsets = nframes // set_size - if set_size * nsets < 
nframes : + if set_size * nsets < nframes: nsets += 1 - for ii in range(nsets) : + for ii in range(nsets): set_stt = ii * set_size - set_end = (ii+1) * set_size - set_folder = os.path.join(folder, 'set.%03d' % ii) + set_end = (ii + 1) * set_size + set_folder = os.path.join(folder, "set.%03d" % ii) os.makedirs(set_folder) - np.save(os.path.join(set_folder, 'box'), cells [set_stt:set_end]) - np.save(os.path.join(set_folder, 'coord'), coords [set_stt:set_end]) + np.save(os.path.join(set_folder, "box"), cells[set_stt:set_end]) + np.save(os.path.join(set_folder, "coord"), coords[set_stt:set_end]) if eners is not None: - np.save(os.path.join(set_folder, 'energy'), eners [set_stt:set_end]) + np.save(os.path.join(set_folder, "energy"), eners[set_stt:set_end]) if forces is not None: - np.save(os.path.join(set_folder, 'force'), forces [set_stt:set_end]) + np.save(os.path.join(set_folder, "force"), forces[set_stt:set_end]) if virials is not None: - np.save(os.path.join(set_folder, 'virial'), virials[set_stt:set_end]) - if 'atom_pref' in data: + np.save(os.path.join(set_folder, "virial"), virials[set_stt:set_end]) + if "atom_pref" in data: np.save(os.path.join(set_folder, "atom_pref"), atom_pref[set_stt:set_end]) try: os.remove(os.path.join(folder, "nopbc")) @@ -120,4 +127,3 @@ def dump(folder, if data.get("nopbc", False): with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: pass - diff --git a/dpdata/deepmd/hdf5.py b/dpdata/deepmd/hdf5.py index 0e807f058..1e44b7902 100644 --- a/dpdata/deepmd/hdf5.py +++ b/dpdata/deepmd/hdf5.py @@ -7,12 +7,15 @@ from wcmatch.glob import globfilter -__all__ = ['to_system_data', 'dump'] +__all__ = ["to_system_data", "dump"] -def to_system_data(f: Union[h5py.File, h5py.Group], - folder: str, - type_map: Optional[list] = None, - labels: bool = True) : + +def to_system_data( + f: Union[h5py.File, h5py.Group], + folder: str, + type_map: Optional[list] = None, + labels: bool = True, +): """Load a HDF5 file. 
Parameters @@ -29,67 +32,94 @@ def to_system_data(f: Union[h5py.File, h5py.Group], g = f[folder] if folder else f data = {} - data['atom_types'] = g['type.raw'][:] - ntypes = np.max(data['atom_types']) + 1 - natoms = data['atom_types'].size - data['atom_numbs'] = [] - for ii in range (ntypes) : - data['atom_numbs'].append(np.count_nonzero(data['atom_types'] == ii)) - data['atom_names'] = [] + data["atom_types"] = g["type.raw"][:] + ntypes = np.max(data["atom_types"]) + 1 + natoms = data["atom_types"].size + data["atom_numbs"] = [] + for ii in range(ntypes): + data["atom_numbs"].append(np.count_nonzero(data["atom_types"] == ii)) + data["atom_names"] = [] # if find type_map.raw, use it - if 'type_map.raw' in g.keys(): - my_type_map = list(np.char.decode(g['type_map.raw'][:])) - # else try to use arg type_map + if "type_map.raw" in g.keys(): + my_type_map = list(np.char.decode(g["type_map.raw"][:])) + # else try to use arg type_map elif type_map is not None: my_type_map = type_map # in the last case, make artificial atom names else: my_type_map = [] - for ii in range(ntypes) : - my_type_map.append('Type_%d' % ii) - assert(len(my_type_map) >= len(data['atom_numbs'])) - for ii in range(len(data['atom_numbs'])) : - data['atom_names'].append(my_type_map[ii]) + for ii in range(ntypes): + my_type_map.append("Type_%d" % ii) + assert len(my_type_map) >= len(data["atom_numbs"]) + for ii in range(len(data["atom_numbs"])): + data["atom_names"].append(my_type_map[ii]) - data['orig'] = np.zeros([3]) - if 'nopbc' in g.keys(): - data['nopbc'] = True - sets = globfilter(g.keys(), 'set.*') + data["orig"] = np.zeros([3]) + if "nopbc" in g.keys(): + data["nopbc"] = True + sets = globfilter(g.keys(), "set.*") data_types = { - 'cells': {'fn': 'box', 'labeled': False, 'shape': (3,3), 'required': 'nopbc' not in data}, - 'coords': {'fn': 'coord', 'labeled': False, 'shape': (natoms,3), 'required': True}, - 'energies': {'fn': 'energy', 'labeled': True, 'shape': tuple(), 'required': False}, - 
'forces': {'fn': 'force', 'labeled': True, 'shape': (natoms,3), 'required': False}, - 'virials': {'fn': 'virial', 'labeled': True, 'shape': (3,3), 'required': False}, + "cells": { + "fn": "box", + "labeled": False, + "shape": (3, 3), + "required": "nopbc" not in data, + }, + "coords": { + "fn": "coord", + "labeled": False, + "shape": (natoms, 3), + "required": True, + }, + "energies": { + "fn": "energy", + "labeled": True, + "shape": tuple(), + "required": False, + }, + "forces": { + "fn": "force", + "labeled": True, + "shape": (natoms, 3), + "required": False, + }, + "virials": { + "fn": "virial", + "labeled": True, + "shape": (3, 3), + "required": False, + }, } - + for dt, prop in data_types.items(): all_data = [] for ii in sets: set = g[ii] - fn = '%s.npy' % prop['fn'] + fn = "%s.npy" % prop["fn"] if fn in set.keys(): dd = set[fn][:] nframes = dd.shape[0] - all_data.append(np.reshape(dd, (nframes, *prop['shape']))) - elif prop['required']: + all_data.append(np.reshape(dd, (nframes, *prop["shape"]))) + elif prop["required"]: raise RuntimeError("%s/%s/%s not found" % (folder, ii, fn)) - - if len(all_data) > 0 : - data[dt] = np.concatenate(all_data, axis = 0) - if 'cells' not in data: - nframes = data['coords'].shape[0] - data['cells'] = np.zeros((nframes, 3, 3)) + + if len(all_data) > 0: + data[dt] = np.concatenate(all_data, axis=0) + if "cells" not in data: + nframes = data["coords"].shape[0] + data["cells"] = np.zeros((nframes, 3, 3)) return data -def dump(f: Union[h5py.File, h5py.Group], - folder: str, - data: dict, - set_size = 5000, - comp_prec = np.float32, - ) -> None: + +def dump( + f: Union[h5py.File, h5py.Group], + folder: str, + data: dict, + set_size=5000, + comp_prec=np.float32, +) -> None: """Dump data to a HDF5 file. 
Parameters @@ -113,42 +143,46 @@ def dump(f: Union[h5py.File, h5py.Group], else: g = f # dump raw (array in fact) - g.create_dataset('type.raw', data=data['atom_types']) - g.create_dataset('type_map.raw', data=np.array(data['atom_names'], dtype='S')) + g.create_dataset("type.raw", data=data["atom_types"]) + g.create_dataset("type_map.raw", data=np.array(data["atom_names"], dtype="S")) # BondOrder System if "bonds" in data: - g.create_dataset("bonds.raw", data=data['bonds']) + g.create_dataset("bonds.raw", data=data["bonds"]) if "formal_charges" in data: - g.create_dataset("formal_charges.raw", data=data['formal_charges']) + g.create_dataset("formal_charges.raw", data=data["formal_charges"]) # reshape frame properties and convert prec - nframes = data['cells'].shape[0] + nframes = data["cells"].shape[0] nopbc = data.get("nopbc", False) reshaped_data = {} data_types = { - 'cells': {'fn': 'box', 'shape': (nframes, 9), 'dump': not nopbc}, - 'coords': {'fn': 'coord', 'shape': (nframes, -1), 'dump': True}, - 'energies': {'fn': 'energy', 'shape': (nframes,), 'dump': True}, - 'forces': {'fn': 'force', 'shape': (nframes, -1), 'dump': True}, - 'virials': {'fn': 'virial', 'shape': (nframes, 9), 'dump': True}, + "cells": {"fn": "box", "shape": (nframes, 9), "dump": not nopbc}, + "coords": {"fn": "coord", "shape": (nframes, -1), "dump": True}, + "energies": {"fn": "energy", "shape": (nframes,), "dump": True}, + "forces": {"fn": "force", "shape": (nframes, -1), "dump": True}, + "virials": {"fn": "virial", "shape": (nframes, 9), "dump": True}, } for dt, prop in data_types.items(): if dt in data: - if prop['dump']: - reshaped_data[dt] = np.reshape(data[dt], prop['shape']).astype(comp_prec) + if prop["dump"]: + reshaped_data[dt] = np.reshape(data[dt], prop["shape"]).astype( + comp_prec + ) # dump frame properties: cell, coord, energy, force and virial nsets = nframes // set_size - if set_size * nsets < nframes : + if set_size * nsets < nframes: nsets += 1 - for ii in range(nsets) : 
+ for ii in range(nsets): set_stt = ii * set_size - set_end = (ii+1) * set_size - set_folder = g.create_group('set.%03d' % ii) + set_end = (ii + 1) * set_size + set_folder = g.create_group("set.%03d" % ii) for dt, prop in data_types.items(): if dt in reshaped_data: - set_folder.create_dataset('%s.npy' % prop['fn'], data=reshaped_data[dt][set_stt:set_end]) + set_folder.create_dataset( + "%s.npy" % prop["fn"], data=reshaped_data[dt][set_stt:set_end] + ) if nopbc: - g.create_dataset("nopbc", data=True) + g.create_dataset("nopbc", data=True) diff --git a/dpdata/deepmd/raw.py b/dpdata/deepmd/raw.py index 49744d0e5..7de14baab 100644 --- a/dpdata/deepmd/raw.py +++ b/dpdata/deepmd/raw.py @@ -1,83 +1,99 @@ import os import numpy as np -def load_type(folder, type_map = None) : + +def load_type(folder, type_map=None): data = {} - data['atom_types'] \ - = np.loadtxt(os.path.join(folder, 'type.raw'), ndmin=1).astype(int) - ntypes = np.max(data['atom_types']) + 1 - data['atom_numbs'] = [] - for ii in range (ntypes) : - data['atom_numbs'].append(np.count_nonzero(data['atom_types'] == ii)) - data['atom_names'] = [] + data["atom_types"] = np.loadtxt(os.path.join(folder, "type.raw"), ndmin=1).astype( + int + ) + ntypes = np.max(data["atom_types"]) + 1 + data["atom_numbs"] = [] + for ii in range(ntypes): + data["atom_numbs"].append(np.count_nonzero(data["atom_types"] == ii)) + data["atom_names"] = [] # if find type_map.raw, use it - if os.path.isfile(os.path.join(folder, 'type_map.raw')) : - with open(os.path.join(folder, 'type_map.raw')) as fp: + if os.path.isfile(os.path.join(folder, "type_map.raw")): + with open(os.path.join(folder, "type_map.raw")) as fp: my_type_map = fp.read().split() - # else try to use arg type_map + # else try to use arg type_map elif type_map is not None: my_type_map = type_map # in the last case, make artificial atom names else: my_type_map = [] - for ii in range(ntypes) : - my_type_map.append('Type_%d' % ii) - assert(len(my_type_map) >= 
len(data['atom_numbs'])) - for ii in range(len(data['atom_numbs'])) : - data['atom_names'].append(my_type_map[ii]) + for ii in range(ntypes): + my_type_map.append("Type_%d" % ii) + assert len(my_type_map) >= len(data["atom_numbs"]) + for ii in range(len(data["atom_numbs"])): + data["atom_names"].append(my_type_map[ii]) return data -def to_system_data(folder, type_map = None, labels = True) : - if os.path.isdir(folder) : - data = load_type(folder, type_map = type_map) - data['orig'] = np.zeros([3]) - data['coords'] = np.loadtxt(os.path.join(folder, 'coord.raw'), ndmin=2) - nframes = data['coords'].shape[0] +def to_system_data(folder, type_map=None, labels=True): + if os.path.isdir(folder): + data = load_type(folder, type_map=type_map) + data["orig"] = np.zeros([3]) + data["coords"] = np.loadtxt(os.path.join(folder, "coord.raw"), ndmin=2) + nframes = data["coords"].shape[0] if os.path.isfile(os.path.join(folder, "nopbc")): - data['nopbc'] = True - data['cells'] = np.zeros((nframes, 3,3)) + data["nopbc"] = True + data["cells"] = np.zeros((nframes, 3, 3)) else: - data['cells'] = np.loadtxt(os.path.join(folder, 'box.raw'), ndmin=2) - data['cells'] = np.reshape(data['cells'], [nframes, 3, 3]) - data['coords'] = np.reshape(data['coords'], [nframes, -1, 3]) - if labels : - if os.path.exists(os.path.join(folder, 'energy.raw')) : - data['energies'] = np.loadtxt(os.path.join(folder, 'energy.raw')) - data['energies'] = np.reshape(data['energies'], [nframes]) - if os.path.exists(os.path.join(folder, 'force.raw')) : - data['forces'] = np.loadtxt(os.path.join(folder, 'force.raw')) - data['forces'] = np.reshape(data['forces'], [nframes, -1, 3]) - if os.path.exists(os.path.join(folder, 'virial.raw')) : - data['virials'] = np.loadtxt(os.path.join(folder, 'virial.raw')) - data['virials'] = np.reshape(data['virials'], [nframes, 3, 3]) + data["cells"] = np.loadtxt(os.path.join(folder, "box.raw"), ndmin=2) + data["cells"] = np.reshape(data["cells"], [nframes, 3, 3]) + data["coords"] = 
np.reshape(data["coords"], [nframes, -1, 3]) + if labels: + if os.path.exists(os.path.join(folder, "energy.raw")): + data["energies"] = np.loadtxt(os.path.join(folder, "energy.raw")) + data["energies"] = np.reshape(data["energies"], [nframes]) + if os.path.exists(os.path.join(folder, "force.raw")): + data["forces"] = np.loadtxt(os.path.join(folder, "force.raw")) + data["forces"] = np.reshape(data["forces"], [nframes, -1, 3]) + if os.path.exists(os.path.join(folder, "virial.raw")): + data["virials"] = np.loadtxt(os.path.join(folder, "virial.raw")) + data["virials"] = np.reshape(data["virials"], [nframes, 3, 3]) if os.path.isfile(os.path.join(folder, "nopbc")): - data['nopbc'] = True + data["nopbc"] = True return data - else : - raise RuntimeError('not dir ' + folder) + else: + raise RuntimeError("not dir " + folder) -def dump (folder, data) : - os.makedirs(folder, exist_ok = True) - nframes = data['cells'].shape[0] - np.savetxt(os.path.join(folder, 'type.raw'), data['atom_types'], fmt = '%d') - np.savetxt(os.path.join(folder, 'type_map.raw'), data['atom_names'], fmt = '%s') - np.savetxt(os.path.join(folder, 'box.raw'), np.reshape(data['cells'], [nframes, 9])) - np.savetxt(os.path.join(folder, 'coord.raw'), np.reshape(data['coords'], [nframes, -1])) +def dump(folder, data): + os.makedirs(folder, exist_ok=True) + nframes = data["cells"].shape[0] + np.savetxt(os.path.join(folder, "type.raw"), data["atom_types"], fmt="%d") + np.savetxt(os.path.join(folder, "type_map.raw"), data["atom_names"], fmt="%s") + np.savetxt(os.path.join(folder, "box.raw"), np.reshape(data["cells"], [nframes, 9])) + np.savetxt( + os.path.join(folder, "coord.raw"), np.reshape(data["coords"], [nframes, -1]) + ) # BondOrder System if "bonds" in data: - np.savetxt(os.path.join(folder, "bonds.raw"), data['bonds'], header="begin_atom, end_atom, bond_order") + np.savetxt( + os.path.join(folder, "bonds.raw"), + data["bonds"], + header="begin_atom, end_atom, bond_order", + ) if "formal_charges" in data: - 
np.savetxt(os.path.join(folder, "formal_charges.raw"), data['formal_charges']) + np.savetxt(os.path.join(folder, "formal_charges.raw"), data["formal_charges"]) # Labeled System - if 'energies' in data : - np.savetxt(os.path.join(folder, 'energy.raw'), np.reshape(data['energies'], [nframes, 1])) - if 'forces' in data : - np.savetxt(os.path.join(folder, 'force.raw'), np.reshape(data['forces'], [nframes, -1])) - if 'virials' in data : - np.savetxt(os.path.join(folder, 'virial.raw'), np.reshape(data['virials'], [nframes, 9])) + if "energies" in data: + np.savetxt( + os.path.join(folder, "energy.raw"), + np.reshape(data["energies"], [nframes, 1]), + ) + if "forces" in data: + np.savetxt( + os.path.join(folder, "force.raw"), np.reshape(data["forces"], [nframes, -1]) + ) + if "virials" in data: + np.savetxt( + os.path.join(folder, "virial.raw"), + np.reshape(data["virials"], [nframes, 9]), + ) try: os.remove(os.path.join(folder, "nopbc")) except OSError: @@ -85,4 +101,3 @@ def dump (folder, data) : if data.get("nopbc", False): with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: pass - diff --git a/dpdata/driver.py b/dpdata/driver.py index 670b03378..0f9039472 100644 --- a/dpdata/driver.py +++ b/dpdata/driver.py @@ -6,6 +6,7 @@ if TYPE_CHECKING: import ase + class Driver(ABC): """The base class for a driver plugin. A driver can label a pure System to generate the LabeledSystem. @@ -14,17 +15,18 @@ class Driver(ABC): -------- dpdata.plugins.deepmd.DPDriver : an example of Driver """ + __DriverPlugin = Plugin() @staticmethod def register(key: str) -> Callable: """Register a driver plugin. Used as decorators. - + Parameter --------- key: str key of the plugin. - + Returns ------- Callable @@ -41,17 +43,17 @@ def register(key: str) -> Callable: @staticmethod def get_driver(key: str) -> "Driver": """Get a driver plugin. - + Parameter --------- key: str key of the plugin. 
- + Returns ------- Driver the specific driver class - + Raises ------ RuntimeError @@ -60,20 +62,20 @@ def get_driver(key: str) -> "Driver": try: return Driver.__DriverPlugin.plugins[key] except KeyError as e: - raise RuntimeError('Unknown driver: ' + key) from e - + raise RuntimeError("Unknown driver: " + key) from e + def __init__(self, *args, **kwargs) -> None: """Setup the driver.""" @abstractmethod def label(self, data: dict) -> dict: """Label a system data. Returns new data with energy, forces, and virials. - + Parameters ---------- data : dict data with coordinates and atom types - + Returns ------- dict @@ -85,6 +87,7 @@ def label(self, data: dict) -> dict: def ase_calculator(self) -> "ase.calculators.calculator.Calculator": """Returns an ase calculator based on this driver.""" from .ase_calculator import DPDataCalculator + return DPDataCalculator(self) @@ -112,6 +115,7 @@ class HybridDriver(Driver): ... ]) This driver is the hybrid of SQM and DP. """ + def __init__(self, drivers: List[Union[dict, Driver]]) -> None: self.drivers = [] for driver in drivers: @@ -128,12 +132,12 @@ def label(self, data: dict) -> dict: """Label a system data. Energies and forces are the sum of those of each driver. - + Parameters ---------- data : dict data with coordinates and atom types - + Returns ------- dict @@ -144,8 +148,8 @@ def label(self, data: dict) -> dict: if ii == 0: labeled_data = lb_data.copy() else: - labeled_data['energies'] += lb_data ['energies'] - labeled_data['forces'] += lb_data ['forces'] + labeled_data["energies"] += lb_data["energies"] + labeled_data["forces"] += lb_data["forces"] return labeled_data @@ -153,17 +157,18 @@ class Minimizer(ABC): """The base class for a minimizer plugin. A minimizer can minimize geometry. """ + __MinimizerPlugin = Plugin() @staticmethod def register(key: str) -> Callable: """Register a minimizer plugin. Used as decorators. - + Parameter --------- key: str key of the plugin. 
- + Returns ------- Callable @@ -180,17 +185,17 @@ def register(key: str) -> Callable: @staticmethod def get_minimizer(key: str) -> "Minimizer": """Get a minimizer plugin. - + Parameter --------- key: str key of the plugin. - + Returns ------- Minimizer the specific minimizer class - + Raises ------ RuntimeError @@ -199,7 +204,7 @@ def get_minimizer(key: str) -> "Minimizer": try: return Minimizer.__MinimizerPlugin.plugins[key] except KeyError as e: - raise RuntimeError('Unknown minimizer: ' + key) from e + raise RuntimeError("Unknown minimizer: " + key) from e def __init__(self, *args, **kwargs) -> None: """Setup the minimizer.""" @@ -212,7 +217,7 @@ def minimize(self, data: dict) -> dict: ---------- data : dict data with coordinates and atom types - + Returns ------- dict diff --git a/dpdata/fhi_aims/output.py b/dpdata/fhi_aims/output.py index 1a1b2c579..423957b7c 100755 --- a/dpdata/fhi_aims/output.py +++ b/dpdata/fhi_aims/output.py @@ -2,15 +2,17 @@ import re import warnings -latt_patt="\|\s+([0-9]{1,}[.][0-9]*)\s+([0-9]{1,}[.][0-9]*)\s+([0-9]{1,}[.][0-9]*)" -pos_patt_first="\|\s+[0-9]{1,}[:]\s\w+\s(\w+)(\s.*[-]?[0-9]{1,}[.][0-9]*)(\s+[-]?[0-9]{1,}[.][0-9]*)(\s+[-]?[0-9]{1,}[.][0-9]*)" -pos_patt_other="\s+[a][t][o][m]\s+([-]?[0-9]{1,}[.][0-9]*)\s+([-]?[0-9]{1,}[.][0-9]*)\s+([-]?[0-9]{1,}[.][0-9]*)\s+(\w{1,2})" -force_patt="\|\s+[0-9]{1,}\s+([-]?[0-9]{1,}[.][0-9]*[E][+-][0-9]{1,})\s+([-]?[0-9]{1,}[.][0-9]*[E][+-][0-9]{1,})\s+([-]?[0-9]{1,}[.][0-9]*[E][+-][0-9]{1,})" -eng_patt="Total energy uncorrected.*([-]?[0-9]{1,}[.][0-9]*[E][+-][0-9]{1,})\s+eV" -#atom_numb_patt="Number of atoms.*([0-9]{1,})" +latt_patt = "\|\s+([0-9]{1,}[.][0-9]*)\s+([0-9]{1,}[.][0-9]*)\s+([0-9]{1,}[.][0-9]*)" +pos_patt_first = "\|\s+[0-9]{1,}[:]\s\w+\s(\w+)(\s.*[-]?[0-9]{1,}[.][0-9]*)(\s+[-]?[0-9]{1,}[.][0-9]*)(\s+[-]?[0-9]{1,}[.][0-9]*)" +pos_patt_other = "\s+[a][t][o][m]\s+([-]?[0-9]{1,}[.][0-9]*)\s+([-]?[0-9]{1,}[.][0-9]*)\s+([-]?[0-9]{1,}[.][0-9]*)\s+(\w{1,2})" +force_patt = 
"\|\s+[0-9]{1,}\s+([-]?[0-9]{1,}[.][0-9]*[E][+-][0-9]{1,})\s+([-]?[0-9]{1,}[.][0-9]*[E][+-][0-9]{1,})\s+([-]?[0-9]{1,}[.][0-9]*[E][+-][0-9]{1,})" +eng_patt = "Total energy uncorrected.*([-]?[0-9]{1,}[.][0-9]*[E][+-][0-9]{1,})\s+eV" +# atom_numb_patt="Number of atoms.*([0-9]{1,})" debug = False -def get_info (lines, type_idx_zero = False) : + + +def get_info(lines, type_idx_zero=False): atom_types = [] atom_names = [] @@ -18,163 +20,184 @@ def get_info (lines, type_idx_zero = False) : atom_numbs = None _atom_names = [] - contents="\n".join(lines) - #cell - #_tmp=re.findall(latt_patt,contents) - #for ii in _tmp: + contents = "\n".join(lines) + # cell + # _tmp=re.findall(latt_patt,contents) + # for ii in _tmp: # vect=[float(kk) for kk in ii] # cell.append(vect) - #------------------ - for ln,l in enumerate(lines): - if l.startswith(' | Unit cell'): + # ------------------ + for ln, l in enumerate(lines): + if l.startswith(" | Unit cell"): break - _tmp=lines[ln+1:ln+4] + _tmp = lines[ln + 1 : ln + 4] for ii in _tmp: - v_str=ii.split('|')[1].split() - vect=[float(kk) for kk in v_str] + v_str = ii.split("|")[1].split() + vect = [float(kk) for kk in v_str] cell.append(vect) - _tmp=re.findall(pos_patt_first,contents) + _tmp = re.findall(pos_patt_first, contents) for ii in _tmp: _atom_names.append(ii[0]) - atom_names=[] + atom_names = [] for ii in _atom_names: if not ii in atom_names: - atom_names.append(ii) - - atom_numbs =[_atom_names.count(ii) for ii in atom_names] - if type_idx_zero : - type_map=dict(zip(atom_names,range(len(atom_names)))) + atom_names.append(ii) + + atom_numbs = [_atom_names.count(ii) for ii in atom_names] + if type_idx_zero: + type_map = dict(zip(atom_names, range(len(atom_names)))) else: - type_map=dict(zip(atom_names,range(1,len(atom_names)+1))) - atom_types=list(map(lambda k: type_map[k], _atom_names)) - assert(atom_numbs is not None), "cannot find ion type info in aims output" - + type_map = dict(zip(atom_names, range(1, len(atom_names) + 1))) + 
atom_types = list(map(lambda k: type_map[k], _atom_names)) + assert atom_numbs is not None, "cannot find ion type info in aims output" - return [cell, atom_numbs, atom_names, atom_types ] + return [cell, atom_numbs, atom_names, atom_types] -def get_fhi_aims_block(fp) : +def get_fhi_aims_block(fp): blk = [] - for ii in fp : - if not ii : + for ii in fp: + if not ii: return blk - blk.append(ii.rstrip('\n')) - if 'Begin self-consistency loop: Re-initialization' in ii: + blk.append(ii.rstrip("\n")) + if "Begin self-consistency loop: Re-initialization" in ii: return blk return blk -def get_frames (fname, md=True, begin = 0, step = 1, convergence_check=True) : + +def get_frames(fname, md=True, begin=0, step=1, convergence_check=True): fp = open(fname) blk = get_fhi_aims_block(fp) - ret = get_info(blk, type_idx_zero = True) + ret = get_info(blk, type_idx_zero=True) - cell, atom_numbs, atom_names, atom_types =ret[0],ret[1],ret[2],ret[3] + cell, atom_numbs, atom_names, atom_types = ret[0], ret[1], ret[2], ret[3] ntot = sum(atom_numbs) all_coords = [] all_cells = [] all_energies = [] all_forces = [] - all_virials = [] + all_virials = [] cc = 0 rec_failed = [] - while len(blk) > 0 : + while len(blk) > 0: if debug: - with open(str(cc),'w') as f: - f.write('\n'.join(blk)) - if cc >= begin and (cc - begin) % step == 0 : - if cc==0: - coord, _cell, energy, force, virial, is_converge = analyze_block(blk, first_blk=True, md=md) + with open(str(cc), "w") as f: + f.write("\n".join(blk)) + if cc >= begin and (cc - begin) % step == 0: + if cc == 0: + coord, _cell, energy, force, virial, is_converge = analyze_block( + blk, first_blk=True, md=md + ) else: - coord, _cell, energy, force, virial, is_converge = analyze_block(blk, first_blk=False) + coord, _cell, energy, force, virial, is_converge = analyze_block( + blk, first_blk=False + ) if len(coord) == 0: break - if is_converge or not convergence_check: + if is_converge or not convergence_check: all_coords.append(coord) if _cell: - 
all_cells.append(_cell) + all_cells.append(_cell) else: - all_cells.append(cell) + all_cells.append(cell) all_energies.append(energy) all_forces.append(force) - if virial is not None : + if virial is not None: all_virials.append(virial) if not is_converge: - rec_failed.append(cc+1) - + rec_failed.append(cc + 1) + blk = get_fhi_aims_block(fp) cc += 1 - - if len(rec_failed) > 0 : - prt = "so they are not collected." if convergence_check else "but they are still collected due to the requirement for ignoring convergence checks." - warnings.warn(f"The following structures were unconverged: {rec_failed}; "+prt) - - if len(all_virials) == 0 : + + if len(rec_failed) > 0: + prt = ( + "so they are not collected." + if convergence_check + else "but they are still collected due to the requirement for ignoring convergence checks." + ) + warnings.warn( + f"The following structures were unconverged: {rec_failed}; " + prt + ) + + if len(all_virials) == 0: all_virials = None - else : + else: all_virials = np.array(all_virials) fp.close() - return atom_names, atom_numbs, np.array(atom_types), np.array(all_cells), np.array(all_coords), np.array(all_energies), np.array(all_forces), all_virials - - -def analyze_block(lines, first_blk=False, md=True) : + return ( + atom_names, + atom_numbs, + np.array(atom_types), + np.array(all_cells), + np.array(all_coords), + np.array(all_energies), + np.array(all_forces), + all_virials, + ) + + +def analyze_block(lines, first_blk=False, md=True): coord = [] cell = [] energy = None force = [] virial = None - atom_names=[] - _atom_names=[] + atom_names = [] + _atom_names = [] - contents="\n".join(lines) + contents = "\n".join(lines) try: - natom=int(re.findall("Number of atoms.*([0-9]{1,})",lines)[0]) + natom = int(re.findall("Number of atoms.*([0-9]{1,})", lines)[0]) except Exception: - natom=0 + natom = 0 if first_blk: - if md: - _tmp=re.findall(pos_patt_other,contents)[:] - for ii in _tmp[slice(int(len(_tmp)/2),len(_tmp))]: - 
coord.append([float(kk) for kk in ii[:-1]]) - else: - _tmp=re.findall(pos_patt_first,contents) - for ii in _tmp: - coord.append([float(kk) for kk in ii[1:]]) + if md: + _tmp = re.findall(pos_patt_other, contents)[:] + for ii in _tmp[slice(int(len(_tmp) / 2), len(_tmp))]: + coord.append([float(kk) for kk in ii[:-1]]) + else: + _tmp = re.findall(pos_patt_first, contents) + for ii in _tmp: + coord.append([float(kk) for kk in ii[1:]]) else: - _tmp=re.findall(pos_patt_other,contents) - for ii in _tmp: - coord.append([float(kk) for kk in ii[:-1]]) + _tmp = re.findall(pos_patt_other, contents) + for ii in _tmp: + coord.append([float(kk) for kk in ii[:-1]]) - _tmp=re.findall(force_patt,contents) + _tmp = re.findall(force_patt, contents) for ii in _tmp: force.append([float(kk) for kk in ii]) if "Self-consistency cycle converged" in contents: - is_converge=True + is_converge = True else: - is_converge=False + is_converge = False try: - _eng_patt=re.compile(eng_patt) - energy=float(_eng_patt.search(contents).group().split()[-2]) + _eng_patt = re.compile(eng_patt) + energy = float(_eng_patt.search(contents).group().split()[-2]) except Exception: - energy=None - + energy = None + if not energy: - is_converge = False + is_converge = False if energy: - assert((force is not None) and len(coord) > 0 ) + assert (force is not None) and len(coord) > 0 return coord, cell, energy, force, virial, is_converge -if __name__=='__main__': - import sys - ret=get_frames (sys.argv[1], begin = 0, step = 1) - print(ret) + +if __name__ == "__main__": + import sys + + ret = get_frames(sys.argv[1], begin=0, step=1) + print(ret) diff --git a/dpdata/format.py b/dpdata/format.py index b96c374a3..84813c760 100644 --- a/dpdata/format.py +++ b/dpdata/format.py @@ -22,7 +22,7 @@ def register_from(key): @staticmethod def register_to(key): return Format.__ToPlugin.register(key) - + @staticmethod def get_formats(): return Format.__FormatPlugin.plugins @@ -34,7 +34,7 @@ def get_from_methods(): @staticmethod def 
get_to_methods(): return Format.__ToPlugin.plugins - + @staticmethod def post(func_name): def decorator(object): @@ -43,6 +43,7 @@ def decorator(object): else: object.post_func = func_name return object + return decorator def from_system(self, file_name, **kwargs): @@ -58,7 +59,9 @@ def from_system(self, file_name, **kwargs): data: dict system data """ - raise NotImplementedError("%s doesn't support System.from" %(self.__class__.__name__)) + raise NotImplementedError( + "%s doesn't support System.from" % (self.__class__.__name__) + ) def to_system(self, data, *args, **kwargs): """System.to @@ -68,16 +71,22 @@ def to_system(self, data, *args, **kwargs): data: dict system data """ - raise NotImplementedError("%s doesn't support System.to" %(self.__class__.__name__)) + raise NotImplementedError( + "%s doesn't support System.to" % (self.__class__.__name__) + ) def from_labeled_system(self, file_name, **kwargs): - raise NotImplementedError("%s doesn't support LabeledSystem.from" %(self.__class__.__name__)) + raise NotImplementedError( + "%s doesn't support LabeledSystem.from" % (self.__class__.__name__) + ) def to_labeled_system(self, data, *args, **kwargs): return self.to_system(data, *args, **kwargs) def from_bond_order_system(self, file_name, **kwargs): - raise NotImplementedError("%s doesn't support BondOrderSystem.from" %(self.__class__.__name__)) + raise NotImplementedError( + "%s doesn't support BondOrderSystem.from" % (self.__class__.__name__) + ) def to_bond_order_system(self, data, rdkit_mol, *args, **kwargs): return self.to_system(data, *args, **kwargs) @@ -87,6 +96,7 @@ class MultiModes: 0 (default): not implemented 1: every directory under the top-level directory is a system """ + NotImplemented = 0 Directory = 1 @@ -94,23 +104,30 @@ class MultiModes: def from_multi_systems(self, directory, **kwargs): """MultiSystems.from - + Parameters ---------- directory: str directory of system - + Returns ------- filenames: list[str] list of filenames """ if 
self.MultiMode == self.MultiModes.Directory: - return [os.path.join(directory, name) for name in os.listdir(directory) if os.path.isdir(os.path.join(directory, name))] - raise NotImplementedError("%s doesn't support MultiSystems.from" %(self.__class__.__name__)) + return [ + os.path.join(directory, name) + for name in os.listdir(directory) + if os.path.isdir(os.path.join(directory, name)) + ] + raise NotImplementedError( + "%s doesn't support MultiSystems.from" % (self.__class__.__name__) + ) def to_multi_systems(self, formulas, directory, **kwargs): if self.MultiMode == self.MultiModes.Directory: return [os.path.join(directory, ff) for ff in formulas] - raise NotImplementedError("%s doesn't support MultiSystems.to" %(self.__class__.__name__)) - + raise NotImplementedError( + "%s doesn't support MultiSystems.to" % (self.__class__.__name__) + ) diff --git a/dpdata/gaussian/gjf.py b/dpdata/gaussian/gjf.py index 6c169b48c..be089e246 100644 --- a/dpdata/gaussian/gjf.py +++ b/dpdata/gaussian/gjf.py @@ -10,6 +10,7 @@ import numpy as np from scipy.sparse import csr_matrix from scipy.sparse.csgraph import connected_components + try: from openbabel import openbabel except ImportError: @@ -20,10 +21,9 @@ from dpdata.periodic_table import Element - def _crd2frag(symbols: List[str], crds: np.ndarray) -> Tuple[int, List[int]]: """Detect fragments from coordinates. - + Parameters ---------- symbols : list[str] @@ -52,7 +52,9 @@ def _crd2frag(symbols: List[str], crds: np.ndarray) -> Tuple[int, List[int]]: if Open Babel is not installed """ if openbabel is None: - raise ImportError("Open Babel (Python interface) should be installed to detect fragmentation!") + raise ImportError( + "Open Babel (Python interface) should be installed to detect fragmentation!" 
+ ) atomnumber = len(symbols) # Use openbabel to connect atoms mol = openbabel.OBMol() @@ -74,14 +76,15 @@ def _crd2frag(symbols: List[str], crds: np.ndarray) -> Tuple[int, List[int]]: bonds.extend([[a, b, bo], [b, a, bo]]) bonds = np.array(bonds, ndmin=2).reshape((-1, 3)) graph = csr_matrix( - (bonds[:, 2], (bonds[:, 0], bonds[:, 1])), shape=(atomnumber, atomnumber)) + (bonds[:, 2], (bonds[:, 0], bonds[:, 1])), shape=(atomnumber, atomnumber) + ) frag_numb, frag_index = connected_components(graph, 0) return frag_numb, frag_index def detect_multiplicity(symbols: np.ndarray) -> int: """Find the minimal multiplicity of the given molecules. - + Parameters ---------- symbols : np.ndarray @@ -102,15 +105,15 @@ def detect_multiplicity(symbols: np.ndarray) -> int: def make_gaussian_input( - sys_data: dict, - keywords: Union[str, List[str]], - multiplicity: Union[str ,int] = "auto", - charge: int = 0, - fragment_guesses: bool = False, - basis_set: Optional[str] = None, - keywords_high_multiplicity: Optional[str] = None, - nproc: int = 1, - ) -> str: + sys_data: dict, + keywords: Union[str, List[str]], + multiplicity: Union[str, int] = "auto", + charge: int = 0, + fragment_guesses: bool = False, + basis_set: Optional[str] = None, + keywords_high_multiplicity: Optional[str] = None, + nproc: int = 1, +) -> str: """Make gaussian input file. 
Parameters @@ -149,21 +152,21 @@ def make_gaussian_input( str gjf output string """ - coordinates = sys_data['coords'][0] - atom_names = sys_data['atom_names'] - atom_numbs = sys_data['atom_numbs'] - atom_types = sys_data['atom_types'] + coordinates = sys_data["coords"][0] + atom_names = sys_data["atom_names"] + atom_numbs = sys_data["atom_numbs"] + atom_types = sys_data["atom_types"] # get atom symbols list symbols = [atom_names[atom_type] for atom_type in atom_types] # assume default charge is zero and default spin multiplicity is 1 - if 'charge' in sys_data.keys(): - charge = sys_data['charge'] - + if "charge" in sys_data.keys(): + charge = sys_data["charge"] + use_fragment_guesses = False if isinstance(multiplicity, int): mult_auto = False - elif multiplicity == 'auto': + elif multiplicity == "auto": mult_auto = True else: raise RuntimeError('The keyword "multiplicity" is illegal.') @@ -186,16 +189,22 @@ def make_gaussian_input( mult_frags.append(detect_multiplicity(np.array(symbols)[idx])) if use_fragment_guesses: multiplicity = sum(mult_frags) - frag_numb + 1 - charge % 2 - chargekeywords_frag = "%d %d" % (charge, multiplicity) + \ - ''.join([' %d %d' % (charge, mult_frag) - for mult_frag in mult_frags]) + chargekeywords_frag = "%d %d" % (charge, multiplicity) + "".join( + [" %d %d" % (charge, mult_frag) for mult_frag in mult_frags] + ) else: multi_frags = np.array(mult_frags) - multiplicity = 1 + \ - np.count_nonzero(multi_frags == 2) % 2 + \ - np.count_nonzero(multi_frags == 3) * 2 - charge % 2 + multiplicity = ( + 1 + + np.count_nonzero(multi_frags == 2) % 2 + + np.count_nonzero(multi_frags == 3) * 2 + - charge % 2 + ) - if keywords_high_multiplicity is not None and np.count_nonzero(multi_frags == 2) >= 2: + if ( + keywords_high_multiplicity is not None + and np.count_nonzero(multi_frags == 2) >= 2 + ): # at least 2 radicals keywords = keywords_high_multiplicity @@ -207,39 +216,58 @@ def make_gaussian_input( buff = [] # keywords, e.g., force b3lyp/6-31g** 
if use_fragment_guesses: - keywords[0] = '{} guess=fragment={}'.format( - keywords[0], frag_numb) + keywords[0] = "{} guess=fragment={}".format(keywords[0], frag_numb) chkkeywords = [] - if len(keywords)>1: - chkkeywords.append('%chk={}.chk'.format(str(uuid.uuid1()))) + if len(keywords) > 1: + chkkeywords.append("%chk={}.chk".format(str(uuid.uuid1()))) - nprockeywords = '%nproc={:d}'.format(nproc) + nprockeywords = "%nproc={:d}".format(nproc) # use formula as title - titlekeywords = ''.join(["{}{}".format(symbol,numb) for symbol,numb in - zip(atom_names, atom_numbs)]) - chargekeywords = '{} {}'.format(charge, multiplicity) + titlekeywords = "".join( + ["{}{}".format(symbol, numb) for symbol, numb in zip(atom_names, atom_numbs)] + ) + chargekeywords = "{} {}".format(charge, multiplicity) - buff = [*chkkeywords, nprockeywords, '#{}'.format( - keywords[0]), '', titlekeywords, '', (chargekeywords_frag if use_fragment_guesses else chargekeywords)] + buff = [ + *chkkeywords, + nprockeywords, + "#{}".format(keywords[0]), + "", + titlekeywords, + "", + (chargekeywords_frag if use_fragment_guesses else chargekeywords), + ] for ii, (symbol, coordinate) in enumerate(zip(symbols, coordinates)): if use_fragment_guesses: - buff.append("%s(Fragment=%d) %f %f %f" % - (symbol, frag_index[ii] + 1, *coordinate)) + buff.append( + "%s(Fragment=%d) %f %f %f" % (symbol, frag_index[ii] + 1, *coordinate) + ) else: buff.append("%s %f %f %f" % (symbol, *coordinate)) - if not sys_data.get('nopbc', False): + if not sys_data.get("nopbc", False): # PBC condition - cell = sys_data['cells'][0] + cell = sys_data["cells"][0] for ii in range(3): # use TV as atomic symbol, see https://gaussian.com/pbc/ - buff.append('TV %f %f %f' % (*cell[ii],)) + buff.append("TV %f %f %f" % (*cell[ii],)) if basis_set is not None: # custom basis set - buff.extend(['', basis_set, '']) + buff.extend(["", basis_set, ""]) for kw in itertools.islice(keywords, 1, None): - buff.extend(['\n--link1--', *chkkeywords, 
nprockeywords, - '#{}'.format(kw), '', titlekeywords, '', chargekeywords, '']) - buff.append('\n') - return '\n'.join(buff) + buff.extend( + [ + "\n--link1--", + *chkkeywords, + nprockeywords, + "#{}".format(kw), + "", + titlekeywords, + "", + chargekeywords, + "", + ] + ) + buff.append("\n") + return "\n".join(buff) diff --git a/dpdata/gaussian/log.py b/dpdata/gaussian/log.py index 0e0066821..54bc1d518 100644 --- a/dpdata/gaussian/log.py +++ b/dpdata/gaussian/log.py @@ -8,6 +8,7 @@ symbols = ["X"] + ELEMENTS + def to_system_data(file_name, md=False): data = {} # read from log lines @@ -24,10 +25,14 @@ def to_system_data(file_name, md=False): if line.startswith(" SCF Done"): # energies energy = float(line.split()[4]) - elif line.startswith(" Center Atomic Forces (Hartrees/Bohr)"): + elif line.startswith( + " Center Atomic Forces (Hartrees/Bohr)" + ): flag = 1 forces = [] - elif line.startswith(" Input orientation:") or line.startswith(" Z-Matrix orientation:"): + elif line.startswith( + " Input orientation:" + ) or line.startswith(" Z-Matrix orientation:"): flag = 5 coords = [] atom_symbols = [] @@ -45,7 +50,9 @@ def to_system_data(file_name, md=False): nopbc = False cells_t.append(cells) else: - cells_t.append([[100., 0., 0.], [0., 100., 0.], [0., 0., 100.]]) + cells_t.append( + [[100.0, 0.0, 0.0], [0.0, 100.0, 0.0], [0.0, 0.0, 100.0]] + ) flag = 0 else: s = line.split() @@ -53,7 +60,9 @@ def to_system_data(file_name, md=False): # PBC pass else: - forces.append([float(line[23:38]), float(line[38:53]), float(line[53:68])]) + forces.append( + [float(line[23:38]), float(line[38:53]), float(line[53:68])] + ) elif flag == 10: # atom_symbols and coords if line.startswith(" -------"): @@ -67,22 +76,24 @@ def to_system_data(file_name, md=False): coords.append([float(x) for x in s[3:6]]) atom_symbols.append(symbols[int(s[1])]) - assert(coords_t), "cannot find coords" - assert(energy_t), "cannot find energies" - assert(forces_t), "cannot find forces" + assert coords_t, 
"cannot find coords" + assert energy_t, "cannot find energies" + assert forces_t, "cannot find forces" - atom_names, data['atom_types'], atom_numbs = np.unique(atom_symbols, return_inverse=True, return_counts=True) - data['atom_names'] = list(atom_names) - data['atom_numbs'] = list(atom_numbs) + atom_names, data["atom_types"], atom_numbs = np.unique( + atom_symbols, return_inverse=True, return_counts=True + ) + data["atom_names"] = list(atom_names) + data["atom_numbs"] = list(atom_numbs) if not md: forces_t = forces_t[-1:] energy_t = energy_t[-1:] coords_t = coords_t[-1:] cells_t = cells_t[-1:] - data['forces'] = np.array(forces_t) * force_convert - data['energies'] = np.array(energy_t) * energy_convert - data['coords'] = np.array(coords_t) - data['orig'] = np.array([0, 0, 0]) - data['cells'] = np.array(cells_t) - data['nopbc'] = nopbc + data["forces"] = np.array(forces_t) * force_convert + data["energies"] = np.array(energy_t) * energy_convert + data["coords"] = np.array(coords_t) + data["orig"] = np.array([0, 0, 0]) + data["cells"] = np.array(cells_t) + data["nopbc"] = nopbc return data diff --git a/dpdata/gromacs/gro.py b/dpdata/gromacs/gro.py index 2114810ef..b9930f2bf 100644 --- a/dpdata/gromacs/gro.py +++ b/dpdata/gromacs/gro.py @@ -7,29 +7,32 @@ ang2nm = LengthConversion("angstrom", "nm").value() cell_idx_gmx2dp = [0, 4, 8, 1, 2, 3, 5, 6, 7] + def _format_atom_name(atom_name): patt = re.compile("[a-zA-Z]*") match = re.search(patt, atom_name) fmt_name = match.group().capitalize() return fmt_name + def _get_line(line, fmt_atom_name=True): atom_name = line[10:15].split()[0] if fmt_atom_name: atom_name = _format_atom_name(atom_name) atom_idx = int(line[15:20].split()[0]) - posis = [float(line[ii:ii+8]) for ii in range(20,44,8)] + posis = [float(line[ii : ii + 8]) for ii in range(20, 44, 8)] posis = np.array(posis) * nm2ang return atom_name, atom_idx, posis + def _get_cell(line): - cell = np.zeros([3,3]) + cell = np.zeros([3, 3]) lengths = [float(ii) for ii in 
line.split()] if len(lengths) >= 3: for dd in range(3): cell[dd][dd] = lengths[dd] else: - raise RuntimeError('wrong box format: ', line) + raise RuntimeError("wrong box format: ", line) if len(lengths) == 9: cell[0][1] = lengths[3] cell[0][2] = lengths[4] @@ -40,8 +43,9 @@ def _get_cell(line): cell = cell * nm2ang return cell + def file_to_system_data(fname, format_atom_name=True, **kwargs): - system = {'coords': [], 'cells': []} + system = {"coords": [], "cells": []} with open(fname) as fp: frame = 0 while True: @@ -62,17 +66,22 @@ def file_to_system_data(fname, format_atom_name=True, **kwargs): cell = _get_cell(fp.readline()) posis = np.array(posis) if frame == 1: - system['orig'] = np.zeros(3) - system['atom_names'] = list(set(names)) - system['atom_numbs'] = [names.count(ii) for ii in system['atom_names']] - system['atom_types'] = [system['atom_names'].index(ii) for ii in names] - system['atom_types'] = np.array(system['atom_types'], dtype = int) - system['coords'].append(posis) - system['cells'].append(cell) - system['coords'] = np.array(system['coords']) - system['cells'] = np.array(system['cells']) + system["orig"] = np.zeros(3) + system["atom_names"] = list(set(names)) + system["atom_numbs"] = [ + names.count(ii) for ii in system["atom_names"] + ] + system["atom_types"] = [ + system["atom_names"].index(ii) for ii in names + ] + system["atom_types"] = np.array(system["atom_types"], dtype=int) + system["coords"].append(posis) + system["cells"].append(cell) + system["coords"] = np.array(system["coords"]) + system["cells"] = np.array(system["cells"]) return system + def from_system_data(system, f_idx=0, **kwargs): resname = kwargs.get("resname", "MOL") shift = kwargs.get("shift", 0) @@ -84,7 +93,9 @@ def from_system_data(system, f_idx=0, **kwargs): atom_type = system["atom_types"][i] atom_name = system["atom_names"][atom_type] coords = system["coords"][f_idx] * ang2nm - ret += "{:>5d}{:<5s}{:>5s}{:5d}{:8.3f}{:8.3f}{:8.3f}\n".format(1, resname, atom_name, 
i+shift+1, *tuple(coords[i])) + ret += "{:>5d}{:<5s}{:>5s}{:5d}{:8.3f}{:8.3f}{:8.3f}\n".format( + 1, resname, atom_name, i + shift + 1, *tuple(coords[i]) + ) cell = (system["cells"][f_idx].flatten() * ang2nm)[cell_idx_gmx2dp] ret += " " + " ".join([f"{x:.3f}" for x in cell]) diff --git a/dpdata/lammps/dump.py b/dpdata/lammps/dump.py index 135fe0514..85b87ba80 100644 --- a/dpdata/lammps/dump.py +++ b/dpdata/lammps/dump.py @@ -2,214 +2,246 @@ import os, sys import numpy as np + lib_path = os.path.dirname(os.path.realpath(__file__)) sys.path.append(lib_path) import lmp import warnings + + class UnwrapWarning(UserWarning): pass -warnings.simplefilter('once', UnwrapWarning) -def _get_block (lines, key) : - for idx in range(len(lines)) : - if ('ITEM: ' + key) in lines[idx] : +warnings.simplefilter("once", UnwrapWarning) + + +def _get_block(lines, key): + for idx in range(len(lines)): + if ("ITEM: " + key) in lines[idx]: break idx_s = idx + 1 - for idx in range(idx_s, len(lines)) : - if ('ITEM: ') in lines[idx] : + for idx in range(idx_s, len(lines)): + if ("ITEM: ") in lines[idx]: break idx_e = idx - if idx_e == len(lines)-1 : + if idx_e == len(lines) - 1: idx_e += 1 - return lines[idx_s:idx_e], lines[idx_s-1] + return lines[idx_s:idx_e], lines[idx_s - 1] -def get_atype(lines, type_idx_zero = False) : - blk, head = _get_block(lines, 'ATOMS') + +def get_atype(lines, type_idx_zero=False): + blk, head = _get_block(lines, "ATOMS") keys = head.split() - id_idx = keys.index('id') - 2 - tidx = keys.index('type') - 2 + id_idx = keys.index("id") - 2 + tidx = keys.index("type") - 2 atype = [] - for ii in blk : + for ii in blk: atype.append([int(ii.split()[id_idx]), int(ii.split()[tidx])]) atype.sort() - atype = np.array(atype, dtype = int) - if type_idx_zero : - return atype[:,1] - 1 - else : - return atype[:,1] - -def get_natoms(lines) : - blk, head = _get_block(lines, 'NUMBER OF ATOMS') + atype = np.array(atype, dtype=int) + if type_idx_zero: + return atype[:, 1] - 1 + else: + 
return atype[:, 1] + + +def get_natoms(lines): + blk, head = _get_block(lines, "NUMBER OF ATOMS") return int(blk[0]) -def get_natomtypes(lines) : + +def get_natomtypes(lines): atype = get_atype(lines) return max(atype) -def get_natoms_vec(lines) : + +def get_natoms_vec(lines): atype = get_atype(lines) natoms_vec = [] natomtypes = get_natomtypes(lines) - for ii in range(natomtypes) : - natoms_vec.append(sum(atype == ii+1)) - assert (sum(natoms_vec) == get_natoms(lines)) + for ii in range(natomtypes): + natoms_vec.append(sum(atype == ii + 1)) + assert sum(natoms_vec) == get_natoms(lines) return natoms_vec + def get_coordtype_and_scalefactor(keys): # 4 types in total,with different scaling factor - key_pc=['x','y','z'] # plain cartesian, sf = 1 - key_uc=['xu','yu','zu'] # unwraped cartesian, sf = 1 - key_s=['xs','ys','zs'] # scaled by lattice parameter, sf = lattice parameter - key_su = ['xsu','ysu','zsu'] #scaled and unfolded,sf = lattice parameter - lmp_coor_type = [key_pc,key_uc,key_s,key_su] - sf = [0,0,1,1] - uw = [0,1,0,1] # unwraped or not + key_pc = ["x", "y", "z"] # plain cartesian, sf = 1 + key_uc = ["xu", "yu", "zu"] # unwraped cartesian, sf = 1 + key_s = ["xs", "ys", "zs"] # scaled by lattice parameter, sf = lattice parameter + key_su = ["xsu", "ysu", "zsu"] # scaled and unfolded,sf = lattice parameter + lmp_coor_type = [key_pc, key_uc, key_s, key_su] + sf = [0, 0, 1, 1] + uw = [0, 1, 0, 1] # unwraped or not for k in range(4): if all(i in keys for i in lmp_coor_type[k]): return lmp_coor_type[k], sf[k], uw[k] -def safe_get_posi(lines,cell,orig=np.zeros(3), unwrap=False) : - blk, head = _get_block(lines, 'ATOMS') + +def safe_get_posi(lines, cell, orig=np.zeros(3), unwrap=False): + blk, head = _get_block(lines, "ATOMS") keys = head.split() coord_tp_and_sf = get_coordtype_and_scalefactor(keys) - assert coord_tp_and_sf is not None, 'Dump file does not contain atomic coordinates!' 
+ assert coord_tp_and_sf is not None, "Dump file does not contain atomic coordinates!" coordtype, sf, uw = coord_tp_and_sf - id_idx = keys.index('id') - 2 - xidx = keys.index(coordtype[0])-2 - yidx = keys.index(coordtype[1])-2 - zidx = keys.index(coordtype[2])-2 + id_idx = keys.index("id") - 2 + xidx = keys.index(coordtype[0]) - 2 + yidx = keys.index(coordtype[1]) - 2 + zidx = keys.index(coordtype[2]) - 2 posis = [] - for ii in blk : + for ii in blk: words = ii.split() - posis.append([float(words[id_idx]), float(words[xidx]), float(words[yidx]), float(words[zidx])]) + posis.append( + [ + float(words[id_idx]), + float(words[xidx]), + float(words[yidx]), + float(words[zidx]), + ] + ) posis.sort() - posis = np.array(posis)[:,1:4] + posis = np.array(posis)[:, 1:4] if not sf: - posis = (posis - orig) @ np.linalg.inv(cell) # Convert to scaled coordinates for unscaled coordinates + posis = (posis - orig) @ np.linalg.inv( + cell + ) # Convert to scaled coordinates for unscaled coordinates if uw and unwrap: - return posis @ cell # convert scaled coordinates back to Cartesien coordinates unwrap at the periodic boundaries + return ( + posis @ cell + ) # convert scaled coordinates back to Cartesien coordinates unwrap at the periodic boundaries else: if uw and not unwrap: - warnings.warn(message='Your dump file contains unwrapped coordinates, but you did not specify unwrapping (unwrap = True). The default is wrapping at periodic boundaries (unwrap = False).\n',category=UnwrapWarning) - return (posis % 1) @ cell # Convert scaled coordinates back to Cartesien coordinates with wraping at periodic boundary conditions + warnings.warn( + message="Your dump file contains unwrapped coordinates, but you did not specify unwrapping (unwrap = True). 
The default is wrapping at periodic boundaries (unwrap = False).\n", + category=UnwrapWarning, + ) + return ( + posis % 1 + ) @ cell # Convert scaled coordinates back to Cartesien coordinates with wraping at periodic boundary conditions + -def get_dumpbox(lines) : - blk, h = _get_block(lines, 'BOX BOUNDS') - bounds = np.zeros([3,2]) +def get_dumpbox(lines): + blk, h = _get_block(lines, "BOX BOUNDS") + bounds = np.zeros([3, 2]) tilt = np.zeros([3]) - load_tilt = 'xy xz yz' in h - for dd in range(3) : + load_tilt = "xy xz yz" in h + for dd in range(3): info = [float(jj) for jj in blk[dd].split()] bounds[dd][0] = info[0] bounds[dd][1] = info[1] - if load_tilt : + if load_tilt: tilt[dd] = info[2] return bounds, tilt -def dumpbox2box(bounds, tilt) : + +def dumpbox2box(bounds, tilt): xy = tilt[0] xz = tilt[1] yz = tilt[2] - xlo = bounds[0][0] - min(0.0,xy,xz,xy+xz) - xhi = bounds[0][1] - max(0.0,xy,xz,xy+xz) - ylo = bounds[1][0] - min(0.0,yz) - yhi = bounds[1][1] - max(0.0,yz) + xlo = bounds[0][0] - min(0.0, xy, xz, xy + xz) + xhi = bounds[0][1] - max(0.0, xy, xz, xy + xz) + ylo = bounds[1][0] - min(0.0, yz) + yhi = bounds[1][1] - max(0.0, yz) zlo = bounds[2][0] zhi = bounds[2][1] info = [[xlo, xhi], [ylo, yhi], [zlo, zhi]] return lmp.lmpbox2box(info, tilt) -def box2dumpbox(orig, box) : + +def box2dumpbox(orig, box): lohi, tilt = lmp.box2lmpbox(orig, box) xy = tilt[0] xz = tilt[1] yz = tilt[2] - bounds = np.zeros([3,2]) - bounds[0][0] = lohi[0][0] + min(0.0,xy,xz,xy+xz) - bounds[0][1] = lohi[0][1] + max(0.0,xy,xz,xy+xz) - bounds[1][0] = lohi[1][0] + min(0.0,yz) - bounds[1][1] = lohi[1][1] + max(0.0,yz) + bounds = np.zeros([3, 2]) + bounds[0][0] = lohi[0][0] + min(0.0, xy, xz, xy + xz) + bounds[0][1] = lohi[0][1] + max(0.0, xy, xz, xy + xz) + bounds[1][0] = lohi[1][0] + min(0.0, yz) + bounds[1][1] = lohi[1][1] + max(0.0, yz) bounds[2][0] = lohi[2][0] bounds[2][1] = lohi[2][1] return bounds, tilt -def load_file(fname, begin = 0, step = 1) : +def load_file(fname, begin=0, 
step=1): lines = [] buff = [] cc = -1 with open(fname) as fp: while True: - line = fp.readline().rstrip('\n') - if not line : - if cc >= begin and (cc - begin) % step == 0 : + line = fp.readline().rstrip("\n") + if not line: + if cc >= begin and (cc - begin) % step == 0: lines += buff buff = [] cc += 1 return lines - if 'ITEM: TIMESTEP' in line : - if cc >= begin and (cc - begin) % step == 0 : + if "ITEM: TIMESTEP" in line: + if cc >= begin and (cc - begin) % step == 0: lines += buff buff = [] cc += 1 - if cc >= begin and (cc - begin) % step == 0 : + if cc >= begin and (cc - begin) % step == 0: buff.append(line) -def system_data(lines, type_map = None, type_idx_zero = True, unwrap=False) : +def system_data(lines, type_map=None, type_idx_zero=True, unwrap=False): array_lines = split_traj(lines) lines = array_lines[0] system = {} - system['atom_numbs'] = get_natoms_vec(lines) - system['atom_names'] = [] - if type_map == None : - for ii in range(len(system['atom_numbs'])) : - system['atom_names'].append('TYPE_%d' % ii) - else : - assert(len(type_map) >= len(system['atom_numbs'])) - for ii in range(len(system['atom_numbs'])) : - system['atom_names'].append(type_map[ii]) + system["atom_numbs"] = get_natoms_vec(lines) + system["atom_names"] = [] + if type_map == None: + for ii in range(len(system["atom_numbs"])): + system["atom_names"].append("TYPE_%d" % ii) + else: + assert len(type_map) >= len(system["atom_numbs"]) + for ii in range(len(system["atom_numbs"])): + system["atom_names"].append(type_map[ii]) bounds, tilt = get_dumpbox(lines) orig, cell = dumpbox2box(bounds, tilt) - system['orig'] = np.array(orig) - np.array(orig) - system['cells'] = [np.array(cell)] - system['atom_types'] = get_atype(lines, type_idx_zero = type_idx_zero) - system['coords'] = [safe_get_posi(lines, cell, np.array(orig), unwrap)] - for ii in range(1, len(array_lines)) : + system["orig"] = np.array(orig) - np.array(orig) + system["cells"] = [np.array(cell)] + system["atom_types"] = 
get_atype(lines, type_idx_zero=type_idx_zero) + system["coords"] = [safe_get_posi(lines, cell, np.array(orig), unwrap)] + for ii in range(1, len(array_lines)): bounds, tilt = get_dumpbox(array_lines[ii]) orig, cell = dumpbox2box(bounds, tilt) - system['cells'].append(cell) - atype = get_atype(array_lines[ii], type_idx_zero = type_idx_zero) + system["cells"].append(cell) + atype = get_atype(array_lines[ii], type_idx_zero=type_idx_zero) # map atom type; a[as[a][as[as[b]]]] = b[as[b][as^{-1}[b]]] = b[id] - idx = np.argsort(atype)[np.argsort(np.argsort(system['atom_types']))] - system['coords'].append(safe_get_posi(array_lines[ii], cell, np.array(orig), unwrap)[idx]) - system['cells'] = np.array(system['cells']) - system['coords'] = np.array(system['coords']) + idx = np.argsort(atype)[np.argsort(np.argsort(system["atom_types"]))] + system["coords"].append( + safe_get_posi(array_lines[ii], cell, np.array(orig), unwrap)[idx] + ) + system["cells"] = np.array(system["cells"]) + system["coords"] = np.array(system["coords"]) return system -def split_traj(dump_lines) : +def split_traj(dump_lines): marks = [] - for idx,ii in enumerate(dump_lines) : - if 'ITEM: TIMESTEP' in ii : + for idx, ii in enumerate(dump_lines): + if "ITEM: TIMESTEP" in ii: marks.append(idx) - if len(marks) == 0 : + if len(marks) == 0: return None - elif len(marks) == 1 : + elif len(marks) == 1: return [dump_lines] - else : + else: block_size = marks[1] - marks[0] ret = [] - for ii in marks : - ret.append(dump_lines[ii:ii+block_size]) + for ii in marks: + ret.append(dump_lines[ii : ii + block_size]) # for ii in range(len(marks)-1): # assert(marks[ii+1] - marks[ii] == block_size) return ret return None -if __name__ == '__main__' : +if __name__ == "__main__": # fname = 'dump.hti' # lines = open(fname).read().split('\n') # # print(get_natoms(lines)) @@ -223,9 +255,9 @@ def split_traj(dump_lines) : # print(box) # np.savetxt('tmp.out', posi - orig, fmt='%.6f') # print(system_data(lines)) - lines = 
load_file('conf_unfold.dump', begin = 0, step = 1) + lines = load_file("conf_unfold.dump", begin=0, step=1) al = split_traj(lines) - s = system_data(lines,['O','H']) - #l = np.linalg.norm(s['cells'][1],axis=1) - #p = s['coords'][0] + l - #np.savetxt('p',p,fmt='%1.10f') + s = system_data(lines, ["O", "H"]) + # l = np.linalg.norm(s['cells'][1],axis=1) + # p = s['coords'][0] + l + # np.savetxt('p',p,fmt='%1.10f') diff --git a/dpdata/lammps/lmp.py b/dpdata/lammps/lmp.py index c5a82b633..7f80fcc0b 100644 --- a/dpdata/lammps/lmp.py +++ b/dpdata/lammps/lmp.py @@ -2,186 +2,233 @@ import numpy as np -ptr_float_fmt = '%15.10f' -ptr_int_fmt = '%6d' -ptr_key_fmt = '%15s' +ptr_float_fmt = "%15.10f" +ptr_int_fmt = "%6d" +ptr_key_fmt = "%15s" -def _get_block (lines, keys) : - for idx in range(len(lines)) : - if keys in lines[idx] : + +def _get_block(lines, keys): + for idx in range(len(lines)): + if keys in lines[idx]: break if idx == len(lines) - 1: return None - idx_s = idx+2 + idx_s = idx + 2 idx = idx_s ret = [] - while True : - if idx == len(lines) or len(lines[idx].split()) == 0 : + while True: + if idx == len(lines) or len(lines[idx].split()) == 0: break - else : + else: ret.append(lines[idx]) idx += 1 return ret -def lmpbox2box(lohi, tilt) : + +def lmpbox2box(lohi, tilt): xy = tilt[0] xz = tilt[1] yz = tilt[2] orig = np.array([lohi[0][0], lohi[1][0], lohi[2][0]]) lens = [] - for dd in range(3) : + for dd in range(3): lens.append(lohi[dd][1] - lohi[dd][0]) xx = [lens[0], 0, 0] yy = [xy, lens[1], 0] - zz= [xz, yz, lens[2]] + zz = [xz, yz, lens[2]] return orig, np.array([xx, yy, zz]) -def box2lmpbox(orig, box) : - lohi = np.zeros([3,2]) - for dd in range(3) : + +def box2lmpbox(orig, box): + lohi = np.zeros([3, 2]) + for dd in range(3): lohi[dd][0] = orig[dd] tilt = np.zeros(3) tilt[0] = box[1][0] tilt[1] = box[2][0] tilt[2] = box[2][1] - lens = np.zeros(3) + lens = np.zeros(3) lens[0] = box[0][0] lens[1] = box[1][1] lens[2] = box[2][2] - for dd in range(3) : + for dd in 
range(3): lohi[dd][1] = lohi[dd][0] + lens[dd] return lohi, tilt -def get_atoms(lines) : - return _get_block(lines, 'Atoms') -def get_natoms(lines) : - for ii in lines : - if 'atoms' in ii : +def get_atoms(lines): + return _get_block(lines, "Atoms") + + +def get_natoms(lines): + for ii in lines: + if "atoms" in ii: return int(ii.split()[0]) return None -def get_natomtypes(lines) : - for ii in lines : - if 'atom types' in ii : + +def get_natomtypes(lines): + for ii in lines: + if "atom types" in ii: return int(ii.split()[0]) return None -def _atom_info_mol(line) : + +def _atom_info_mol(line): vec = line.split() # idx, mole_type, atom_type, charge, x, y, z - return int(vec[0]), int(vec[1]), int(vec[2]), float(vec[3]), float(vec[4]), float(vec[5]), float(vec[6]) - -def _atom_info_atom(line) : + return ( + int(vec[0]), + int(vec[1]), + int(vec[2]), + float(vec[3]), + float(vec[4]), + float(vec[5]), + float(vec[6]), + ) + + +def _atom_info_atom(line): vec = line.split() # idx, atom_type, x, y, z return int(vec[0]), int(vec[1]), float(vec[2]), float(vec[3]), float(vec[4]) -def get_natoms_vec(lines) : + +def get_natoms_vec(lines): atype = get_atype(lines) natoms_vec = [] natomtypes = get_natomtypes(lines) - for ii in range(natomtypes) : - natoms_vec.append(sum(atype == ii+1)) - assert (sum(natoms_vec) == get_natoms(lines)) + for ii in range(natomtypes): + natoms_vec.append(sum(atype == ii + 1)) + assert sum(natoms_vec) == get_natoms(lines) return natoms_vec -def get_atype(lines, type_idx_zero = False) : - alines = get_atoms(lines) + +def get_atype(lines, type_idx_zero=False): + alines = get_atoms(lines) atype = [] - for ii in alines : + for ii in alines: # idx, mt, at, q, x, y, z = _atom_info_mol(ii) idx, at, x, y, z = _atom_info_atom(ii) - if type_idx_zero : - atype.append(at-1) + if type_idx_zero: + atype.append(at - 1) else: atype.append(at) - return np.array(atype, dtype = int) + return np.array(atype, dtype=int) -def get_posi(lines) : + +def get_posi(lines): 
atom_lines = get_atoms(lines) posis = [] - for ii in atom_lines : + for ii in atom_lines: # posis.append([float(jj) for jj in ii.split()[4:7]]) posis.append([float(jj) for jj in ii.split()[2:5]]) return np.array(posis) -def get_lmpbox(lines) : + +def get_lmpbox(lines): box_info = [] tilt = np.zeros(3) - for ii in lines : - if 'xlo' in ii and 'xhi' in ii : + for ii in lines: + if "xlo" in ii and "xhi" in ii: box_info.append([float(ii.split()[0]), float(ii.split()[1])]) break - for ii in lines : - if 'ylo' in ii and 'yhi' in ii : + for ii in lines: + if "ylo" in ii and "yhi" in ii: box_info.append([float(ii.split()[0]), float(ii.split()[1])]) break - for ii in lines : - if 'zlo' in ii and 'zhi' in ii : + for ii in lines: + if "zlo" in ii and "zhi" in ii: box_info.append([float(ii.split()[0]), float(ii.split()[1])]) break - for ii in lines : - if 'xy' in ii and 'xz' in ii and 'yz' in ii : + for ii in lines: + if "xy" in ii and "xz" in ii and "yz" in ii: tilt = np.array([float(jj) for jj in ii.split()[0:3]]) return box_info, tilt -def system_data(lines, type_map = None, type_idx_zero = True) : +def system_data(lines, type_map=None, type_idx_zero=True): system = {} - system['atom_numbs'] = get_natoms_vec(lines) - system['atom_names'] = [] - if type_map == None : - for ii in range(len(system['atom_numbs'])) : - system['atom_names'].append('Type_%d' % ii) - else : - assert(len(type_map) >= len(system['atom_numbs'])) - for ii in range(len(system['atom_numbs'])) : - system['atom_names'].append(type_map[ii]) + system["atom_numbs"] = get_natoms_vec(lines) + system["atom_names"] = [] + if type_map == None: + for ii in range(len(system["atom_numbs"])): + system["atom_names"].append("Type_%d" % ii) + else: + assert len(type_map) >= len(system["atom_numbs"]) + for ii in range(len(system["atom_numbs"])): + system["atom_names"].append(type_map[ii]) lohi, tilt = get_lmpbox(lines) orig, cell = lmpbox2box(lohi, tilt) - system['orig'] = np.array(orig) - system['cells'] = 
[np.array(cell)] - natoms = sum(system['atom_numbs']) - system['atom_types'] = get_atype(lines, type_idx_zero = type_idx_zero) - system['coords'] = [get_posi(lines)] - system['cells'] = np.array(system['cells']) - system['coords'] = np.array(system['coords']) + system["orig"] = np.array(orig) + system["cells"] = [np.array(cell)] + natoms = sum(system["atom_numbs"]) + system["atom_types"] = get_atype(lines, type_idx_zero=type_idx_zero) + system["coords"] = [get_posi(lines)] + system["cells"] = np.array(system["cells"]) + system["coords"] = np.array(system["coords"]) return system -def to_system_data(lines, type_map = None, type_idx_zero = True) : - return system_data(lines, type_map = type_map, type_idx_zero = type_idx_zero) - -def from_system_data(system, f_idx = 0) : - ret = '' - ret += '\n' - natoms = sum(system['atom_numbs']) - ntypes = len(system['atom_numbs']) - ret += '%d atoms\n' % natoms - ret += '%d atom types\n' % ntypes - ret += (ptr_float_fmt + ' ' + ptr_float_fmt + ' xlo xhi\n') % (0, system['cells'][f_idx][0][0]) - ret += (ptr_float_fmt + ' ' + ptr_float_fmt + ' ylo yhi\n') % (0, system['cells'][f_idx][1][1]) - ret += (ptr_float_fmt + ' ' + ptr_float_fmt + ' zlo zhi\n') % (0, system['cells'][f_idx][2][2]) - ret += (ptr_float_fmt + ' ' + ptr_float_fmt + ' ' + ptr_float_fmt + ' xy xz yz\n') % \ - (system['cells'][f_idx][1][0], system['cells'][f_idx][2][0], system['cells'][f_idx][2][1]) - ret += '\n' - ret += 'Atoms # atomic\n' - ret += '\n' - coord_fmt = ptr_int_fmt + ' ' + ptr_int_fmt + ' ' + ptr_float_fmt + ' ' + ptr_float_fmt + ' ' + ptr_float_fmt + '\n' - for ii in range(natoms) : - ret += coord_fmt % \ - (ii+1, - system['atom_types'][ii] + 1, - system['coords'][f_idx][ii][0] - system['orig'][0], - system['coords'][f_idx][ii][1] - system['orig'][1], - system['coords'][f_idx][ii][2] - system['orig'][2] + +def to_system_data(lines, type_map=None, type_idx_zero=True): + return system_data(lines, type_map=type_map, type_idx_zero=type_idx_zero) + + +def 
from_system_data(system, f_idx=0): + ret = "" + ret += "\n" + natoms = sum(system["atom_numbs"]) + ntypes = len(system["atom_numbs"]) + ret += "%d atoms\n" % natoms + ret += "%d atom types\n" % ntypes + ret += (ptr_float_fmt + " " + ptr_float_fmt + " xlo xhi\n") % ( + 0, + system["cells"][f_idx][0][0], + ) + ret += (ptr_float_fmt + " " + ptr_float_fmt + " ylo yhi\n") % ( + 0, + system["cells"][f_idx][1][1], + ) + ret += (ptr_float_fmt + " " + ptr_float_fmt + " zlo zhi\n") % ( + 0, + system["cells"][f_idx][2][2], + ) + ret += ( + ptr_float_fmt + " " + ptr_float_fmt + " " + ptr_float_fmt + " xy xz yz\n" + ) % ( + system["cells"][f_idx][1][0], + system["cells"][f_idx][2][0], + system["cells"][f_idx][2][1], + ) + ret += "\n" + ret += "Atoms # atomic\n" + ret += "\n" + coord_fmt = ( + ptr_int_fmt + + " " + + ptr_int_fmt + + " " + + ptr_float_fmt + + " " + + ptr_float_fmt + + " " + + ptr_float_fmt + + "\n" + ) + for ii in range(natoms): + ret += coord_fmt % ( + ii + 1, + system["atom_types"][ii] + 1, + system["coords"][f_idx][ii][0] - system["orig"][0], + system["coords"][f_idx][ii][1] - system["orig"][1], + system["coords"][f_idx][ii][2] - system["orig"][2], ) return ret -if __name__ == '__main__' : - fname = 'water-SPCE.data' - lines = open(fname).read().split('\n') +if __name__ == "__main__": + fname = "water-SPCE.data" + lines = open(fname).read().split("\n") bonds, tilt = get_lmpbox(lines) # print(bonds, tilt) orig, box = lmpbox2box(bonds, tilt) diff --git a/dpdata/md/msd.py b/dpdata/md/msd.py index 0286b8abd..eebc72963 100644 --- a/dpdata/md/msd.py +++ b/dpdata/md/msd.py @@ -1,53 +1,55 @@ import numpy as np from .pbc import system_pbc_shift + def _msd(coords, cells, pbc_shift, begin): nframes = cells.shape[0] natoms = coords.shape[1] ff = begin prev_coord = coords[ff] + np.matmul(pbc_shift[ff], cells[ff]) - msds = [0.] 
- for ff in range(begin+1,nframes) : + msds = [0.0] + for ff in range(begin + 1, nframes): curr_coord = coords[ff] + np.matmul(pbc_shift[ff], cells[ff]) diff_coord = curr_coord - prev_coord msds.append(np.sum(diff_coord * diff_coord) / natoms) return np.array(msds) + def _msd_win(coords, cells, pbc_shift, begin, window): nframes = cells.shape[0] natoms = coords.shape[1] ncoords = np.zeros(coords.shape) msd = np.zeros([window]) - for ff in range(nframes) : - ncoords[ff] = coords[ff] + np.matmul(pbc_shift[ff], cells[ff]) + for ff in range(nframes): + ncoords[ff] = coords[ff] + np.matmul(pbc_shift[ff], cells[ff]) cc = 0 - for ii in range(begin,nframes-window+1) : + for ii in range(begin, nframes - window + 1): start = np.tile(ncoords[ii], (window, 1, 1)) - diff_coord = ncoords[ii:ii+window] - start + diff_coord = ncoords[ii : ii + window] - start diff_coord = np.reshape(diff_coord, [-1, natoms * 3]) - msd += np.sum(diff_coord * diff_coord, axis = 1) / natoms + msd += np.sum(diff_coord * diff_coord, axis=1) / natoms cc += 1 return np.array(msd) / cc -def msd(system, sel = None, begin = 0, window = 0) : - natoms = system.get_natoms() - if sel is None : + +def msd(system, sel=None, begin=0, window=0): + natoms = system.get_natoms() + if sel is None: sel_idx = np.arange(natoms) - else : + else: sel_idx = [] - for ii in range(natoms) : - if sel[ii] : + for ii in range(natoms): + if sel[ii]: sel_idx.append(ii) - sel_idx = np.array(sel_idx, dtype = int) + sel_idx = np.array(sel_idx, dtype=int) nsel = sel_idx.size nframes = system.get_nframes() pbc_shift = system_pbc_shift(system) - coords = system['coords'] - cells = system['cells'] - pbc_shift = pbc_shift[:,sel_idx,:] - coords = coords[:,sel_idx,:] - if window <= 0 : + coords = system["coords"] + cells = system["cells"] + pbc_shift = pbc_shift[:, sel_idx, :] + coords = coords[:, sel_idx, :] + if window <= 0: return _msd(coords, cells, pbc_shift, begin) - else : + else: return _msd_win(coords, cells, pbc_shift, begin, 
window) - diff --git a/dpdata/md/pbc.py b/dpdata/md/pbc.py index b3318aaf6..4eee7c654 100644 --- a/dpdata/md/pbc.py +++ b/dpdata/md/pbc.py @@ -1,66 +1,63 @@ import numpy as np -def posi_diff(box, r0, r1) : +def posi_diff(box, r0, r1): rbox = np.linalg.inv(box) rbox = rbox.T - p0 = (np.dot(rbox, r0)) - p1 = (np.dot(rbox, r1)) + p0 = np.dot(rbox, r0) + p1 = np.dot(rbox, r1) dp = p0 - p1 shift = np.zeros(3) - for dd in range(3) : - if dp[dd] >= 0.5 : + for dd in range(3): + if dp[dd] >= 0.5: dp[dd] -= 1 - elif dp[dd] < -0.5 : + elif dp[dd] < -0.5: dp[dd] += 1 - dr = np.dot(box.T, dp) + dr = np.dot(box.T, dp) return dr -def posi_shift(box, r0, r1) : +def posi_shift(box, r0, r1): rbox = np.linalg.inv(box) rbox = rbox.T - p0 = (np.dot(rbox, r0)) - p1 = (np.dot(rbox, r1)) + p0 = np.dot(rbox, r0) + p1 = np.dot(rbox, r1) dp = p0 - p1 shift = np.zeros(3) - for dd in range(3) : - if dp[dd] >= 0.5 : + for dd in range(3): + if dp[dd] >= 0.5: shift[dd] -= 1 - elif dp[dd] < -0.5 : + elif dp[dd] < -0.5: shift[dd] += 1 return shift -def dir_coord(coord, box) : +def dir_coord(coord, box): rbox = np.linalg.inv(box) return np.matmul(coord, rbox) -def system_pbc_shift(system) : +def system_pbc_shift(system): f_idx = 0 - prev_ncoord = dir_coord(system['coords'][f_idx], - system['cells' ][f_idx]) - shifts = np.zeros([system.get_nframes(), system.get_natoms(), 3], dtype = int) - curr_shift = np.zeros([system.get_natoms(), 3], dtype = int) + prev_ncoord = dir_coord(system["coords"][f_idx], system["cells"][f_idx]) + shifts = np.zeros([system.get_nframes(), system.get_natoms(), 3], dtype=int) + curr_shift = np.zeros([system.get_natoms(), 3], dtype=int) half = 0.5 * np.ones([system.get_natoms(), 3]) - for ii in range(system.get_nframes()) : - curr_ncoord = dir_coord(system['coords'][ii], - system['cells' ][ii]) + for ii in range(system.get_nframes()): + curr_ncoord = dir_coord(system["coords"][ii], system["cells"][ii]) diff_ncoord = curr_ncoord - prev_ncoord - curr_shift -= (diff_ncoord > 
half) - curr_shift += (diff_ncoord <-half) + curr_shift -= diff_ncoord > half + curr_shift += diff_ncoord < -half shifts[ii] = np.copy(curr_shift) prev_ncoord = curr_ncoord - return np.array(shifts, dtype = int) + return np.array(shifts, dtype=int) -def apply_pbc(system_coords, system_cells) : +def apply_pbc(system_coords, system_cells): coords = [] nframes = system_cells.shape[0] - for ff in range(nframes) : - ncoord = dir_coord(system_coords[ff], - system_cells[ff]) + for ff in range(nframes): + ncoord = dir_coord(system_coords[ff], system_cells[ff]) ncoord = ncoord % 1 coords.append(np.matmul(ncoord, system_cells[ff])) return np.array(coords) diff --git a/dpdata/md/rdf.py b/dpdata/md/rdf.py index e1fd50a23..220bdcb03 100644 --- a/dpdata/md/rdf.py +++ b/dpdata/md/rdf.py @@ -1,9 +1,7 @@ import numpy as np -def rdf(sys, - sel_type = [None, None], - max_r = 5, - nbins = 100) : + +def rdf(sys, sel_type=[None, None], max_r=5, nbins=100): """ compute the rdf of a system @@ -12,9 +10,9 @@ def rdf(sys, sys : System or LabeledSystem The dpdata system sel_type: list - List of size 2. The first element specifies the type of the first atom, - while the second element specifies the type of the second atom. - Both elements can be ints or list of ints. + List of size 2. The first element specifies the type of the first atom, + while the second element specifies the type of the second atom. + Both elements can be ints or list of ints. If the element is None, all types are specified. 
Examples are sel_type = [0, 0], sel_type = [0, [0, 1]] or sel_type = [0, None] max_r: float @@ -31,37 +29,35 @@ def rdf(sys, coord: np.array The coordination number up to r """ - return compute_rdf(sys['cells'], sys['coords'], sys['atom_types'], - sel_type = sel_type, - max_r = max_r, - nbins = nbins) + return compute_rdf( + sys["cells"], + sys["coords"], + sys["atom_types"], + sel_type=sel_type, + max_r=max_r, + nbins=nbins, + ) + -def compute_rdf(box, - posis, - atype, - sel_type = [None, None], - max_r = 5, - nbins = 100) : +def compute_rdf(box, posis, atype, sel_type=[None, None], max_r=5, nbins=100): nframes = box.shape[0] xx = None all_rdf = [] all_cod = [] for ii in range(nframes): - xx, rdf, cod = _compute_rdf_1frame(box[ii], posis[ii], atype, sel_type, max_r, nbins) + xx, rdf, cod = _compute_rdf_1frame( + box[ii], posis[ii], atype, sel_type, max_r, nbins + ) all_rdf.append(rdf) all_cod.append(cod) all_rdf = np.array(all_rdf).reshape([nframes, -1]) all_cod = np.array(all_cod).reshape([nframes, -1]) - all_rdf = np.average(all_rdf, axis = 0) - all_cod = np.average(all_cod, axis = 0) + all_rdf = np.average(all_rdf, axis=0) + all_cod = np.average(all_cod, axis=0) return xx, all_rdf, all_cod -def _compute_rdf_1frame(box, - posis, - atype, - sel_type = [None, None], - max_r = 5, - nbins = 100) : + +def _compute_rdf_1frame(box, posis, atype, sel_type=[None, None], max_r=5, nbins=100): all_types = list(set(list(np.sort(atype)))) if sel_type[0] is None: sel_type[0] = all_types @@ -73,14 +69,20 @@ def _compute_rdf_1frame(box, sel_type[1] = [sel_type[1]] natoms = len(posis) from ase import Atoms - import ase.neighborlist - atoms = Atoms(positions=posis, cell=box, pbc=[1,1,1]) - nlist = ase.neighborlist.NeighborList(max_r, self_interaction=False, bothways=True, primitive=ase.neighborlist.NewPrimitiveNeighborList) + import ase.neighborlist + + atoms = Atoms(positions=posis, cell=box, pbc=[1, 1, 1]) + nlist = ase.neighborlist.NeighborList( + max_r, + 
self_interaction=False, + bothways=True, + primitive=ase.neighborlist.NewPrimitiveNeighborList, + ) nlist.update(atoms) stat = np.zeros(nbins) stat_acc = np.zeros(nbins) hh = max_r / float(nbins) - for ii in range(natoms) : + for ii in range(natoms): # atom "0" if atype[ii] in sel_type[0]: indices, offsets = nlist.get_neighbors(ii) @@ -89,7 +91,7 @@ def _compute_rdf_1frame(box, if atype[jj] in sel_type[1]: posi_jj = atoms.positions[jj] + np.dot(os, atoms.get_cell()) diff = posi_jj - atoms.positions[ii] - dr = np.linalg.norm(diff) + dr = np.linalg.norm(diff) # if (np.linalg.norm(diff- diff_1)) > 1e-12 : # raise RuntimeError si = int(dr / hh) @@ -106,19 +108,21 @@ def _compute_rdf_1frame(box, rho1 = c1 / np.linalg.det(box) # compute coordination number for ii in range(1, nbins): - stat_acc[ii] = stat_acc[ii-1] + stat[ii-1] + stat_acc[ii] = stat_acc[ii - 1] + stat[ii - 1] stat_acc = stat_acc / c0 # compute rdf for ii in range(nbins): - vol = 4./3. * np.pi * ( ((ii+1)*hh) ** 3 - ((ii)*hh) ** 3 ) + vol = 4.0 / 3.0 * np.pi * (((ii + 1) * hh) ** 3 - ((ii) * hh) ** 3) rho = stat[ii] / vol stat[ii] = rho / rho1 / c0 - xx = np.arange(0, max_r-1e-12, hh) + xx = np.arange(0, max_r - 1e-12, hh) return xx, stat, stat_acc -if __name__ == '__main__': + +if __name__ == "__main__": import dpdata - sys = dpdata.System('out.lmp') - xx, stat = rdf(sys, sel_type = [[0], None], max_r = 8, nbins = 100) + + sys = dpdata.System("out.lmp") + xx, stat = rdf(sys, sel_type=[[0], None], max_r=8, nbins=100) res = np.concatenate([xx, stat]).reshape([2, -1]) - np.savetxt('rdf.out', res.T) + np.savetxt("rdf.out", res.T) diff --git a/dpdata/md/water.py b/dpdata/md/water.py index 75ad1ad2d..b9ab833b6 100644 --- a/dpdata/md/water.py +++ b/dpdata/md/water.py @@ -2,114 +2,107 @@ from .pbc import posi_diff from .pbc import posi_shift -def compute_bonds (box, - posis, - atype, - oh_sel = [0,1], - max_roh = 1.3, - uniq_hbond = True): - try : + +def compute_bonds(box, posis, atype, oh_sel=[0, 1], 
max_roh=1.3, uniq_hbond=True): + try: import ase - import ase.neighborlist + import ase.neighborlist + # nlist implemented by ase - return compute_bonds_ase(box, posis, atype, oh_sel, max_roh, uniq_hbond) + return compute_bonds_ase(box, posis, atype, oh_sel, max_roh, uniq_hbond) except ImportError: # nlist naivly implemented , scales as O(N^2) return compute_bonds_naive(box, posis, atype, oh_sel, max_roh, uniq_hbond) -def compute_bonds_ase(box, - posis, - atype, - oh_sel = [0,1], - max_roh = 1.3, - uniq_hbond = True): +def compute_bonds_ase(box, posis, atype, oh_sel=[0, 1], max_roh=1.3, uniq_hbond=True): natoms = len(posis) from ase import Atoms - import ase.neighborlist - atoms = Atoms(positions=posis, cell=box, pbc=[1,1,1]) - nlist = ase.neighborlist.NeighborList(max_roh, self_interaction=False, bothways=True, primitive=ase.neighborlist.NewPrimitiveNeighborList) + import ase.neighborlist + + atoms = Atoms(positions=posis, cell=box, pbc=[1, 1, 1]) + nlist = ase.neighborlist.NeighborList( + max_roh, + self_interaction=False, + bothways=True, + primitive=ase.neighborlist.NewPrimitiveNeighborList, + ) nlist.update(atoms) bonds = [] o_type = oh_sel[0] h_type = oh_sel[1] - for ii in range(natoms) : + for ii in range(natoms): bonds.append([]) - for ii in range(natoms) : - if atype[ii] == o_type : + for ii in range(natoms): + if atype[ii] == o_type: nn, ss = nlist.get_neighbors(ii) for jj in nn: - if atype[jj] == h_type : + if atype[jj] == h_type: dr = posi_diff(box, posis[ii], posis[jj]) - if np.linalg.norm(dr) < max_roh : + if np.linalg.norm(dr) < max_roh: bonds[ii].append(jj) bonds[jj].append(ii) - if uniq_hbond : - for jj in range(natoms) : - if atype[jj] == h_type : - if len(bonds[jj]) > 1 : + if uniq_hbond: + for jj in range(natoms): + if atype[jj] == h_type: + if len(bonds[jj]) > 1: orig_bonds = bonds[jj] min_bd = 1e10 min_idx = -1 - for ii in bonds[jj] : + for ii in bonds[jj]: dr = posi_diff(box, posis[ii], posis[jj]) drr = np.linalg.norm(dr) # print(ii,jj, 
posis[ii], posis[jj], drr) - if drr < min_bd : + if drr < min_bd: min_idx = ii min_bd = drr bonds[jj] = [min_idx] orig_bonds.remove(min_idx) # print(min_idx, orig_bonds, bonds[jj]) - for ii in orig_bonds : + for ii in orig_bonds: bonds[ii].remove(jj) return bonds - -def compute_bonds_naive(box, - posis, - atype, - oh_sel = [0,1], - max_roh = 1.3, - uniq_hbond = True): + +def compute_bonds_naive(box, posis, atype, oh_sel=[0, 1], max_roh=1.3, uniq_hbond=True): natoms = len(posis) bonds = [] o_type = oh_sel[0] h_type = oh_sel[1] - for ii in range(natoms) : + for ii in range(natoms): bonds.append([]) - for ii in range(natoms) : - if atype[ii] == o_type : - for jj in range(natoms) : - if atype[jj] == h_type : + for ii in range(natoms): + if atype[ii] == o_type: + for jj in range(natoms): + if atype[jj] == h_type: dr = posi_diff(box, posis[ii], posis[jj]) - if np.linalg.norm(dr) < max_roh : + if np.linalg.norm(dr) < max_roh: bonds[ii].append(jj) bonds[jj].append(ii) - if uniq_hbond : - for jj in range(natoms) : - if atype[jj] == h_type : - if len(bonds[jj]) > 1 : + if uniq_hbond: + for jj in range(natoms): + if atype[jj] == h_type: + if len(bonds[jj]) > 1: orig_bonds = bonds[jj] min_bd = 1e10 min_idx = -1 - for ii in bonds[jj] : + for ii in bonds[jj]: dr = posi_diff(box, posis[ii], posis[jj]) drr = np.linalg.norm(dr) # print(ii,jj, posis[ii], posis[jj], drr) - if drr < min_bd : + if drr < min_bd: min_idx = ii min_bd = drr bonds[jj] = [min_idx] orig_bonds.remove(min_idx) # print(min_idx, orig_bonds, bonds[jj]) - for ii in orig_bonds : + for ii in orig_bonds: bonds[ii].remove(jj) return bonds -# def ions_count (atype, -# bonds, +# def ions_count (atype, +# bonds, # oh_sel = [0, 1]) : # no = 0 # noh = 0 @@ -146,10 +139,8 @@ def compute_bonds_naive(box, # raise RuntimeError("unknow case: numb of O bonded to H > 1") # return no, noh, noh2, noh3, nh -def find_ions (atype, - bonds, - oh_sel = [0, 1], - ret_h2o = True) : + +def find_ions(atype, bonds, oh_sel=[0, 1], 
ret_h2o=True): no = [] noh = [] noh2 = [] @@ -158,37 +149,34 @@ def find_ions (atype, natoms = len(atype) o_type = oh_sel[0] h_type = oh_sel[1] - for ii in range(natoms) : - if atype[ii] == o_type : - if len(bonds[ii] ) == 0 : + for ii in range(natoms): + if atype[ii] == o_type: + if len(bonds[ii]) == 0: no.append(ii) - elif len(bonds[ii] ) == 1 : + elif len(bonds[ii]) == 1: noh.append(ii) - elif len(bonds[ii] ) == 2 : - if ret_h2o : + elif len(bonds[ii]) == 2: + if ret_h2o: noh2.append(ii) - elif len(bonds[ii] ) == 3 : + elif len(bonds[ii]) == 3: noh3.append(ii) - else : + else: raise RuntimeError("unknow case: numb of H bonded to O > 3") - for ii in range(natoms) : - if atype[ii] == h_type : - if len(bonds[ii] ) == 0 : + for ii in range(natoms): + if atype[ii] == h_type: + if len(bonds[ii]) == 0: nh.append(ii) - elif len(bonds[ii] ) == 1 : + elif len(bonds[ii]) == 1: pass - else : + else: raise RuntimeError("unknow case: numb of O bonded to H > 1") return no, noh, noh2, noh3, nh - -def pbc_coords(cells, - coords, - atom_types, - oh_sel = [0, 1], - max_roh = 1.3): - bonds = compute_bonds(cells, coords, atom_types, oh_sel = oh_sel, max_roh = max_roh, uniq_hbond = True) +def pbc_coords(cells, coords, atom_types, oh_sel=[0, 1], max_roh=1.3): + bonds = compute_bonds( + cells, coords, atom_types, oh_sel=oh_sel, max_roh=max_roh, uniq_hbond=True + ) new_coords = np.copy(coords) natoms = len(atom_types) @@ -196,10 +184,9 @@ def pbc_coords(cells, h_type = oh_sel[1] for ii in range(natoms): if atom_types[ii] == o_type: - assert(len(bonds[ii]) == 2), 'O has more than 2 bonded Hs, stop' + assert len(bonds[ii]) == 2, "O has more than 2 bonded Hs, stop" for jj in bonds[ii]: - assert(atom_types[jj] == h_type), 'The atom bonded to O is not H, stop' + assert atom_types[jj] == h_type, "The atom bonded to O is not H, stop" shift = posi_shift(cells, coords[jj], coords[ii]) new_coords[jj] = coords[jj] + np.matmul(shift, cells) return new_coords - diff --git 
a/dpdata/periodic_table.json b/dpdata/periodic_table.json index 69c55325d..7a055ad6f 100644 --- a/dpdata/periodic_table.json +++ b/dpdata/periodic_table.json @@ -823,4 +823,4 @@ "radius": null, "calculated_radius": null } -} \ No newline at end of file +} diff --git a/dpdata/periodic_table.py b/dpdata/periodic_table.py index df8a50388..dc64d40ad 100644 --- a/dpdata/periodic_table.py +++ b/dpdata/periodic_table.py @@ -1,38 +1,138 @@ from pathlib import Path -from monty.serialization import loadfn,dumpfn +from monty.serialization import loadfn, dumpfn -fpdt=str(Path(__file__).absolute().parent / "periodic_table.json") -_pdt=loadfn(fpdt) -ELEMENTS=['H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', \ - 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag',\ - 'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb',\ - 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U', 'Np', \ - 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr'] +fpdt = str(Path(__file__).absolute().parent / "periodic_table.json") +_pdt = loadfn(fpdt) +ELEMENTS = [ + "H", + "He", + "Li", + "Be", + "B", + "C", + "N", + "O", + "F", + "Ne", + "Na", + "Mg", + "Al", + "Si", + "P", + "S", + "Cl", + "Ar", + "K", + "Ca", + "Sc", + "Ti", + "V", + "Cr", + "Mn", + "Fe", + "Co", + "Ni", + "Cu", + "Zn", + "Ga", + "Ge", + "As", + "Se", + "Br", + "Kr", + "Rb", + "Sr", + "Y", + "Zr", + "Nb", + "Mo", + "Tc", + "Ru", + "Rh", + "Pd", + "Ag", + "Cd", + "In", + "Sn", + "Sb", + "Te", + "I", + "Xe", + "Cs", + "Ba", + "La", + "Ce", + "Pr", + "Nd", + "Pm", + "Sm", + "Eu", + "Gd", + "Tb", + "Dy", + "Ho", + "Er", + "Tm", + "Yb", + "Lu", + "Hf", + "Ta", + "W", + "Re", + "Os", + "Ir", + "Pt", + "Au", + 
"Hg", + "Tl", + "Pb", + "Bi", + "Po", + "At", + "Rn", + "Fr", + "Ra", + "Ac", + "Th", + "Pa", + "U", + "Np", + "Pu", + "Am", + "Cm", + "Bk", + "Cf", + "Es", + "Fm", + "Md", + "No", + "Lr", +] -class Element: +class Element: def __init__(self, symbol: str): assert symbol in ELEMENTS self.symbol = "%s" % symbol d = _pdt[symbol] - self._Z = d['atomic_no'] - self._name = d['name'] - self._X = d['X'] - self._mass = d['atomic_mass'] - self._r = d['radius'] + self._Z = d["atomic_no"] + self._name = d["name"] + self._X = d["X"] + self._mass = d["atomic_mass"] + self._r = d["radius"] self._cr = d["calculated_radius"] def __str__(self): return self.symbol def __repr__(self): - return "Element : %s"%self.symbol - + return "Element : %s" % self.symbol + @classmethod - def from_Z(cls,Z): - assert(Z>0) - assert(Z 0 + assert Z < len(ELEMENTS) + return cls(ELEMENTS[Z - 1]) @property def Z(self): diff --git a/dpdata/plugin.py b/dpdata/plugin.py index d000f558b..4f163ced0 100644 --- a/dpdata/plugin.py +++ b/dpdata/plugin.py @@ -12,22 +12,25 @@ def xxx(): pass >>> print(Plugin.plugins['xx']) """ + def __init__(self): self.plugins = {} def register(self, key): """Register a plugin. - + Parameter --------- key: str Key of the plugin. """ + def decorator(object): self.plugins[key] = object return object + return decorator - + def get_plugin(self, key): return self.plugins[key] diff --git a/dpdata/plugins/3dmol.py b/dpdata/plugins/3dmol.py index c30893295..fa9f02aa2 100644 --- a/dpdata/plugins/3dmol.py +++ b/dpdata/plugins/3dmol.py @@ -11,12 +11,15 @@ class Py3DMolFormat(Format): To use this format, py3Dmol should be installed in advance. 
""" - def to_system(self, - data: dict, - f_idx: int = 0, - size: Tuple[int] = (300,300), - style: dict = {"stick":{}, "sphere":{"radius":0.4}}, - **kwargs): + + def to_system( + self, + data: dict, + f_idx: int = 0, + size: Tuple[int] = (300, 300), + style: dict = {"stick": {}, "sphere": {"radius": 0.4}}, + **kwargs + ): """Show 3D structure of a frame in jupyter. Parameters @@ -35,10 +38,11 @@ def to_system(self, >>> system.to_3dmol() """ import py3Dmol - types = np.array(data['atom_names'])[data['atom_types']] - xyz = coord_to_xyz(data['coords'][f_idx], types) + + types = np.array(data["atom_names"])[data["atom_types"]] + xyz = coord_to_xyz(data["coords"][f_idx], types) viewer = py3Dmol.view(width=size[0], height=size[1]) - viewer.addModel(xyz, 'xyz') + viewer.addModel(xyz, "xyz") viewer.setStyle(style.copy()) viewer.zoomTo() return viewer diff --git a/dpdata/plugins/__init__.py b/dpdata/plugins/__init__.py index ca097fd75..66364aa25 100644 --- a/dpdata/plugins/__init__.py +++ b/dpdata/plugins/__init__.py @@ -1,8 +1,9 @@ import importlib from pathlib import Path + try: from importlib import metadata -except ImportError: # for Python<3.8 +except ImportError: # for Python<3.8 import importlib_metadata as metadata PACKAGE_BASE = "dpdata.plugins" @@ -15,8 +16,8 @@ # https://setuptools.readthedocs.io/en/latest/userguide/entry_point.html try: - eps = metadata.entry_points(group='dpdata.plugins') + eps = metadata.entry_points(group="dpdata.plugins") except TypeError: - eps = metadata.entry_points().get('dpdata.plugins', []) + eps = metadata.entry_points().get("dpdata.plugins", []) for ep in eps: plugin = ep.load() diff --git a/dpdata/plugins/abacus.py b/dpdata/plugins/abacus.py index c219053b7..a9c82b059 100644 --- a/dpdata/plugins/abacus.py +++ b/dpdata/plugins/abacus.py @@ -3,12 +3,13 @@ import dpdata.abacus.relax from dpdata.format import Format + @Format.register("abacus/stru") @Format.register("stru") class AbacusSTRUFormat(Format): def from_system(self, 
file_name, **kwargs): return dpdata.abacus.scf.get_frame_from_stru(file_name) - + def to_system(self, data, file_name, frame_idx=0, **kwargs): """ Dump the system into ABACUS STRU format file. @@ -28,35 +29,45 @@ def to_system(self, data, file_name, frame_idx=0, **kwargs): numerical_descriptor: str, optional numerical descriptor file """ - - pp_file = kwargs.get('pp_file') - numerical_orbital = kwargs.get('numerical_orbital') - mass = kwargs.get('mass') - numerical_descriptor = kwargs.get('numerical_descriptor') - stru_string = dpdata.abacus.scf.make_unlabeled_stru(data=data, frame_idx=frame_idx, pp_file=pp_file, numerical_orbital=numerical_orbital, numerical_descriptor=numerical_descriptor, mass=mass) + + pp_file = kwargs.get("pp_file") + numerical_orbital = kwargs.get("numerical_orbital") + mass = kwargs.get("mass") + numerical_descriptor = kwargs.get("numerical_descriptor") + stru_string = dpdata.abacus.scf.make_unlabeled_stru( + data=data, + frame_idx=frame_idx, + pp_file=pp_file, + numerical_orbital=numerical_orbital, + numerical_descriptor=numerical_descriptor, + mass=mass, + ) with open(file_name, "w") as fp: fp.write(stru_string) + @Format.register("abacus/scf") @Format.register("abacus/pw/scf") @Format.register("abacus/lcao/scf") class AbacusSCFFormat(Format): - #@Format.post("rot_lower_triangular") + # @Format.post("rot_lower_triangular") def from_labeled_system(self, file_name, **kwargs): return dpdata.abacus.scf.get_frame(file_name) + @Format.register("abacus/md") @Format.register("abacus/pw/md") @Format.register("abacus/lcao/md") class AbacusMDFormat(Format): - #@Format.post("rot_lower_triangular") + # @Format.post("rot_lower_triangular") def from_labeled_system(self, file_name, **kwargs): return dpdata.abacus.md.get_frame(file_name) + @Format.register("abacus/relax") @Format.register("abacus/pw/relax") @Format.register("abacus/lcao/relax") class AbacusRelaxFormat(Format): - #@Format.post("rot_lower_triangular") + # @Format.post("rot_lower_triangular") 
def from_labeled_system(self, file_name, **kwargs): - return dpdata.abacus.relax.get_frame(file_name) + return dpdata.abacus.relax.get_frame(file_name) diff --git a/dpdata/plugins/amber.py b/dpdata/plugins/amber.py index 4fe41c1e4..cf2df3ca6 100644 --- a/dpdata/plugins/amber.py +++ b/dpdata/plugins/amber.py @@ -10,15 +10,37 @@ @Format.register("amber/md") class AmberMDFormat(Format): - def from_system(self, file_name=None, parm7_file=None, nc_file=None, use_element_symbols=None, **kwargs): + def from_system( + self, + file_name=None, + parm7_file=None, + nc_file=None, + use_element_symbols=None, + **kwargs, + ): # assume the prefix is the same if the spefic name is not given if parm7_file is None: parm7_file = file_name + ".parm7" if nc_file is None: nc_file = file_name + ".nc" - return dpdata.amber.md.read_amber_traj(parm7_file=parm7_file, nc_file=nc_file, use_element_symbols=use_element_symbols, labeled=False) - - def from_labeled_system(self, file_name=None, parm7_file=None, nc_file=None, mdfrc_file=None, mden_file=None, mdout_file=None, use_element_symbols=None, **kwargs): + return dpdata.amber.md.read_amber_traj( + parm7_file=parm7_file, + nc_file=nc_file, + use_element_symbols=use_element_symbols, + labeled=False, + ) + + def from_labeled_system( + self, + file_name=None, + parm7_file=None, + nc_file=None, + mdfrc_file=None, + mden_file=None, + mdout_file=None, + use_element_symbols=None, + **kwargs, + ): # assume the prefix is the same if the spefic name is not given if parm7_file is None: parm7_file = file_name + ".parm7" @@ -30,25 +52,28 @@ def from_labeled_system(self, file_name=None, parm7_file=None, nc_file=None, mdf mden_file = file_name + ".mden" if mdout_file is None: mdout_file = file_name + ".mdout" - return dpdata.amber.md.read_amber_traj(parm7_file, nc_file, mdfrc_file, mden_file, mdout_file, use_element_symbols) + return dpdata.amber.md.read_amber_traj( + parm7_file, nc_file, mdfrc_file, mden_file, mdout_file, use_element_symbols + ) 
@Format.register("sqm/out") class SQMOutFormat(Format): def from_system(self, fname, **kwargs): - ''' + """ Read from ambertools sqm.out - ''' + """ return dpdata.amber.sqm.parse_sqm_out(fname) - + def from_labeled_system(self, fname, **kwargs): - ''' + """ Read from ambertools sqm.out - ''' + """ data = dpdata.amber.sqm.parse_sqm_out(fname) assert "forces" in list(data.keys()), f"No forces in {fname}" return data + @Format.register("sqm/in") class SQMINFormat(Format): def to_system(self, data, fname=None, frame_idx=0, **kwargs): @@ -85,14 +110,14 @@ def to_system(self, data, fname=None, frame_idx=0, **kwargs): @Driver.register("sqm") class SQMDriver(Driver): """AMBER sqm program driver. - + Parameters ---------- sqm_exec : str, default=sqm path to sqm program **kwargs : dict other arguments to make input files. See :class:`SQMINFormat` - + Examples -------- Use DFTB3 method to calculate potential energy: @@ -101,7 +126,8 @@ class SQMDriver(Driver): >>> labeled_system['energies'][0] -15.41111246 """ - def __init__(self, sqm_exec: str="sqm", **kwargs: dict) -> None: + + def __init__(self, sqm_exec: str = "sqm", **kwargs: dict) -> None: self.sqm_exec = sqm_exec self.kwargs = kwargs @@ -114,12 +140,14 @@ def label(self, data: dict) -> dict: out_fn = os.path.join(d, "%d.out" % ii) ss.to("sqm/in", inp_fn, **self.kwargs) try: - sp.check_output([*self.sqm_exec.split(), "-O", "-i", inp_fn, "-o", out_fn]) + sp.check_output( + [*self.sqm_exec.split(), "-O", "-i", inp_fn, "-o", out_fn] + ) except sp.CalledProcessError as e: with open(out_fn) as f: raise RuntimeError( "Run sqm failed! Output:\n" + f.read() - ) from e + ) from e labeled_system.append(dpdata.LabeledSystem(out_fn, fmt="sqm/out")) return labeled_system.data @@ -127,12 +155,13 @@ def label(self, data: dict) -> dict: @Minimizer.register("sqm") class SQMMinimizer(Minimizer): """SQM minimizer. 
- + Parameters ---------- maxcyc : int, default=1000 maximun cycle to minimize """ + def __init__(self, maxcyc=1000, *args, **kwargs) -> None: assert maxcyc > 0, "maxcyc should be more than 0 to minimize" self.driver = SQMDriver(maxcyc=maxcyc, **kwargs) diff --git a/dpdata/plugins/ase.py b/dpdata/plugins/ase.py index fa2093e40..b6e0fcb70 100644 --- a/dpdata/plugins/ase.py +++ b/dpdata/plugins/ase.py @@ -3,9 +3,11 @@ from dpdata.format import Format import numpy as np import dpdata + try: import ase.io from ase.calculators.calculator import PropertyNotImplementedError + if TYPE_CHECKING: from ase.optimize.optimize import Optimizer except ImportError: @@ -39,17 +41,19 @@ def from_system(self, atoms: "ase.Atoms", **kwargs) -> dict: symbols = atoms.get_chemical_symbols() atom_names = list(set(symbols)) atom_numbs = [symbols.count(symbol) for symbol in atom_names] - atom_types = np.array([atom_names.index(symbol) for symbol in symbols]).astype(int) + atom_types = np.array([atom_names.index(symbol) for symbol in symbols]).astype( + int + ) cells = atoms.cell[:] coords = atoms.get_positions() info_dict = { - 'atom_names': atom_names, - 'atom_numbs': atom_numbs, - 'atom_types': atom_types, - 'cells': np.array([cells]).astype('float32'), - 'coords': np.array([coords]).astype('float32'), - 'orig': np.zeros(3), - 'nopbc': not np.any(atoms.get_pbc()), + "atom_names": atom_names, + "atom_numbs": atom_numbs, + "atom_types": atom_types, + "cells": np.array([cells]).astype("float32"), + "coords": np.array([coords]).astype("float32"), + "orig": np.zeros(3), + "nopbc": not np.any(atoms.get_pbc()), } return info_dict @@ -66,7 +70,7 @@ def from_labeled_system(self, atoms: "ase.Atoms", **kwargs) -> dict: ------- dict data dict - + Raises ------ RuntimeError @@ -80,20 +84,28 @@ def from_labeled_system(self, atoms: "ase.Atoms", **kwargs) -> dict: energies = atoms.get_potential_energy() forces = atoms.get_forces() info_dict = { - ** info_dict, - 'energies': 
np.array([energies]).astype('float32'), - 'forces': np.array([forces]).astype('float32'), + **info_dict, + "energies": np.array([energies]).astype("float32"), + "forces": np.array([forces]).astype("float32"), } try: stress = atoms.get_stress(False) except PropertyNotImplementedError: pass else: - virials = np.array([-atoms.get_volume() * stress]).astype('float32') - info_dict['virials'] = virials + virials = np.array([-atoms.get_volume() * stress]).astype("float32") + info_dict["virials"] = virials return info_dict - def from_multi_systems(self, file_name: str, begin: Optional[int] = None, end: Optional[int] = None, step: Optional[int] = None, ase_fmt: Optional[str] = None, **kwargs) -> "ase.Atoms": + def from_multi_systems( + self, + file_name: str, + begin: Optional[int] = None, + end: Optional[int] = None, + step: Optional[int] = None, + ase_fmt: Optional[str] = None, + **kwargs + ) -> "ase.Atoms": """Convert a ASE supported file to ASE Atoms. It will finally be converted to MultiSystems. 
@@ -121,48 +133,49 @@ def from_multi_systems(self, file_name: str, begin: Optional[int] = None, end: O yield atoms def to_system(self, data, **kwargs): - ''' + """ convert System to ASE Atom obj - ''' + """ from ase import Atoms structures = [] - species = [data['atom_names'][tt] for tt in data['atom_types']] + species = [data["atom_names"][tt] for tt in data["atom_types"]] - for ii in range(data['coords'].shape[0]): + for ii in range(data["coords"].shape[0]): structure = Atoms( - symbols=species, positions=data['coords'][ii], pbc=not data.get('nopbc', False), cell=data['cells'][ii]) + symbols=species, + positions=data["coords"][ii], + pbc=not data.get("nopbc", False), + cell=data["cells"][ii], + ) structures.append(structure) return structures def to_labeled_system(self, data, *args, **kwargs): - '''Convert System to ASE Atoms object.''' + """Convert System to ASE Atoms object.""" from ase import Atoms from ase.calculators.singlepoint import SinglePointCalculator structures = [] - species = [data['atom_names'][tt] for tt in data['atom_types']] + species = [data["atom_names"][tt] for tt in data["atom_types"]] - for ii in range(data['coords'].shape[0]): + for ii in range(data["coords"].shape[0]): structure = Atoms( symbols=species, - positions=data['coords'][ii], - pbc=not data.get('nopbc', False), - cell=data['cells'][ii] + positions=data["coords"][ii], + pbc=not data.get("nopbc", False), + cell=data["cells"][ii], ) - results = { - 'energy': data["energies"][ii], - 'forces': data["forces"][ii] - } + results = {"energy": data["energies"][ii], "forces": data["forces"][ii]} if "virials" in data: # convert to GPa as this is ase convention # v_pref = 1 * 1e4 / 1.602176621e6 vol = structure.get_volume() # results['stress'] = data["virials"][ii] / (v_pref * vol) - results['stress'] = -data["virials"][ii] / vol + results["stress"] = -data["virials"][ii] / vol structure.calc = SinglePointCalculator(structure, **results) structures.append(structure) @@ -173,7 +186,7 @@ def 
to_labeled_system(self, data, *args, **kwargs): @Driver.register("ase") class ASEDriver(Driver): """ASE Driver. - + Parameters ---------- calculator : ase.calculators.calculator.Calculato @@ -186,12 +199,12 @@ def __init__(self, calculator: "ase.calculators.calculator.Calculator") -> None: def label(self, data: dict) -> dict: """Label a system data. Returns new data with energy, forces, and virials. - + Parameters ---------- data : dict data with coordinates and atom types - + Returns ------- dict @@ -204,7 +217,9 @@ def label(self, data: dict) -> dict: labeled_system = dpdata.LabeledSystem() for atoms in structures: atoms.calc = self.calculator - ls = dpdata.LabeledSystem(atoms, fmt="ase/structure", type_map=data['atom_names']) + ls = dpdata.LabeledSystem( + atoms, fmt="ase/structure", type_map=data["atom_names"] + ) labeled_system.append(ls) return labeled_system.data @@ -226,15 +241,19 @@ class ASEMinimizer(Minimizer): optimizer_kwargs : dict, optional other parameters for optimizer """ - def __init__(self, - driver: Driver, - optimizer: Optional[Type["Optimizer"]] = None, - fmax: float = 5e-3, - max_steps: Optional[int] = None, - optimizer_kwargs: dict = {}) -> None: + + def __init__( + self, + driver: Driver, + optimizer: Optional[Type["Optimizer"]] = None, + fmax: float = 5e-3, + max_steps: Optional[int] = None, + optimizer_kwargs: dict = {}, + ) -> None: self.calculator = driver.ase_calculator if optimizer is None: from ase.optimize import LBFGS + self.optimizer = LBFGS else: self.optimizer = optimizer @@ -252,7 +271,7 @@ def minimize(self, data: dict) -> dict: ---------- data : dict data with coordinates and atom types - + Returns ------- dict @@ -266,6 +285,8 @@ def minimize(self, data: dict) -> dict: atoms.calc = self.calculator dyn = self.optimizer(atoms, **self.optimizer_kwargs) dyn.run(fmax=self.fmax, steps=self.max_steps) - ls = dpdata.LabeledSystem(atoms, fmt="ase/structure", type_map=data['atom_names']) + ls = dpdata.LabeledSystem( + atoms, 
fmt="ase/structure", type_map=data["atom_names"] + ) labeled_system.append(ls) return labeled_system.data diff --git a/dpdata/plugins/cp2k.py b/dpdata/plugins/cp2k.py index 8787c7f09..143c1821b 100644 --- a/dpdata/plugins/cp2k.py +++ b/dpdata/plugins/cp2k.py @@ -16,15 +16,16 @@ def from_labeled_system(self, file_name, restart=False, **kwargs): class CP2KOutputFormat(Format): def from_labeled_system(self, file_name, restart=False, **kwargs): data = {} - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - data['cells'], \ - data['coords'], \ - data['energies'], \ - data['forces'], \ - tmp_virial \ - = dpdata.cp2k.output.get_frames(file_name) + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + data["energies"], + data["forces"], + tmp_virial, + ) = dpdata.cp2k.output.get_frames(file_name) if tmp_virial is not None: - data['virials'] = tmp_virial + data["virials"] = tmp_virial return data diff --git a/dpdata/plugins/deepmd.py b/dpdata/plugins/deepmd.py index c3b7f4ca5..2e8850091 100644 --- a/dpdata/plugins/deepmd.py +++ b/dpdata/plugins/deepmd.py @@ -14,15 +14,18 @@ @Format.register("deepmd/raw") class DeePMDRawFormat(Format): def from_system(self, file_name, type_map=None, **kwargs): - return dpdata.deepmd.raw.to_system_data(file_name, type_map=type_map, labels=False) + return dpdata.deepmd.raw.to_system_data( + file_name, type_map=type_map, labels=False + ) def to_system(self, data, file_name, **kwargs): - """Dump the system in deepmd raw format to directory `file_name` - """ + """Dump the system in deepmd raw format to directory `file_name`""" dpdata.deepmd.raw.dump(file_name, data) def from_labeled_system(self, file_name, type_map=None, **kwargs): - return dpdata.deepmd.raw.to_system_data(file_name, type_map=type_map, labels=True) + return dpdata.deepmd.raw.to_system_data( + file_name, type_map=type_map, labels=True + ) MultiMode = Format.MultiModes.Directory @@ -31,7 +34,9 @@ def 
from_labeled_system(self, file_name, type_map=None, **kwargs): @Format.register("deepmd/comp") class DeePMDCompFormat(Format): def from_system(self, file_name, type_map=None, **kwargs): - return dpdata.deepmd.comp.to_system_data(file_name, type_map=type_map, labels=False) + return dpdata.deepmd.comp.to_system_data( + file_name, type_map=type_map, labels=False + ) def to_system(self, data, file_name, set_size=5000, prec=np.float64, **kwargs): """ @@ -53,29 +58,37 @@ def to_system(self, data, file_name, set_size=5000, prec=np.float64, **kwargs): prec: {numpy.float32, numpy.float64} The floating point precision of the compressed data """ - dpdata.deepmd.comp.dump( - file_name, data, set_size=set_size, comp_prec=prec) + dpdata.deepmd.comp.dump(file_name, data, set_size=set_size, comp_prec=prec) def from_labeled_system(self, file_name, type_map=None, **kwargs): - return dpdata.deepmd.comp.to_system_data(file_name, type_map=type_map, labels=True) + return dpdata.deepmd.comp.to_system_data( + file_name, type_map=type_map, labels=True + ) MultiMode = Format.MultiModes.Directory + @Format.register("deepmd/hdf5") class DeePMDHDF5Format(Format): """HDF5 format for DeePMD-kit. - + Examples -------- Dump a MultiSystems to a HDF5 file: >>> import dpdata >>> dpdata.MultiSystems().from_deepmd_npy("data").to_deepmd_hdf5("data.hdf5") """ - def _from_system(self, file_name: Union[str, h5py.Group, h5py.File], type_map: List[str], labels: bool): + + def _from_system( + self, + file_name: Union[str, h5py.Group, h5py.File], + type_map: List[str], + labels: bool, + ): """Convert HDF5 file to System or LabeledSystem data. - + This method is used to switch from labeled or non-labeled options. 
- + Parameters ---------- file_name : str or h5py.Group or h5py.File @@ -97,19 +110,25 @@ def _from_system(self, file_name: Union[str, h5py.Group, h5py.File], type_map: L file_name is not str or h5py.Group or h5py.File """ if isinstance(file_name, (h5py.Group, h5py.File)): - return dpdata.deepmd.hdf5.to_system_data(file_name, "", type_map=type_map, labels=labels) + return dpdata.deepmd.hdf5.to_system_data( + file_name, "", type_map=type_map, labels=labels + ) elif isinstance(file_name, str): s = file_name.split("#") name = s[1] if len(s) > 1 else "" - with h5py.File(s[0], 'r') as f: - return dpdata.deepmd.hdf5.to_system_data(f, name, type_map=type_map, labels=labels) + with h5py.File(s[0], "r") as f: + return dpdata.deepmd.hdf5.to_system_data( + f, name, type_map=type_map, labels=labels + ) else: raise TypeError("Unsupported file_name") - def from_system(self, - file_name: Union[str, h5py.Group, h5py.File], - type_map: Optional[List[str]] = None, - **kwargs) -> dict: + def from_system( + self, + file_name: Union[str, h5py.Group, h5py.File], + type_map: Optional[List[str]] = None, + **kwargs + ) -> dict: """Convert HDF5 file to System data. Parameters @@ -132,10 +151,12 @@ def from_system(self, """ return self._from_system(file_name, type_map=type_map, labels=False) - def from_labeled_system(self, - file_name: Union[str, h5py.Group, h5py.File], - type_map: Optional[List[str]] = None, - **kwargs) -> dict: + def from_labeled_system( + self, + file_name: Union[str, h5py.Group, h5py.File], + type_map: Optional[List[str]] = None, + **kwargs + ) -> dict: """Convert HDF5 file to LabeledSystem data. 
Parameters @@ -158,14 +179,16 @@ def from_labeled_system(self, """ return self._from_system(file_name, type_map=type_map, labels=True) - def to_system(self, - data : dict, - file_name: Union[str, h5py.Group, h5py.File], - set_size : int = 5000, - comp_prec : np.dtype = np.float64, - **kwargs): + def to_system( + self, + data: dict, + file_name: Union[str, h5py.Group, h5py.File], + set_size: int = 5000, + comp_prec: np.dtype = np.float64, + **kwargs + ): """Convert System data to HDF5 file. - + Parameters ---------- data : dict @@ -179,21 +202,23 @@ def to_system(self, data precision """ if isinstance(file_name, (h5py.Group, h5py.File)): - dpdata.deepmd.hdf5.dump(file_name, "", data, set_size = set_size, comp_prec = comp_prec) + dpdata.deepmd.hdf5.dump( + file_name, "", data, set_size=set_size, comp_prec=comp_prec + ) elif isinstance(file_name, str): s = file_name.split("#") name = s[1] if len(s) > 1 else "" - with h5py.File(s[0], 'w') as f: - dpdata.deepmd.hdf5.dump(f, name, data, set_size = set_size, comp_prec = comp_prec) + with h5py.File(s[0], "w") as f: + dpdata.deepmd.hdf5.dump( + f, name, data, set_size=set_size, comp_prec=comp_prec + ) else: raise TypeError("Unsupported file_name") - def from_multi_systems(self, - directory: str, - **kwargs) -> h5py.Group: + def from_multi_systems(self, directory: str, **kwargs) -> h5py.Group: """Generate HDF5 groups from a HDF5 file, which will be passed to `from_system`. - + Parameters ---------- directory : str @@ -204,29 +229,28 @@ def from_multi_systems(self, h5py.Group a HDF5 group in the HDF5 file """ - with h5py.File(directory, 'r') as f: + with h5py.File(directory, "r") as f: for ff in f.keys(): yield f[ff] - def to_multi_systems(self, - formulas: List[str], - directory: str, - **kwargs) -> h5py.Group: + def to_multi_systems( + self, formulas: List[str], directory: str, **kwargs + ) -> h5py.Group: """Generate HDF5 groups, which will be passed to `to_system`. 
- + Parameters ---------- formulas : list[str] formulas of MultiSystems directory : str HDF5 file name - + Yields ------ h5py.Group a HDF5 group with the name of formula """ - with h5py.File(directory, 'w') as f: + with h5py.File(directory, "w") as f: for ff in formulas: yield f.create_group(ff) @@ -236,16 +260,17 @@ def to_multi_systems(self, @Driver.register("deepmd-kit") class DPDriver(Driver): """DeePMD-kit driver. - + Parameters ---------- dp : deepmd.DeepPot or str The deepmd-kit potential class or the filename of the model. - + Examples -------- >>> DPDriver("frozen_model.pb") """ + def __init__(self, dp: str) -> None: try: # DP 1.x @@ -257,16 +282,18 @@ def __init__(self, dp: str) -> None: self.dp = DeepPot(dp) else: self.dp = dp - self.enable_auto_batch_size = 'auto_batch_size' in DeepPot.__init__.__code__.co_varnames + self.enable_auto_batch_size = ( + "auto_batch_size" in DeepPot.__init__.__code__.co_varnames + ) def label(self, data: dict) -> dict: """Label a system data by deepmd-kit. Returns new data with energy, forces, and virials. 
- + Parameters ---------- data : dict data with coordinates and atom types - + Returns ------- dict @@ -274,36 +301,38 @@ def label(self, data: dict) -> dict: """ type_map = self.dp.get_type_map() - ori_sys = dpdata.System.from_dict({'data': data}) + ori_sys = dpdata.System.from_dict({"data": data}) ori_sys.sort_atom_names(type_map=type_map) - atype = ori_sys['atom_types'] + atype = ori_sys["atom_types"] if not self.enable_auto_batch_size: labeled_sys = dpdata.LabeledSystem() for ss in ori_sys: - coord = ss['coords'].reshape((1, ss.get_natoms()*3)) + coord = ss["coords"].reshape((1, ss.get_natoms() * 3)) if not ss.nopbc: - cell = ss['cells'].reshape((1, 9)) + cell = ss["cells"].reshape((1, 9)) else: cell = None e, f, v = self.dp.eval(coord, cell, atype) data = ss.data - data['energies'] = e.reshape((1,)) - data['forces'] = f.reshape((1, ss.get_natoms(), 3)) - data['virials'] = v.reshape((1, 3, 3)) - this_sys = dpdata.LabeledSystem.from_dict({'data': data}) + data["energies"] = e.reshape((1,)) + data["forces"] = f.reshape((1, ss.get_natoms(), 3)) + data["virials"] = v.reshape((1, 3, 3)) + this_sys = dpdata.LabeledSystem.from_dict({"data": data}) labeled_sys.append(this_sys) data = labeled_sys.data else: # since v2.0.2, auto batch size is supported - coord = ori_sys.data['coords'].reshape((ori_sys.get_nframes(), ori_sys.get_natoms()*3)) + coord = ori_sys.data["coords"].reshape( + (ori_sys.get_nframes(), ori_sys.get_natoms() * 3) + ) if not ori_sys.nopbc: - cell = ori_sys.data['cells'].reshape((ori_sys.get_nframes(), 9)) + cell = ori_sys.data["cells"].reshape((ori_sys.get_nframes(), 9)) else: cell = None e, f, v = self.dp.eval(coord, cell, atype) data = ori_sys.data.copy() - data['energies'] = e.reshape((ori_sys.get_nframes(),)) - data['forces'] = f.reshape((ori_sys.get_nframes(), ori_sys.get_natoms(), 3)) - data['virials'] = v.reshape((ori_sys.get_nframes(), 3, 3)) + data["energies"] = e.reshape((ori_sys.get_nframes(),)) + data["forces"] = 
f.reshape((ori_sys.get_nframes(), ori_sys.get_natoms(), 3)) + data["virials"] = v.reshape((ori_sys.get_nframes(), 3, 3)) return data diff --git a/dpdata/plugins/fhi_aims.py b/dpdata/plugins/fhi_aims.py index b1805c4ef..45b181fc0 100644 --- a/dpdata/plugins/fhi_aims.py +++ b/dpdata/plugins/fhi_aims.py @@ -1,37 +1,49 @@ import dpdata.fhi_aims.output from dpdata.format import Format + @Format.register("fhi_aims/md") @Format.register("fhi_aims/output") class FhiMDFormat(Format): - def from_labeled_system(self, file_name, md=True, begin = 0, step = 1, convergence_check=True, **kwargs): + def from_labeled_system( + self, file_name, md=True, begin=0, step=1, convergence_check=True, **kwargs + ): data = {} - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - data['cells'], \ - data['coords'], \ - data['energies'], \ - data['forces'], \ - tmp_virial, \ - = dpdata.fhi_aims.output.get_frames(file_name, md = md, begin = begin, step = step, convergence_check=convergence_check) - if tmp_virial is not None : - data['virials'] = tmp_virial + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + data["energies"], + data["forces"], + tmp_virial, + ) = dpdata.fhi_aims.output.get_frames( + file_name, + md=md, + begin=begin, + step=step, + convergence_check=convergence_check, + ) + if tmp_virial is not None: + data["virials"] = tmp_virial return data + @Format.register("fhi_aims/scf") class FhiSCFFormat(Format): def from_labeled_system(self, file_name, **kwargs): data = {} - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - data['cells'], \ - data['coords'], \ - data['energies'], \ - data['forces'], \ - tmp_virial, \ - = dpdata.fhi_aims.output.get_frames(file_name, md = False, begin = 0, step = 1) - if tmp_virial is not None : - data['virials'] = tmp_virial + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + data["energies"], + 
data["forces"], + tmp_virial, + ) = dpdata.fhi_aims.output.get_frames(file_name, md=False, begin=0, step=1) + if tmp_virial is not None: + data["virials"] = tmp_virial return data diff --git a/dpdata/plugins/gaussian.py b/dpdata/plugins/gaussian.py index f49dbf054..b2579a265 100644 --- a/dpdata/plugins/gaussian.py +++ b/dpdata/plugins/gaussian.py @@ -14,11 +14,7 @@ def from_labeled_system(self, file_name, md=False, **kwargs): try: return dpdata.gaussian.log.to_system_data(file_name, md=md) except AssertionError: - return { - 'energies': [], - 'forces': [], - 'nopbc': True - } + return {"energies": [], "forces": [], "nopbc": True} @Format.register("gaussian/md") @@ -30,6 +26,7 @@ def from_labeled_system(self, file_name, **kwargs): @Format.register("gaussian/gjf") class GaussiaGJFFormat(Format): """Gaussian input file""" + def to_system(self, data: dict, file_name: str, **kwargs): """Generate Gaussian input file. @@ -43,7 +40,7 @@ def to_system(self, data: dict, file_name: str, **kwargs): Other parameters to make input files. See :meth:`dpdata.gaussian.gjf.make_gaussian_input` """ text = dpdata.gaussian.gjf.make_gaussian_input(data, **kwargs) - with open(file_name, 'w') as fp: + with open(file_name, "w") as fp: fp.write(text) @@ -69,18 +66,19 @@ class GaussianDriver(Driver): >>> labeled_system['energies'][0] -1102.714590995794 """ - def __init__(self, gaussian_exec: str="g16", **kwargs: dict) -> None: + + def __init__(self, gaussian_exec: str = "g16", **kwargs: dict) -> None: self.gaussian_exec = gaussian_exec self.kwargs = kwargs def label(self, data: dict) -> dict: """Label a system data. Returns new data with energy, forces, and virials. - + Parameters ---------- data : dict data with coordinates and atom types - + Returns ------- dict @@ -98,8 +96,6 @@ def label(self, data: dict) -> dict: except sp.CalledProcessError as e: with open(out_fn) as f: out = f.read() - raise RuntimeError( - "Run gaussian failed! 
Output:\n" + out - ) from e + raise RuntimeError("Run gaussian failed! Output:\n" + out) from e labeled_system.append(dpdata.LabeledSystem(out_fn, fmt="gaussian/log")) return labeled_system.data diff --git a/dpdata/plugins/gromacs.py b/dpdata/plugins/gromacs.py index f4e3d5285..6f19a27f0 100644 --- a/dpdata/plugins/gromacs.py +++ b/dpdata/plugins/gromacs.py @@ -14,7 +14,9 @@ def from_system(self, file_name, format_atom_name=True, **kwargs): file_name : str The input file name """ - return dpdata.gromacs.gro.file_to_system_data(file_name, format_atom_name=format_atom_name, **kwargs) + return dpdata.gromacs.gro.file_to_system_data( + file_name, format_atom_name=format_atom_name, **kwargs + ) def to_system(self, data, file_name=None, frame_idx=-1, **kwargs): """ @@ -27,20 +29,20 @@ def to_system(self, data, file_name=None, frame_idx=-1, **kwargs): frame_idx : int The index of the frame to dump """ - assert(frame_idx < len(data['coords'])) + assert frame_idx < len(data["coords"]) if frame_idx == -1: strs = [] - for idx in range(data['coords'].shape[0]): - gro_str = dpdata.gromacs.gro.from_system_data(data, f_idx=idx, - **kwargs) + for idx in range(data["coords"].shape[0]): + gro_str = dpdata.gromacs.gro.from_system_data(data, f_idx=idx, **kwargs) strs.append(gro_str) gro_str = "\n".join(strs) else: gro_str = dpdata.gromacs.gro.from_system_data( - data, f_idx=frame_idx, **kwargs) + data, f_idx=frame_idx, **kwargs + ) if file_name is None: return gro_str else: - with open(file_name, 'w+') as fp: + with open(file_name, "w+") as fp: fp.write(gro_str) diff --git a/dpdata/plugins/lammps.py b/dpdata/plugins/lammps.py index 8f9962967..d4bce01b4 100644 --- a/dpdata/plugins/lammps.py +++ b/dpdata/plugins/lammps.py @@ -9,7 +9,7 @@ class LAMMPSLmpFormat(Format): @Format.post("shift_orig_zero") def from_system(self, file_name, type_map=None, **kwargs): with open(file_name) as fp: - lines = [line.rstrip('\n') for line in fp] + lines = [line.rstrip("\n") for line in fp] return 
dpdata.lammps.lmp.to_system_data(lines, type_map) def to_system(self, data, file_name, frame_idx=0, **kwargs): @@ -25,9 +25,9 @@ def to_system(self, data, file_name, frame_idx=0, **kwargs): frame_idx : int The index of the frame to dump """ - assert(frame_idx < len(data['coords'])) + assert frame_idx < len(data["coords"]) w_str = dpdata.lammps.lmp.from_system_data(data, frame_idx) - with open(file_name, 'w') as fp: + with open(file_name, "w") as fp: fp.write(w_str) @@ -35,12 +35,8 @@ def to_system(self, data, file_name, frame_idx=0, **kwargs): @Format.register("lammps/dump") class LAMMPSDumpFormat(Format): @Format.post("shift_orig_zero") - def from_system(self, - file_name, - type_map=None, - begin=0, - step=1, - unwrap=False, - **kwargs): + def from_system( + self, file_name, type_map=None, begin=0, step=1, unwrap=False, **kwargs + ): lines = dpdata.lammps.dump.load_file(file_name, begin=begin, step=step) return dpdata.lammps.dump.system_data(lines, type_map, unwrap=unwrap) diff --git a/dpdata/plugins/list.py b/dpdata/plugins/list.py index 0eca2e13d..99ac6d4a7 100644 --- a/dpdata/plugins/list.py +++ b/dpdata/plugins/list.py @@ -8,7 +8,8 @@ def to_system(self, data, **kwargs): convert system to list, usefull for data collection """ from dpdata import System, LabeledSystem - if 'forces' in data: + + if "forces" in data: system = LabeledSystem(data=data) else: system = System(data=data) diff --git a/dpdata/plugins/pwmat.py b/dpdata/plugins/pwmat.py index 3365806e5..baa415d6b 100644 --- a/dpdata/plugins/pwmat.py +++ b/dpdata/plugins/pwmat.py @@ -11,25 +11,30 @@ @Format.register("pwmat/output") class PwmatOutputFormat(Format): @Format.post("rot_lower_triangular") - def from_labeled_system(self, file_name, begin=0, step=1, convergence_check=True, **kwargs): + def from_labeled_system( + self, file_name, begin=0, step=1, convergence_check=True, **kwargs + ): data = {} - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - data['cells'], \ - 
data['coords'], \ - data['energies'], \ - data['forces'], \ - tmp_virial \ - = dpdata.pwmat.movement.get_frames(file_name, begin=begin, step=step, convergence_check=convergence_check) + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + data["energies"], + data["forces"], + tmp_virial, + ) = dpdata.pwmat.movement.get_frames( + file_name, begin=begin, step=step, convergence_check=convergence_check + ) if tmp_virial is not None: - data['virials'] = tmp_virial + data["virials"] = tmp_virial # scale virial to the unit of eV - if 'virials' in data: + if "virials" in data: v_pref = 1 * 1e3 / 1.602176621e6 - for ii in range(data['coords'].shape[0]): - vol = np.linalg.det(np.reshape(data['cells'][ii], [3, 3])) - data['virials'][ii] *= v_pref * vol + for ii in range(data["coords"].shape[0]): + vol = np.linalg.det(np.reshape(data["cells"][ii], [3, 3])) + data["virials"][ii] *= v_pref * vol return data @@ -41,7 +46,7 @@ class PwmatAtomconfigFormat(Format): @Format.post("rot_lower_triangular") def from_system(self, file_name, **kwargs): with open(file_name) as fp: - lines = [line.rstrip('\n') for line in fp] + lines = [line.rstrip("\n") for line in fp] return dpdata.pwmat.atomconfig.to_system_data(lines) def to_system(self, data, file_name, frame_idx=0, *args, **kwargs): @@ -55,7 +60,7 @@ def to_system(self, data, file_name, frame_idx=0, *args, **kwargs): frame_idx : int The index of the frame to dump """ - assert(frame_idx < len(data['coords'])) + assert frame_idx < len(data["coords"]) w_str = dpdata.pwmat.atomconfig.from_system_data(data, frame_idx) - with open(file_name, 'w') as fp: + with open(file_name, "w") as fp: fp.write(w_str) diff --git a/dpdata/plugins/pymatgen.py b/dpdata/plugins/pymatgen.py index f29ac382b..514b8d76c 100644 --- a/dpdata/plugins/pymatgen.py +++ b/dpdata/plugins/pymatgen.py @@ -6,20 +6,23 @@ @Format.register("pymatgen/structure") class PyMatgenStructureFormat(Format): def to_system(self, data, 
**kwargs): - """convert System to Pymatgen Structure obj - """ + """convert System to Pymatgen Structure obj""" structures = [] try: from pymatgen.core import Structure except ModuleNotFoundError as e: - raise ImportError('No module pymatgen.Structure') from e + raise ImportError("No module pymatgen.Structure") from e species = [] - for name, numb in zip(data['atom_names'], data['atom_numbs']): - species.extend([name]*numb) - for ii in range(data['coords'].shape[0]): + for name, numb in zip(data["atom_names"], data["atom_numbs"]): + species.extend([name] * numb) + for ii in range(data["coords"].shape[0]): structure = Structure( - data['cells'][ii], species, data['coords'][ii], coords_are_cartesian=True) + data["cells"][ii], + species, + data["coords"][ii], + coords_are_cartesian=True, + ) structures.append(structure) return structures @@ -31,26 +34,24 @@ def from_system(self, file_name, **kwargs): try: from pymatgen.core import Molecule except ModuleNotFoundError as e: - raise ImportError('No module pymatgen.Molecule') from e + raise ImportError("No module pymatgen.Molecule") from e return dpdata.pymatgen.molecule.to_system_data(file_name) def to_system(self, data, **kwargs): - """convert System to Pymatgen Molecule obj - """ + """convert System to Pymatgen Molecule obj""" molecules = [] try: from pymatgen.core import Molecule except ModuleNotFoundError as e: - raise ImportError('No module pymatgen.Molecule') from e + raise ImportError("No module pymatgen.Molecule") from e species = [] - for name, numb in zip(data['atom_names'], data['atom_numbs']): - species.extend([name]*numb) + for name, numb in zip(data["atom_names"], data["atom_numbs"]): + species.extend([name] * numb) data = dpdata.system.remove_pbc(data) - for ii in range(np.array(data['coords']).shape[0]): - molecule = Molecule( - species, data['coords'][ii]) + for ii in range(np.array(data["coords"]).shape[0]): + molecule = Molecule(species, data["coords"][ii]) molecules.append(molecule) return molecules 
@@ -59,23 +60,20 @@ def to_system(self, data, **kwargs): @Format.register_to("to_pymatgen_ComputedStructureEntry") class PyMatgenCSEFormat(Format): def to_labeled_system(self, data, *args, **kwargs): - """convert System to Pymagen ComputedStructureEntry obj - """ + """convert System to Pymagen ComputedStructureEntry obj""" try: from pymatgen.entries.computed_entries import ComputedStructureEntry except ModuleNotFoundError as e: raise ImportError( - 'No module ComputedStructureEntry in pymatgen.entries.computed_entries') from e + "No module ComputedStructureEntry in pymatgen.entries.computed_entries" + ) from e entries = [] for ii, structure in enumerate(PyMatgenStructureFormat().to_system(data)): - energy = data['energies'][ii] - csedata = {'forces': data['forces'][ii], - 'virials': data['virials'][ii]} + energy = data["energies"][ii] + csedata = {"forces": data["forces"][ii], "virials": data["virials"][ii]} entry = ComputedStructureEntry(structure, energy, data=csedata) entries.append(entry) return entries - - diff --git a/dpdata/plugins/qe.py b/dpdata/plugins/qe.py index e6a1665ac..1b95a6d4e 100644 --- a/dpdata/plugins/qe.py +++ b/dpdata/plugins/qe.py @@ -3,41 +3,49 @@ import dpdata.md.pbc from dpdata.format import Format + @Format.register("qe/cp/traj") class QECPTrajFormat(Format): @Format.post("rot_lower_triangular") - def from_system(self, file_name, begin = 0, step = 1, **kwargs): - data, _ = dpdata.qe.traj.to_system_data(file_name + '.in', file_name, begin = begin, step = step) - data['coords'] \ - = dpdata.md.pbc.apply_pbc(data['coords'], - data['cells'], - ) + def from_system(self, file_name, begin=0, step=1, **kwargs): + data, _ = dpdata.qe.traj.to_system_data( + file_name + ".in", file_name, begin=begin, step=step + ) + data["coords"] = dpdata.md.pbc.apply_pbc( + data["coords"], + data["cells"], + ) return data @Format.post("rot_lower_triangular") - def from_labeled_system(self, file_name, begin = 0, step = 1, **kwargs): - data, cs = 
dpdata.qe.traj.to_system_data(file_name + '.in', file_name, begin = begin, step = step) - data['coords'] \ - = dpdata.md.pbc.apply_pbc(data['coords'], - data['cells'], - ) - data['energies'], data['forces'], es \ - = dpdata.qe.traj.to_system_label(file_name + '.in', file_name, begin = begin, step = step) - assert(cs == es), "the step key between files are not consistent" + def from_labeled_system(self, file_name, begin=0, step=1, **kwargs): + data, cs = dpdata.qe.traj.to_system_data( + file_name + ".in", file_name, begin=begin, step=step + ) + data["coords"] = dpdata.md.pbc.apply_pbc( + data["coords"], + data["cells"], + ) + data["energies"], data["forces"], es = dpdata.qe.traj.to_system_label( + file_name + ".in", file_name, begin=begin, step=step + ) + assert cs == es, "the step key between files are not consistent" return data + @Format.register("qe/pw/scf") class QECPPWSCFFormat(Format): @Format.post("rot_lower_triangular") def from_labeled_system(self, file_name, **kwargs): data = {} - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - data['cells'], \ - data['coords'], \ - data['energies'], \ - data['forces'], \ - data['virials'], \ - = dpdata.qe.scf.get_frame(file_name) + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + data["energies"], + data["forces"], + data["virials"], + ) = dpdata.qe.scf.get_frame(file_name) return data diff --git a/dpdata/plugins/rdkit.py b/dpdata/plugins/rdkit.py index 9d40257a7..043fad1e6 100644 --- a/dpdata/plugins/rdkit.py +++ b/dpdata/plugins/rdkit.py @@ -1,4 +1,5 @@ from dpdata.format import Format + try: import rdkit.Chem import dpdata.rdkit.utils @@ -11,10 +12,9 @@ class MolFormat(Format): def from_bond_order_system(self, file_name, **kwargs): return rdkit.Chem.MolFromMolFile(file_name, sanitize=False, removeHs=False) - def to_bond_order_system(self, data, mol, file_name, frame_idx=0, **kwargs): - assert (frame_idx < mol.GetNumConformers()) + assert 
frame_idx < mol.GetNumConformers() rdkit.Chem.MolToMolFile(mol, file_name, confId=frame_idx) @@ -22,22 +22,25 @@ def to_bond_order_system(self, data, mol, file_name, frame_idx=0, **kwargs): @Format.register("sdf_file") class SdfFormat(Format): def from_bond_order_system(self, file_name, **kwargs): - ''' + """ Note that it requires all molecules in .sdf file must be of the same topology - ''' - mols = [m for m in rdkit.Chem.SDMolSupplier(file_name, sanitize=False, removeHs=False)] + """ + mols = [ + m + for m in rdkit.Chem.SDMolSupplier(file_name, sanitize=False, removeHs=False) + ] if len(mols) > 1: mol = dpdata.rdkit.utils.combine_molecules(mols) else: mol = mols[0] return mol - + def to_bond_order_system(self, data, mol, file_name, frame_idx=-1, **kwargs): sdf_writer = rdkit.Chem.SDWriter(file_name) if frame_idx == -1: for ii in range(mol.GetNumConformers()): sdf_writer.write(mol, confId=ii) else: - assert (frame_idx < mol.GetNumConformers()) + assert frame_idx < mol.GetNumConformers() sdf_writer.write(mol, confId=frame_idx) - sdf_writer.close() \ No newline at end of file + sdf_writer.close() diff --git a/dpdata/plugins/siesta.py b/dpdata/plugins/siesta.py index 6838dac37..5b38e8b2a 100644 --- a/dpdata/plugins/siesta.py +++ b/dpdata/plugins/siesta.py @@ -7,28 +7,30 @@ class SiestaOutputFormat(Format): def from_system(self, file_name, **kwargs): data = {} - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - data['cells'], \ - data['coords'], \ - _e, \ - _f, \ - _v \ - = dpdata.siesta.output.obtain_frame(file_name) + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + _e, + _f, + _v, + ) = dpdata.siesta.output.obtain_frame(file_name) return data def from_labeled_system(self, file_name, **kwargs): data = {} - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - data['cells'], \ - data['coords'], \ - data['energies'], \ - data['forces'], \ - data['virials'] \ - = 
dpdata.siesta.output.obtain_frame(file_name) + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + data["energies"], + data["forces"], + data["virials"], + ) = dpdata.siesta.output.obtain_frame(file_name) return data @@ -37,26 +39,28 @@ def from_labeled_system(self, file_name, **kwargs): class SiestaAIMDOutputFormat(Format): def from_system(self, file_name, **kwargs): data = {} - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - data['cells'], \ - data['coords'], \ - _e, \ - _f, \ - _v \ - = dpdata.siesta.aiMD_output.get_aiMD_frame(file_name) + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + _e, + _f, + _v, + ) = dpdata.siesta.aiMD_output.get_aiMD_frame(file_name) return data def from_labeled_system(self, file_name, **kwargs): data = {} - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - data['cells'], \ - data['coords'], \ - data['energies'], \ - data['forces'], \ - data['virials'] \ - = dpdata.siesta.aiMD_output.get_aiMD_frame(file_name) + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + data["energies"], + data["forces"], + data["virials"], + ) = dpdata.siesta.aiMD_output.get_aiMD_frame(file_name) return data diff --git a/dpdata/plugins/vasp.py b/dpdata/plugins/vasp.py index 07ec34f17..5b151f807 100644 --- a/dpdata/plugins/vasp.py +++ b/dpdata/plugins/vasp.py @@ -5,6 +5,7 @@ from dpdata.format import Format from dpdata.utils import sort_atom_names, uniq_atom_names + @Format.register("poscar") @Format.register("contcar") @Format.register("vasp/poscar") @@ -13,7 +14,7 @@ class VASPPoscarFormat(Format): @Format.post("rot_lower_triangular") def from_system(self, file_name, **kwargs): with open(file_name) as fp: - lines = [line.rstrip('\n') for line in fp] + lines = [line.rstrip("\n") for line in fp] data = dpdata.vasp.poscar.to_system_data(lines) data = 
uniq_atom_names(data) return data @@ -30,7 +31,7 @@ def to_system(self, data, file_name, frame_idx=0, **kwargs): The index of the frame to dump """ w_str = VASPStringFormat().to_system(data, frame_idx=frame_idx) - with open(file_name, 'w') as fp: + with open(file_name, "w") as fp: fp.write(w_str) @@ -45,7 +46,7 @@ def to_system(self, data, frame_idx=0, **kwargs): frame_idx : int The index of the frame to dump """ - assert(frame_idx < len(data['coords'])) + assert frame_idx < len(data["coords"]) return dpdata.vasp.poscar.from_system_data(data, frame_idx) @@ -54,26 +55,35 @@ def to_system(self, data, frame_idx=0, **kwargs): @Format.register("vasp/outcar") class VASPOutcarFormat(Format): @Format.post("rot_lower_triangular") - def from_labeled_system(self, file_name, begin=0, step=1, convergence_check=True, **kwargs): + def from_labeled_system( + self, file_name, begin=0, step=1, convergence_check=True, **kwargs + ): data = {} ml = kwargs.get("ml", False) - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - data['cells'], \ - data['coords'], \ - data['energies'], \ - data['forces'], \ - tmp_virial, \ - = dpdata.vasp.outcar.get_frames(file_name, begin=begin, step=step, ml=ml, convergence_check=convergence_check) + ( + data["atom_names"], + data["atom_numbs"], + data["atom_types"], + data["cells"], + data["coords"], + data["energies"], + data["forces"], + tmp_virial, + ) = dpdata.vasp.outcar.get_frames( + file_name, + begin=begin, + step=step, + ml=ml, + convergence_check=convergence_check, + ) if tmp_virial is not None: - data['virials'] = tmp_virial + data["virials"] = tmp_virial # scale virial to the unit of eV - if 'virials' in data: + if "virials" in data: v_pref = 1 * 1e3 / 1.602176621e6 - for ii in range(data['cells'].shape[0]): - vol = np.linalg.det(np.reshape(data['cells'][ii], [3, 3])) - data['virials'][ii] *= v_pref * vol + for ii in range(data["cells"].shape[0]): + vol = np.linalg.det(np.reshape(data["cells"][ii], [3, 3])) + 
data["virials"][ii] *= v_pref * vol data = uniq_atom_names(data) return data @@ -85,27 +95,28 @@ class VASPXMLFormat(Format): @Format.post("rot_lower_triangular") def from_labeled_system(self, file_name, begin=0, step=1, **kwargs): data = {} - data['atom_names'], \ - data['atom_types'], \ - data['cells'], \ - data['coords'], \ - data['energies'], \ - data['forces'], \ - data['virials'], \ - = dpdata.vasp.xml.analyze(file_name, type_idx_zero=True, begin=begin, step=step) - data['atom_numbs'] = [] - for ii in range(len(data['atom_names'])): - data['atom_numbs'].append(sum(data['atom_types'] == ii)) + ( + data["atom_names"], + data["atom_types"], + data["cells"], + data["coords"], + data["energies"], + data["forces"], + data["virials"], + ) = dpdata.vasp.xml.analyze( + file_name, type_idx_zero=True, begin=begin, step=step + ) + data["atom_numbs"] = [] + for ii in range(len(data["atom_names"])): + data["atom_numbs"].append(sum(data["atom_types"] == ii)) # the vasp xml assumes the direct coordinates # apply the transform to the cartesan coordinates - for ii in range(data['cells'].shape[0]): - data['coords'][ii] = np.matmul( - data['coords'][ii], data['cells'][ii]) + for ii in range(data["cells"].shape[0]): + data["coords"][ii] = np.matmul(data["coords"][ii], data["cells"][ii]) # scale virial to the unit of eV v_pref = 1 * 1e3 / 1.602176621e6 - for ii in range(data['cells'].shape[0]): - vol = np.linalg.det(np.reshape(data['cells'][ii], [3, 3])) - data['virials'][ii] *= v_pref * vol + for ii in range(data["cells"].shape[0]): + vol = np.linalg.det(np.reshape(data["cells"][ii], [3, 3])) + data["virials"][ii] *= v_pref * vol data = uniq_atom_names(data) return data - diff --git a/dpdata/plugins/xyz.py b/dpdata/plugins/xyz.py index 7ec9d2665..4db722e3e 100644 --- a/dpdata/plugins/xyz.py +++ b/dpdata/plugins/xyz.py @@ -4,6 +4,7 @@ from dpdata.xyz.xyz import coord_to_xyz, xyz_to_coord from dpdata.format import Format + @Format.register("xyz") class XYZFormat(Format): """XYZ 
foramt. @@ -12,26 +13,29 @@ class XYZFormat(Format): -------- >>> s.to("xyz", "a.xyz") """ + def to_system(self, data, file_name, **kwargs): buff = [] - types = np.array(data['atom_names'])[data['atom_types']] - for cc in data['coords']: + types = np.array(data["atom_names"])[data["atom_types"]] + for cc in data["coords"]: buff.append(coord_to_xyz(cc, types)) - with open(file_name, 'w') as fp: + with open(file_name, "w") as fp: fp.write("\n".join(buff)) def from_system(self, file_name, **kwargs): - with open(file_name, 'r') as fp: + with open(file_name, "r") as fp: coords, types = xyz_to_coord(fp.read()) - atom_names, atom_types, atom_numbs = np.unique(types, return_inverse=True, return_counts=True) + atom_names, atom_types, atom_numbs = np.unique( + types, return_inverse=True, return_counts=True + ) return { - 'atom_names': list(atom_names), - 'atom_numbs': list(atom_numbs), - 'atom_types': atom_types, - 'coords': coords.reshape((1, *coords.shape)), - 'cells': np.eye(3).reshape((1, 3, 3)) * 100, - 'nopbc': True, - 'orig': np.zeros(3), + "atom_names": list(atom_names), + "atom_numbs": list(atom_numbs), + "atom_types": atom_types, + "coords": coords.reshape((1, *coords.shape)), + "cells": np.eye(3).reshape((1, 3, 3)) * 100, + "nopbc": True, + "orig": np.zeros(3), } diff --git a/dpdata/pwmat/__init__.py b/dpdata/pwmat/__init__.py index 8b1378917..e69de29bb 100644 --- a/dpdata/pwmat/__init__.py +++ b/dpdata/pwmat/__init__.py @@ -1 +0,0 @@ - diff --git a/dpdata/pwmat/atomconfig.py b/dpdata/pwmat/atomconfig.py index bc06f3470..5e953c472 100644 --- a/dpdata/pwmat/atomconfig.py +++ b/dpdata/pwmat/atomconfig.py @@ -1,69 +1,70 @@ -#!/usr/bin/python3 +#!/usr/bin/python3 from ..periodic_table import ELEMENTS import numpy as np -def _to_system_data_lower(lines) : + +def _to_system_data_lower(lines): system = {} natoms = int(lines[0].split()[0]) cell = [] for idx, ii in enumerate(lines): - if 'lattice' in ii or 'Lattice' in ii or 'LATTICE' in ii: - for kk in 
range(idx+1,idx+1+3): - vector=[float(jj) for jj in lines[kk].split()[0:3]] + if "lattice" in ii or "Lattice" in ii or "LATTICE" in ii: + for kk in range(idx + 1, idx + 1 + 3): + vector = [float(jj) for jj in lines[kk].split()[0:3]] cell.append(vector) - system['cells'] = np.array([cell]) + system["cells"] = np.array([cell]) coord = [] atomic_number = [] atom_numbs = [] for idx, ii in enumerate(lines): - if 'Position' in ii or 'POSITION' in ii or 'position' in ii: - for kk in range(idx+1,idx+1+natoms): + if "Position" in ii or "POSITION" in ii or "position" in ii: + for kk in range(idx + 1, idx + 1 + natoms): min = kk - for jj in range(kk+1,idx+1+natoms): + for jj in range(kk + 1, idx + 1 + natoms): if int(lines[jj].split()[0]) < int(lines[min].split()[0]): min = jj - lines[min], lines[kk] = lines[kk],lines[min] - for gg in range(idx+1,idx+1+natoms): + lines[min], lines[kk] = lines[kk], lines[min] + for gg in range(idx + 1, idx + 1 + natoms): tmpv = [float(jj) for jj in lines[gg].split()[1:4]] - tmpv = np.matmul(np.array(tmpv), system['cells'][0]) + tmpv = np.matmul(np.array(tmpv), system["cells"][0]) coord.append(tmpv) tmpn = int(lines[gg].split()[0]) atomic_number.append(tmpn) - for ii in np.unique(sorted(atomic_number)) : + for ii in np.unique(sorted(atomic_number)): atom_numbs.append(atomic_number.count(ii)) - system['atom_numbs'] = [int(ii) for ii in atom_numbs] - system['coords'] = np.array([coord]) - system['orig'] = np.zeros(3) + system["atom_numbs"] = [int(ii) for ii in atom_numbs] + system["coords"] = np.array([coord]) + system["orig"] = np.zeros(3) atom_types = [] - for idx,ii in enumerate(system['atom_numbs']) : - for jj in range(ii) : + for idx, ii in enumerate(system["atom_numbs"]): + for jj in range(ii): atom_types.append(idx) - system['atom_types'] = np.array(atom_types, dtype = int) - system['atom_names'] = [ELEMENTS[ii-1] for ii in np.unique(sorted(atomic_number))] + system["atom_types"] = np.array(atom_types, dtype=int) + system["atom_names"] = 
[ELEMENTS[ii - 1] for ii in np.unique(sorted(atomic_number))] return system -def to_system_data(lines) : +def to_system_data(lines): return _to_system_data_lower(lines) -def from_system_data(system, f_idx = 0, skip_zeros = True) : - ret = '' - natoms = sum(system['atom_numbs']) - ret += '%d' % natoms - ret += '\n' - ret += 'LATTICE' - ret += '\n' - for ii in system['cells'][f_idx] : - for jj in ii : - ret += '%.16e ' % jj - ret += '\n' - ret += 'POSITION' - ret += '\n' - atom_numbs = system['atom_numbs'] - atom_names = system['atom_names'] - atype = system['atom_types'] - posis = system['coords'][f_idx] +def from_system_data(system, f_idx=0, skip_zeros=True): + ret = "" + natoms = sum(system["atom_numbs"]) + ret += "%d" % natoms + ret += "\n" + ret += "LATTICE" + ret += "\n" + for ii in system["cells"][f_idx]: + for jj in ii: + ret += "%.16e " % jj + ret += "\n" + ret += "POSITION" + ret += "\n" + atom_numbs = system["atom_numbs"] + atom_names = system["atom_names"] + atype = system["atom_types"] + posis = system["coords"][f_idx] # atype_idx = [[idx,tt] for idx,tt in enumerate(atype)] # sort_idx = np.argsort(atype, kind = 'mergesort') sort_idx = np.lexsort((np.arange(len(atype)), atype)) @@ -72,22 +73,20 @@ def from_system_data(system, f_idx = 0, skip_zeros = True) : symbal = [] for ii, jj in zip(atom_numbs, atom_names): for kk in range(ii): - symbal.append(jj) + symbal.append(jj) atomic_numbers = [] for ii in symbal: - atomic_numbers.append(ELEMENTS.index(ii)+1) + atomic_numbers.append(ELEMENTS.index(ii) + 1) posi_list = [] - for jj, ii in zip(atomic_numbers,posis) : - ii = np.matmul(ii, np.linalg.inv(system['cells'][0])) - posi_list.append('%d %15.10f %15.10f %15.10f 1 1 1' % \ - (jj, ii[0], ii[1], ii[2]) - ) + for jj, ii in zip(atomic_numbers, posis): + ii = np.matmul(ii, np.linalg.inv(system["cells"][0])) + posi_list.append("%d %15.10f %15.10f %15.10f 1 1 1" % (jj, ii[0], ii[1], ii[2])) for kk in range(len(posi_list)): min = kk - for jj in 
range(kk,len(posi_list)): + for jj in range(kk, len(posi_list)): if int(posi_list[jj].split()[0]) < int(posi_list[min].split()[0]): min = jj - posi_list[min], posi_list[kk] = posi_list[kk],posi_list[min] - posi_list.append('') - ret += '\n'.join(posi_list) + posi_list[min], posi_list[kk] = posi_list[kk], posi_list[min] + posi_list.append("") + ret += "\n".join(posi_list) return ret diff --git a/dpdata/pwmat/movement.py b/dpdata/pwmat/movement.py index c39950f0a..c2e0bf3a7 100644 --- a/dpdata/pwmat/movement.py +++ b/dpdata/pwmat/movement.py @@ -2,59 +2,61 @@ from ..periodic_table import ELEMENTS import warnings -def system_info (lines, type_idx_zero = False) : + +def system_info(lines, type_idx_zero=False): atom_names = [] atom_numbs = [] nelm = 0 natoms = int(lines[0].split()[0]) - iteration = float(lines[0].split('Etot')[0].split('=')[1].split(',')[0]) -# print(iteration) - if iteration > 0 : + iteration = float(lines[0].split("Etot")[0].split("=")[1].split(",")[0]) + # print(iteration) + if iteration > 0: nelm = 40 else: nelm = 100 atomic_number = [] - for idx,ii in enumerate(lines): - if 'Position' in ii: - for kk in range(idx+1,idx+1+natoms) : + for idx, ii in enumerate(lines): + if "Position" in ii: + for kk in range(idx + 1, idx + 1 + natoms): min = kk - for jj in range(kk+1,idx+1+natoms): + for jj in range(kk + 1, idx + 1 + natoms): if int(lines[jj].split()[0]) < int(lines[min].split()[0]): min = jj - lines[min], lines[kk] = lines[kk],lines[min] - for gg in range(idx+1,idx+1+natoms): + lines[min], lines[kk] = lines[kk], lines[min] + for gg in range(idx + 1, idx + 1 + natoms): tmpn = int(lines[gg].split()[0]) atomic_number.append(tmpn) - for ii in np.unique(sorted(atomic_number)) : + for ii in np.unique(sorted(atomic_number)): atom_numbs.append(atomic_number.count(ii)) atom_types = [] - for idx,ii in enumerate(atom_numbs) : - for jj in range(ii) : - if type_idx_zero : + for idx, ii in enumerate(atom_numbs): + for jj in range(ii): + if type_idx_zero: 
atom_types.append(idx) - else : - atom_types.append(idx+1) + else: + atom_types.append(idx + 1) for ii in np.unique(sorted(atomic_number)): - atom_names.append(ELEMENTS[ii-1]) - return atom_names, atom_numbs, np.array(atom_types, dtype = int), nelm + atom_names.append(ELEMENTS[ii - 1]) + return atom_names, atom_numbs, np.array(atom_types, dtype=int), nelm -def get_movement_block(fp) : +def get_movement_block(fp): blk = [] - for ii in fp : + for ii in fp: if not ii: return blk - blk.append(ii.rstrip('\n')) - if '------------' in ii: + blk.append(ii.rstrip("\n")) + if "------------" in ii: return blk return blk + # we assume that the force is printed ... -def get_frames (fname, begin = 0, step = 1, convergence_check=True) : +def get_frames(fname, begin=0, step=1, convergence_check=True): fp = open(fname) blk = get_movement_block(fp) - atom_names, atom_numbs, atom_types, nelm = system_info(blk, type_idx_zero = True) + atom_names, atom_numbs, atom_types, nelm = system_info(blk, type_idx_zero=True) ntot = sum(atom_numbs) all_coords = [] @@ -62,13 +64,15 @@ def get_frames (fname, begin = 0, step = 1, convergence_check=True) : all_energies = [] all_atomic_energy = [] all_forces = [] - all_virials = [] + all_virials = [] cc = 0 rec_failed = [] - while len(blk) > 0 : - if cc >= begin and (cc - begin) % step == 0 : - coord, cell, energy, force, virial, is_converge = analyze_block(blk, ntot, nelm) + while len(blk) > 0: + if cc >= begin and (cc - begin) % step == 0: + coord, cell, energy, force, virial, is_converge = analyze_block( + blk, ntot, nelm + ) if len(coord) == 0: break if is_converge or not convergence_check: @@ -76,44 +80,60 @@ def get_frames (fname, begin = 0, step = 1, convergence_check=True) : all_cells.append(cell) all_energies.append(energy) all_forces.append(force) - if virial is not None : + if virial is not None: all_virials.append(virial) if not is_converge: - rec_failed.append(cc+1) - + rec_failed.append(cc + 1) + blk = get_movement_block(fp) cc += 1 - - 
if len(rec_failed) > 0 : - prt = "so they are not collected." if convergence_check else "but they are still collected due to the requirement for ignoring convergence checks." - warnings.warn(f"The following structures were unconverged: {rec_failed}; "+prt) - - if len(all_virials) == 0 : + + if len(rec_failed) > 0: + prt = ( + "so they are not collected." + if convergence_check + else "but they are still collected due to the requirement for ignoring convergence checks." + ) + warnings.warn( + f"The following structures were unconverged: {rec_failed}; " + prt + ) + + if len(all_virials) == 0: all_virials = None - else : + else: all_virials = np.array(all_virials) fp.close() - return atom_names, atom_numbs, atom_types, np.array(all_cells), np.array(all_coords), \ - np.array(all_energies), np.array(all_forces), all_virials + return ( + atom_names, + atom_numbs, + atom_types, + np.array(all_cells), + np.array(all_coords), + np.array(all_energies), + np.array(all_forces), + all_virials, + ) -def analyze_block(lines, ntot, nelm) : +def analyze_block(lines, ntot, nelm): coord = [] cell = [] energy = None -# atomic_energy = [] + # atomic_energy = [] force = [] virial = None is_converge = True sc_index = 0 - for idx,ii in enumerate(lines) : - if 'Iteration' in ii: - sc_index = int(ii.split('SCF =')[1]) + for idx, ii in enumerate(lines): + if "Iteration" in ii: + sc_index = int(ii.split("SCF =")[1]) if sc_index >= nelm: is_converge = False - energy = float(ii.split('Etot,Ep,Ek (eV)')[1].split()[2]) # use Ep, not Etot=Ep+Ek - elif '----------' in ii: - assert((force is not None) and len(coord) > 0 and len(cell) > 0) + energy = float( + ii.split("Etot,Ep,Ek (eV)")[1].split()[2] + ) # use Ep, not Etot=Ep+Ek + elif "----------" in ii: + assert (force is not None) and len(coord) > 0 and len(cell) > 0 # all_coords.append(coord) # all_cells.append(cell) # all_energies.append(energy) @@ -121,17 +141,16 @@ def analyze_block(lines, ntot, nelm) : # if virial is not None : # 
all_virials.append(virial) return coord, cell, energy, force, virial, is_converge -# elif 'NPT' in ii: -# tmp_v = [] - elif 'Lattice vector' in ii: - if 'stress' in lines[idx+1]: + # elif 'NPT' in ii: + # tmp_v = [] + elif "Lattice vector" in ii: + if "stress" in lines[idx + 1]: tmp_v = [] - for dd in range(3) : - tmp_l = lines[idx+1+dd] - cell.append([float(ss) - for ss in tmp_l.split()[0:3]]) + for dd in range(3): + tmp_l = lines[idx + 1 + dd] + cell.append([float(ss) for ss in tmp_l.split()[0:3]]) tmp_v.append([float(stress) for stress in tmp_l.split()[5:8]]) - virial = np.zeros([3,3]) + virial = np.zeros([3, 3]) virial[0][0] = tmp_v[0][0] virial[0][1] = tmp_v[0][1] virial[0][2] = tmp_v[0][2] @@ -142,43 +161,44 @@ def analyze_block(lines, ntot, nelm) : virial[2][1] = tmp_v[2][1] virial[2][2] = tmp_v[2][2] volume = np.linalg.det(np.array(cell)) - virial = virial*160.2*10.0/volume + virial = virial * 160.2 * 10.0 / volume else: - for dd in range(3) : - tmp_l = lines[idx+1+dd] - cell.append([float(ss) - for ss in tmp_l.split()[0:3]]) + for dd in range(3): + tmp_l = lines[idx + 1 + dd] + cell.append([float(ss) for ss in tmp_l.split()[0:3]]) -# else : -# for dd in range(3) : -# tmp_l = lines[idx+1+dd] -# cell.append([float(ss) -# for ss in tmp_l.split()[0:3]]) -# virial = np.zeros([3,3]) - elif 'Position' in ii: - for kk in range(idx+1, idx+1+ntot): + # else : + # for dd in range(3) : + # tmp_l = lines[idx+1+dd] + # cell.append([float(ss) + # for ss in tmp_l.split()[0:3]]) + # virial = np.zeros([3,3]) + elif "Position" in ii: + for kk in range(idx + 1, idx + 1 + ntot): min = kk - for jj in range(kk+1,idx+1+ntot): + for jj in range(kk + 1, idx + 1 + ntot): if int(lines[jj].split()[0]) < int(lines[min].split()[0]): min = jj - lines[min], lines[kk] = lines[kk],lines[min] - for gg in range(idx+1,idx+1+ntot): + lines[min], lines[kk] = lines[kk], lines[min] + for gg in range(idx + 1, idx + 1 + ntot): info = [float(jj) for jj in lines[gg].split()[1:4]] - info = 
np.matmul(np.array(info),np.array(cell)) + info = np.matmul(np.array(info), np.array(cell)) coord.append(info) - elif 'Force' in ii: - for kk in range(idx+1, idx+1+ntot): + elif "Force" in ii: + for kk in range(idx + 1, idx + 1 + ntot): min = kk - for jj in range(kk+1,idx+1+ntot): + for jj in range(kk + 1, idx + 1 + ntot): if int(lines[jj].split()[0]) < int(lines[min].split()[0]): min = jj - lines[min], lines[kk] = lines[kk],lines[min] - for gg in range(idx+1,idx+1+ntot): - info = [-float(ss) for ss in lines[gg].split()] # forces in MOVEMENT file are dE/dR, lacking a minus sign + lines[min], lines[kk] = lines[kk], lines[min] + for gg in range(idx + 1, idx + 1 + ntot): + info = [ + -float(ss) for ss in lines[gg].split() + ] # forces in MOVEMENT file are dE/dR, lacking a minus sign force.append(info[1:4]) -# elif 'Atomic-Energy' in ii: -# for jj in range(idx+1, idx+1+ntot) : -# tmp_l = lines[jj] -# info = [float(ss) for ss in tmp_l.split()] -# atomic_energy.append(info[1]) + # elif 'Atomic-Energy' in ii: + # for jj in range(idx+1, idx+1+ntot) : + # tmp_l = lines[jj] + # info = [float(ss) for ss in tmp_l.split()] + # atomic_energy.append(info[1]) return coord, cell, energy, force, virial, is_converge diff --git a/dpdata/pymatgen/molecule.py b/dpdata/pymatgen/molecule.py index a362bb539..c2559bef2 100644 --- a/dpdata/pymatgen/molecule.py +++ b/dpdata/pymatgen/molecule.py @@ -1,4 +1,5 @@ import numpy as np + try: from pymatgen.core import Molecule except ImportError: @@ -6,7 +7,8 @@ from collections import Counter import dpdata -def to_system_data(file_name, protect_layer = 9) : + +def to_system_data(file_name, protect_layer=9): mol = Molecule.from_file(file_name) elem_mol = list(str(site.species.elements[0]) for site in mol.sites) elem_counter = Counter(elem_mol) @@ -14,16 +16,16 @@ def to_system_data(file_name, protect_layer = 9) : atom_numbs = list(elem_counter.values()) atom_types = [list(atom_names).index(e) for e in elem_mol] natoms = np.sum(atom_numbs) - + 
tmpcoord = np.copy(mol.cart_coords) system = {} - system['atom_names'] = atom_names - system['atom_numbs'] = atom_numbs - system['atom_types'] = np.array(atom_types, dtype = int) + system["atom_names"] = atom_names + system["atom_numbs"] = atom_numbs + system["atom_types"] = np.array(atom_types, dtype=int) # center = [c - h_cell_size for c in mol.center_of_mass] - system['orig'] = np.array([0, 0, 0]) + system["orig"] = np.array([0, 0, 0]) - system['coords'] = np.array([tmpcoord]) - system['cells'] = np.array([10.0 * np.eye(3)]) + system["coords"] = np.array([tmpcoord]) + system["cells"] = np.array([10.0 * np.eye(3)]) return system diff --git a/dpdata/qe/__init__.py b/dpdata/qe/__init__.py index d3f5a12fa..e69de29bb 100644 --- a/dpdata/qe/__init__.py +++ b/dpdata/qe/__init__.py @@ -1 +0,0 @@ - diff --git a/dpdata/qe/scf.py b/dpdata/qe/scf.py index 50312aee3..afdb6ae12 100755 --- a/dpdata/qe/scf.py +++ b/dpdata/qe/scf.py @@ -1,16 +1,17 @@ #!/usr/bin/env python3 -import os,sys +import os, sys import numpy as np ry2ev = 13.605693009 bohr2ang = 0.52917721067 kbar2evperang3 = 1e3 / 1.602176621e6 -def get_block (lines, keyword, skip = 0) : + +def get_block(lines, keyword, skip=0): ret = [] - for idx,ii in enumerate(lines) : - if keyword in ii : + for idx, ii in enumerate(lines): + if keyword in ii: blk_idx = idx + 1 + skip while len(lines[blk_idx]) == 0: blk_idx += 1 @@ -20,18 +21,21 @@ def get_block (lines, keyword, skip = 0) : break return ret -def get_cell (lines) : + +def get_cell(lines): ret = [] - for idx,ii in enumerate(lines): - if 'ibrav' in ii : + for idx, ii in enumerate(lines): + if "ibrav" in ii: break - blk = lines[idx:idx+2] - ibrav = int(blk[0].replace(',','').split('=')[-1]) + blk = lines[idx : idx + 2] + ibrav = int(blk[0].replace(",", "").split("=")[-1]) if ibrav == 0: for iline in lines: - if 'CELL_PARAMETERS' in iline and 'angstrom' not in iline.lower(): - raise RuntimeError("CELL_PARAMETERS must be written in Angstrom. 
Other units are not supported yet.") - blk = get_block(lines, 'CELL_PARAMETERS') + if "CELL_PARAMETERS" in iline and "angstrom" not in iline.lower(): + raise RuntimeError( + "CELL_PARAMETERS must be written in Angstrom. Other units are not supported yet." + ) + blk = get_block(lines, "CELL_PARAMETERS") for ii in blk: ret.append([float(jj) for jj in ii.split()[0:3]]) ret = np.array(ret) @@ -40,32 +44,37 @@ def get_cell (lines) : for iline in lines: line = iline.replace("=", " ").replace(",", "").split() if len(line) >= 2 and "a" == line[0]: - #print("line = ", line) + # print("line = ", line) a = float(line[1]) if len(line) >= 2 and "celldm(1)" == line[0]: - a = float(line[1])*bohr2ang - #print("a = ", a) + a = float(line[1]) * bohr2ang + # print("a = ", a) if not a: raise RuntimeError("parameter 'a' or 'celldm(1)' cannot be found.") - ret = np.array([[a,0.,0.],[0.,a,0.],[0.,0.,a]]) + ret = np.array([[a, 0.0, 0.0], [0.0, a, 0.0], [0.0, 0.0, a]]) else: - sys.exit('ibrav > 1 not supported yet.') + sys.exit("ibrav > 1 not supported yet.") return ret -def get_coords (lines, cell) : + +def get_coords(lines, cell): coord = [] atom_symbol_list = [] for iline in lines: - if 'ATOMIC_POSITIONS' in iline and ('angstrom' not in iline.lower() and 'crystal' not in iline.lower()): - raise RuntimeError("ATOMIC_POSITIONS must be written in Angstrom or crystal. Other units are not supported yet.") - if 'ATOMIC_POSITIONS' in iline and 'angstrom' in iline.lower(): - blk = get_block(lines, 'ATOMIC_POSITIONS') + if "ATOMIC_POSITIONS" in iline and ( + "angstrom" not in iline.lower() and "crystal" not in iline.lower() + ): + raise RuntimeError( + "ATOMIC_POSITIONS must be written in Angstrom or crystal. Other units are not supported yet." 
+ ) + if "ATOMIC_POSITIONS" in iline and "angstrom" in iline.lower(): + blk = get_block(lines, "ATOMIC_POSITIONS") for ii in blk: coord.append([float(jj) for jj in ii.split()[1:4]]) atom_symbol_list.append(ii.split()[0]) coord = np.array(coord) - elif 'ATOMIC_POSITIONS' in iline and 'crystal' in iline.lower(): - blk = get_block(lines, 'ATOMIC_POSITIONS') + elif "ATOMIC_POSITIONS" in iline and "crystal" in iline.lower(): + blk = get_block(lines, "ATOMIC_POSITIONS") for ii in blk: coord.append([float(jj) for jj in ii.split()[1:4]]) atom_symbol_list.append(ii.split()[0]) @@ -75,11 +84,11 @@ def get_coords (lines, cell) : tmp_names, symbol_idx = np.unique(atom_symbol_list, return_index=True) atom_types = [] atom_numbs = [] - #preserve the atom_name order + # preserve the atom_name order atom_names = atom_symbol_list[np.sort(symbol_idx)] for jj in atom_symbol_list: for idx, ii in enumerate(atom_names): - if (jj == ii) : + if jj == ii: atom_types.append(idx) for idx in range(len(atom_names)): atom_numbs.append(atom_types.count(idx)) @@ -87,51 +96,63 @@ def get_coords (lines, cell) : return list(atom_names), atom_numbs, atom_types, coord -def get_energy (lines) : + +def get_energy(lines): energy = None - for ii in lines : - if '! total energy' in ii : - energy = ry2ev * float(ii.split('=')[1].split()[0]) + for ii in lines: + if "! 
total energy" in ii: + energy = ry2ev * float(ii.split("=")[1].split()[0]) return energy -def get_force (lines) : - blk = get_block(lines, 'Forces acting on atoms', skip = 1) + +def get_force(lines): + blk = get_block(lines, "Forces acting on atoms", skip=1) ret = [] for ii in blk: - ret.append([float(jj) for jj in ii.split('=')[1].split()]) + ret.append([float(jj) for jj in ii.split("=")[1].split()]) ret = np.array(ret) - ret *= (ry2ev / bohr2ang) + ret *= ry2ev / bohr2ang return ret -def get_stress (lines) : - blk = get_block(lines, 'total stress') + +def get_stress(lines): + blk = get_block(lines, "total stress") ret = [] for ii in blk: ret.append([float(jj) for jj in ii.split()[3:6]]) ret = np.array(ret) ret *= kbar2evperang3 return ret - -def get_frame (fname): + + +def get_frame(fname): if type(fname) == str: path_out = fname outname = os.path.basename(path_out) - # the name of the input file is assumed to be different from the output by 'in' and 'out' - inname = outname.replace('out', 'in') + # the name of the input file is assumed to be different from the output by 'in' and 'out' + inname = outname.replace("out", "in") path_in = os.path.join(os.path.dirname(path_out), inname) elif type(fname) == list and len(fname) == 2: path_in = fname[0] path_out = fname[1] else: - raise RuntimeError('invalid input') - with open(path_out, 'r') as fp: - outlines = fp.read().split('\n') - with open(path_in, 'r') as fp: - inlines = fp.read().split('\n') - cell = get_cell (inlines) - atom_names, natoms, types, coords = get_coords(inlines, cell) - energy = get_energy(outlines) - force = get_force (outlines) - stress = get_stress(outlines) * np.linalg.det(cell) - return atom_names, natoms, types, cell[np.newaxis, :, :], coords[np.newaxis, :, :], \ - np.array(energy)[np.newaxis], force[np.newaxis, :, :], stress[np.newaxis, :, :] + raise RuntimeError("invalid input") + with open(path_out, "r") as fp: + outlines = fp.read().split("\n") + with open(path_in, "r") as fp: + inlines = 
fp.read().split("\n") + cell = get_cell(inlines) + atom_names, natoms, types, coords = get_coords(inlines, cell) + energy = get_energy(outlines) + force = get_force(outlines) + stress = get_stress(outlines) * np.linalg.det(cell) + return ( + atom_names, + natoms, + types, + cell[np.newaxis, :, :], + coords[np.newaxis, :, :], + np.array(energy)[np.newaxis], + force[np.newaxis, :, :], + stress[np.newaxis, :, :], + ) diff --git a/dpdata/qe/traj.py b/dpdata/qe/traj.py index 62b10d440..1bdacab3e 100644 --- a/dpdata/qe/traj.py +++ b/dpdata/qe/traj.py @@ -1,7 +1,12 @@ -#!/usr/bin/python3 +#!/usr/bin/python3 import numpy as np import dpdata, warnings -from ..unit import EnergyConversion, LengthConversion, ForceConversion, PressureConversion +from ..unit import ( + EnergyConversion, + LengthConversion, + ForceConversion, + PressureConversion, +) ry2ev = EnergyConversion("rydberg", "eV").value() kbar2evperang3 = PressureConversion("kbar", "eV/angstrom^3").value() @@ -10,81 +15,89 @@ energy_convert = EnergyConversion("hartree", "eV").value() force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value() -def load_key (lines, key) : - for ii in lines : - if key in ii : - words = ii.split(',') - for jj in words : - if key in jj : - return jj.split('=')[1] + +def load_key(lines, key): + for ii in lines: + if key in ii: + words = ii.split(",") + for jj in words: + if key in jj: + return jj.split("=")[1] return None -def load_block(lines, key, nlines) : - for idx,ii in enumerate(lines) : - if key in ii : + +def load_block(lines, key, nlines): + for idx, ii in enumerate(lines): + if key in ii: break - return lines[idx+1:idx+1+nlines] - -def convert_celldm(ibrav, celldm) : - if ibrav == 1 : - return celldm[0] * np.eye(3) - elif ibrav == 2 : - return celldm[0] * 0.5 * np.array([[-1,0,1], [0,1,1], [-1,1,0]]) - elif ibrav == 3 : - return celldm[0] * 0.5 * np.array([[1,1,1], [-1,1,1], [-1,-1,1]]) - elif ibrav == -3 : - return celldm[0] * 0.5 * np.array([[-1,1,1], [1,-1,1], 
[1,1,-1]]) - else : - warnings.warn('unsupported ibrav ' + str(ibrav) + ' if no .cel file, the cell convertion may be wrong. ') + return lines[idx + 1 : idx + 1 + nlines] + + +def convert_celldm(ibrav, celldm): + if ibrav == 1: + return celldm[0] * np.eye(3) + elif ibrav == 2: + return celldm[0] * 0.5 * np.array([[-1, 0, 1], [0, 1, 1], [-1, 1, 0]]) + elif ibrav == 3: + return celldm[0] * 0.5 * np.array([[1, 1, 1], [-1, 1, 1], [-1, -1, 1]]) + elif ibrav == -3: + return celldm[0] * 0.5 * np.array([[-1, 1, 1], [1, -1, 1], [1, 1, -1]]) + else: + warnings.warn( + "unsupported ibrav " + + str(ibrav) + + " if no .cel file, the cell convertion may be wrong. " + ) return np.eye(3) - #raise RuntimeError('unsupported ibrav ' + str(ibrav)) + # raise RuntimeError('unsupported ibrav ' + str(ibrav)) + -def load_cell_parameters(lines) : - blk = load_block(lines, 'CELL_PARAMETERS', 3) +def load_cell_parameters(lines): + blk = load_block(lines, "CELL_PARAMETERS", 3) ret = [] - for ii in blk : + for ii in blk: ret.append([float(jj) for jj in ii.split()[0:3]]) return np.array(ret) -def load_atom_names(lines, ntypes) : - blk = load_block(lines, 'ATOMIC_SPECIES', ntypes) +def load_atom_names(lines, ntypes): + blk = load_block(lines, "ATOMIC_SPECIES", ntypes) return [ii.split()[0] for ii in blk] -def load_celldm(lines) : +def load_celldm(lines): celldm = np.zeros(6) for ii in range(6): - key = 'celldm(%d)' % (ii+1) + key = "celldm(%d)" % (ii + 1) val = load_key(lines, key) - if val is not None : - celldm[ii] = float(val) + if val is not None: + celldm[ii] = float(val) return celldm -def load_atom_types(lines, natoms, atom_names) : - blk = load_block(lines, 'ATOMIC_POSITIONS', natoms) +def load_atom_types(lines, natoms, atom_names): + blk = load_block(lines, "ATOMIC_POSITIONS", natoms) ret = [] - for ii in blk : + for ii in blk: ret.append(atom_names.index(ii.split()[0])) - return np.array(ret, dtype = int) + return np.array(ret, dtype=int) -def load_param_file(fname) : +def 
load_param_file(fname): with open(fname) as fp: - lines = fp.read().split('\n') - natoms = int(load_key(lines, 'nat')) - ntypes = int(load_key(lines, 'ntyp')) + lines = fp.read().split("\n") + natoms = int(load_key(lines, "nat")) + ntypes = int(load_key(lines, "ntyp")) atom_names = load_atom_names(lines, ntypes) atom_types = load_atom_types(lines, natoms, atom_names) atom_numbs = [] - for ii in range(ntypes) : + for ii in range(ntypes): atom_numbs.append(np.sum(atom_types == ii)) - ibrav = int(load_key(lines, 'ibrav')) + ibrav = int(load_key(lines, "ibrav")) celldm = load_celldm(lines) - if ibrav == 0 : - cell = load_cell_parameters(lines) - else : + if ibrav == 0: + cell = load_cell_parameters(lines) + else: cell = convert_celldm(ibrav, celldm) cell = cell * length_convert # print(atom_names) @@ -94,41 +107,37 @@ def load_param_file(fname) : return atom_names, atom_numbs, atom_types, cell -def _load_pos_block(fp, natoms) : +def _load_pos_block(fp, natoms): head = fp.readline() if not head: # print('get None') return None, None - else : + else: ss = head.split()[0] blk = [] - for ii in range(natoms) : + for ii in range(natoms): newline = fp.readline() - if not newline : + if not newline: return None, None blk.append([float(jj) for jj in newline.split()]) return blk, ss -def load_data(fname, - natoms, - begin = 0, - step = 1, - convert = 1.) 
: +def load_data(fname, natoms, begin=0, step=1, convert=1.0): coords = [] steps = [] cc = 0 with open(fname) as fp: while True: blk, ss = _load_pos_block(fp, natoms) - if blk == None : + if blk == None: break - else : - if cc >= begin and (cc - begin) % step == 0 : + else: + if cc >= begin and (cc - begin) % step == 0: coords.append(blk) steps.append(ss) cc += 1 - coords= convert * np.array(coords) + coords = convert * np.array(coords) return coords, steps @@ -146,21 +155,21 @@ def load_data(fname, # return coords -def load_energy(fname, begin = 0, step = 1) : +def load_energy(fname, begin=0, step=1): data = np.loadtxt(fname) steps = [] - for ii in data[begin::step,0]: - steps.append('%d'%ii) + for ii in data[begin::step, 0]: + steps.append("%d" % ii) with open(fname) as fp: while True: line = fp.readline() - if not line : + if not line: return None - if line.split()[0][0] != '#': + if line.split()[0][0] != "#": nw = len(line.split()) break data = np.reshape(data, [-1, nw]) - return energy_convert * data[begin::step,5], steps + return energy_convert * data[begin::step, 5], steps # def load_force(fname, natoms) : @@ -177,61 +186,59 @@ def load_energy(fname, begin = 0, step = 1) : # return coords -def to_system_data(input_name, prefix, begin = 0, step = 1) : +def to_system_data(input_name, prefix, begin=0, step=1): data = {} - data['atom_names'], \ - data['atom_numbs'], \ - data['atom_types'], \ - cell \ - = load_param_file(input_name) - data['coords'], csteps\ - = load_data(prefix + '.pos', - np.sum(data['atom_numbs']), - begin = begin, - step = step, - convert = length_convert) - data['orig'] = np.zeros(3) - try : - data['cells'], tmp_steps \ - = load_data(prefix + '.cel', - 3, - begin = begin, - step = step, - convert = length_convert) + data["atom_names"], data["atom_numbs"], data["atom_types"], cell = load_param_file( + input_name + ) + data["coords"], csteps = load_data( + prefix + ".pos", + np.sum(data["atom_numbs"]), + begin=begin, + step=step, + 
convert=length_convert, + ) + data["orig"] = np.zeros(3) + try: + data["cells"], tmp_steps = load_data( + prefix + ".cel", 3, begin=begin, step=step, convert=length_convert + ) if csteps != tmp_steps: csteps.append(None) tmp_steps.append(None) for int_id in range(len(csteps)): if csteps[int_id] != tmp_steps[int_id]: break - step_id = begin + int_id*step - raise RuntimeError(f"the step key between files are not consistent. " - f"The difference locates at step: {step_id}, " - f".pos is {csteps[int_id]}, .cel is {tmp_steps[int_id]}") - except FileNotFoundError : - data['cells'] = np.tile(cell, (data['coords'].shape[0], 1, 1)) + step_id = begin + int_id * step + raise RuntimeError( + f"the step key between files are not consistent. " + f"The difference locates at step: {step_id}, " + f".pos is {csteps[int_id]}, .cel is {tmp_steps[int_id]}" + ) + except FileNotFoundError: + data["cells"] = np.tile(cell, (data["coords"].shape[0], 1, 1)) return data, csteps -def to_system_label(input_name, prefix, begin = 0, step = 1) : +def to_system_label(input_name, prefix, begin=0, step=1): atom_names, atom_numbs, atom_types, cell = load_param_file(input_name) - energy, esteps = load_energy(prefix + '.evp', - begin = begin, - step = step) - force, fsteps = load_data(prefix + '.for', - np.sum(atom_numbs), - begin = begin, - step = step, - convert = force_convert) - assert(esteps == fsteps), "the step key between files are not consistent " + energy, esteps = load_energy(prefix + ".evp", begin=begin, step=step) + force, fsteps = load_data( + prefix + ".for", + np.sum(atom_numbs), + begin=begin, + step=step, + convert=force_convert, + ) + assert esteps == fsteps, "the step key between files are not consistent " return energy, force, esteps -if __name__ == '__main__': - prefix='nacl' - atom_names, atom_numbs, atom_types, cell = load_param_file(prefix+'.in') - coords = load_data(prefix+'.pos', np.sum(atom_numbs)) - cells = load_data(prefix+'.cel', 3) +if __name__ == "__main__": + prefix = 
"nacl" + atom_names, atom_numbs, atom_types, cell = load_param_file(prefix + ".in") + coords = load_data(prefix + ".pos", np.sum(atom_numbs)) + cells = load_data(prefix + ".cel", 3) print(atom_names) print(atom_numbs) print(atom_types) diff --git a/dpdata/rdkit/sanitize.py b/dpdata/rdkit/sanitize.py index 0e1637402..f3b1690eb 100644 --- a/dpdata/rdkit/sanitize.py +++ b/dpdata/rdkit/sanitize.py @@ -14,13 +14,16 @@ def get_explicit_valence(atom, verbose=False): - exp_val_calculated_from_bonds = int(sum([bond.GetBondTypeAsDouble() for bond in atom.GetBonds()])) + exp_val_calculated_from_bonds = int( + sum([bond.GetBondTypeAsDouble() for bond in atom.GetBonds()]) + ) try: exp_val = atom.GetExplicitValence() if exp_val != exp_val_calculated_from_bonds: if verbose: print( - f"Explicit valence given by GetExplicitValence() and sum of bond order are inconsistent on {atom.GetSymbol()}{atom.GetIdx() + 1}, using sum of bond order.") + f"Explicit valence given by GetExplicitValence() and sum of bond order are inconsistent on {atom.GetSymbol()}{atom.GetIdx() + 1}, using sum of bond order." 
+ ) return exp_val_calculated_from_bonds except Exception: return exp_val_calculated_from_bonds @@ -45,7 +48,7 @@ def regularize_formal_charges(mol, sanitize=True, verbose=False): def assign_formal_charge_for_atom(atom, verbose=False): """ - assigen formal charge according to 8-electron rule for element B,C,N,O,S,P,As + assigen formal charge according to 8-electron rule for element B,C,N,O,S,P,As """ assert isinstance(atom, Chem.rdchem.Atom) valence = get_explicit_valence(atom, verbose) @@ -55,7 +58,8 @@ def assign_formal_charge_for_atom(atom, verbose=False): atom.SetFormalCharge(valence - 4) if valence == 3: print( - f"Detect a valence of 3 on #C{atom.GetIdx() + 1}, the formal charge of this atom will be assigned to -1") + f"Detect a valence of 3 on #C{atom.GetIdx() + 1}, the formal charge of this atom will be assigned to -1" + ) elif valence > 4: raise ValueError(f"#C{atom.GetIdx() + 1} has a valence larger than 4") elif atom.GetSymbol() == "N": @@ -78,7 +82,9 @@ def assign_formal_charge_for_atom(atom, verbose=False): if valence == 5: atom.SetFormalCharge(0) elif valence > 5: - raise ValueError(f"#{atom.GetSymbol()}{atom.GetIdx() + 1} has a valence larger than 5") + raise ValueError( + f"#{atom.GetSymbol()}{atom.GetIdx() + 1} has a valence larger than 5" + ) else: atom.SetFormalCharge(valence - 3) @@ -89,12 +95,15 @@ def print_bonds(mol): begin_atom = bond.GetBeginAtom() end_atom = bond.GetEndAtom() print( - f'{begin_atom.GetSymbol()}{begin_atom.GetIdx() + 1} {end_atom.GetSymbol()}{end_atom.GetIdx() + 1} {bond.GetBondType()}') + f"{begin_atom.GetSymbol()}{begin_atom.GetIdx() + 1} {end_atom.GetSymbol()}{end_atom.GetIdx() + 1} {bond.GetBondType()}" + ) def print_atoms(mol): for atom in mol.GetAtoms(): - print(f'{atom.GetSymbol()}{atom.GetIdx() + 1} {atom.GetFormalCharge()} {get_explicit_valence(atom)}') + print( + f"{atom.GetSymbol()}{atom.GetIdx() + 1} {atom.GetFormalCharge()} {get_explicit_valence(atom)}" + ) def is_terminal_oxygen(O_atom): @@ -120,7 +129,11 @@ 
def get_terminal_NR2s(atom): if nei.GetSymbol() == "N": if is_terminal_NR2(nei): terminal_NR2s.append(nei) - terminal_NR2s.sort(key=lambda N_atom: len([atom for atom in N_atom.GetNeighbors() if atom.GetSymbol() == 'H'])) + terminal_NR2s.sort( + key=lambda N_atom: len( + [atom for atom in N_atom.GetNeighbors() if atom.GetSymbol() == "H"] + ) + ) return terminal_NR2s @@ -132,10 +145,14 @@ def sanitize_phosphate_Patom(P_atom, verbose=True): if verbose: print("Phospate group detected, sanitizing it...") # set one P=O and two P-O - bond1 = mol.GetBondBetweenAtoms(P_atom.GetIdx(), terminal_oxygens[0].GetIdx()) + bond1 = mol.GetBondBetweenAtoms( + P_atom.GetIdx(), terminal_oxygens[0].GetIdx() + ) bond1.SetBondType(Chem.rdchem.BondType.DOUBLE) for ii in range(1, len(terminal_oxygens)): - bond = mol.GetBondBetweenAtoms(P_atom.GetIdx(), terminal_oxygens[ii].GetIdx()) + bond = mol.GetBondBetweenAtoms( + P_atom.GetIdx(), terminal_oxygens[ii].GetIdx() + ) bond.SetBondType(Chem.rdchem.BondType.SINGLE) terminal_oxygens[ii].SetFormalCharge(-1) @@ -154,11 +171,15 @@ def sanitize_sulfate_Satom(S_atom, verbose=True): if verbose: print("Sulfate group detected, sanitizing it...") # set one S-O and two S=O - bond1 = mol.GetBondBetweenAtoms(S_atom.GetIdx(), terminal_oxygens[0].GetIdx()) + bond1 = mol.GetBondBetweenAtoms( + S_atom.GetIdx(), terminal_oxygens[0].GetIdx() + ) bond1.SetBondType(Chem.rdchem.BondType.SINGLE) terminal_oxygens[0].SetFormalCharge(-1) for ii in range(1, len(terminal_oxygens)): - bond = mol.GetBondBetweenAtoms(S_atom.GetIdx(), terminal_oxygens[ii].GetIdx()) + bond = mol.GetBondBetweenAtoms( + S_atom.GetIdx(), terminal_oxygens[ii].GetIdx() + ) bond.SetBondType(Chem.rdchem.BondType.DOUBLE) @@ -176,11 +197,15 @@ def sanitize_carboxyl_Catom(C_atom, verbose=True): if verbose: print("Carbonxyl group detected, sanitizing it...") # set one C-O and one C=O - bond1 = mol.GetBondBetweenAtoms(C_atom.GetIdx(), terminal_oxygens[0].GetIdx()) + bond1 = mol.GetBondBetweenAtoms( + 
C_atom.GetIdx(), terminal_oxygens[0].GetIdx() + ) bond1.SetBondType(Chem.rdchem.BondType.SINGLE) terminal_oxygens[0].SetFormalCharge(-1) - bond2 = mol.GetBondBetweenAtoms(C_atom.GetIdx(), terminal_oxygens[1].GetIdx()) + bond2 = mol.GetBondBetweenAtoms( + C_atom.GetIdx(), terminal_oxygens[1].GetIdx() + ) bond2.SetBondType(Chem.rdchem.BondType.DOUBLE) terminal_oxygens[1].SetFormalCharge(0) @@ -226,11 +251,15 @@ def sanitize_nitro_Natom(N_atom, verbose=True): if verbose: print("Nitro group detected, sanitizing it...") # set one N-O and one N=O - bond1 = mol.GetBondBetweenAtoms(N_atom.GetIdx(), terminal_oxygens[0].GetIdx()) + bond1 = mol.GetBondBetweenAtoms( + N_atom.GetIdx(), terminal_oxygens[0].GetIdx() + ) bond1.SetBondType(Chem.rdchem.BondType.SINGLE) terminal_oxygens[0].SetFormalCharge(-1) - bond2 = mol.GetBondBetweenAtoms(N_atom.GetIdx(), terminal_oxygens[1].GetIdx()) + bond2 = mol.GetBondBetweenAtoms( + N_atom.GetIdx(), terminal_oxygens[1].GetIdx() + ) bond2.SetBondType(Chem.rdchem.BondType.DOUBLE) terminal_oxygens[1].SetFormalCharge(0) @@ -242,7 +271,7 @@ def sanitize_nitro(mol): def is_terminal_nitrogen(N_atom): - if N_atom.GetSymbol() == 'N' and len(N_atom.GetNeighbors()) == 1: + if N_atom.GetSymbol() == "N" and len(N_atom.GetNeighbors()) == 1: return True else: return False @@ -342,7 +371,9 @@ def kekulize_aromatic_heterocycles(mol_in, assign_formal_charge=True, sanitize=T rings = [list(i) for i in list(rings)] rings.sort(key=lambda r: len(r)) - def search_and_assign_ring(mol, ring, hetero, start, forward=True, start_switch=True): + def search_and_assign_ring( + mol, ring, hetero, start, forward=True, start_switch=True + ): j = start switch = start_switch lring = len(ring) @@ -370,7 +401,11 @@ def print_bondtypes(mol, rings): lring = len(ring) btype = [] for i in range(lring): - btype.append(mol.GetBondBetweenAtoms(ring[i], ring[(i + 1) % lring]).GetBondType()) + btype.append( + mol.GetBondBetweenAtoms( + ring[i], ring[(i + 1) % lring] + ).GetBondType() + ) 
atoms = [mol.GetAtomWithIdx(i).GetSymbol() for i in ring] print(ring) print(atoms) @@ -381,9 +416,9 @@ def hetero_priority(idx, mol): sym = atom.GetSymbol() valence = len(atom.GetBonds()) - if (sym in ['O', 'S']) & (valence == 2): + if (sym in ["O", "S"]) & (valence == 2): return 0 - elif (sym in ['N', 'P', 'As', 'B']): + elif sym in ["N", "P", "As", "B"]: if valence == 3: return 1 elif valence == 2: @@ -398,7 +433,7 @@ def hetero_priority(idx, mol): bAllC = True for i in range(lring): atom = mol.GetAtomWithIdx(ring[i]) - if atom.GetSymbol() != 'C': + if atom.GetSymbol() != "C": bAllC = False bond = mol.GetBondBetweenAtoms(ring[i], ring[(i + 1) % lring]) @@ -431,7 +466,7 @@ def hetero_priority(idx, mol): if (fuseCAr[i] == fuseCAr[i - 1]) & (fuseCAr[i] >= 0): fuseDouble.append(i) atom = mol.GetAtomWithIdx(ring[i]) - if atom.GetSymbol() != 'C': + if atom.GetSymbol() != "C": hetero.append(i) atom_bonds = atom.GetBonds() btype = [bond.GetBondType() for bond in atom_bonds] @@ -451,40 +486,58 @@ def hetero_priority(idx, mol): for i in hasDouble: d1, e1 = search_and_assign_ring(mol, ring, hetero, i, forward=True) d2, e2 = search_and_assign_ring(mol, ring, hetero, i, forward=False) - n_targetDouble -= (d1 + d2 + 1) - n_targetEdit -= (e1 + e2) + n_targetDouble -= d1 + d2 + 1 + n_targetEdit -= e1 + e2 for i in fuseDouble: bond = mol.GetBondBetweenAtoms(ring[i], ring[(i - 1) % lring]) if bond.GetBondType() == BondType.AROMATIC: bond.SetBondType(BondType.DOUBLE) mol_edit_log(mol, ring[i], ring[(i - 1) % lring]) d1, e1 = search_and_assign_ring(mol, ring, hetero, i, forward=True) - d2, e2 = search_and_assign_ring(mol, ring, hetero, (i - 1) % lring, forward=False) - n_targetDouble -= (d1 + d2 + 1) - n_targetEdit -= (e1 + e2 + 1) + d2, e2 = search_and_assign_ring( + mol, ring, hetero, (i - 1) % lring, forward=False + ) + n_targetDouble -= d1 + d2 + 1 + n_targetEdit -= e1 + e2 + 1 for i in hetero: atom = mol.GetAtomWithIdx(ring[i]) if (hetero_prior[i] == 2) | (n_targetDouble * 2 >= 
n_targetEdit): - forward_btype = mol.GetBondBetweenAtoms(ring[i], ring[(i + 1) % lring]).GetBondType() - backward_btype = mol.GetBondBetweenAtoms(ring[i], ring[(i - 1) % lring]).GetBondType() + forward_btype = mol.GetBondBetweenAtoms( + ring[i], ring[(i + 1) % lring] + ).GetBondType() + backward_btype = mol.GetBondBetweenAtoms( + ring[i], ring[(i - 1) % lring] + ).GetBondType() if forward_btype != BondType.AROMATIC: switch = forward_btype == BondType.DOUBLE - d1, e1 = search_and_assign_ring(mol, ring, hetero, i, forward=False, start_switch=switch) + d1, e1 = search_and_assign_ring( + mol, ring, hetero, i, forward=False, start_switch=switch + ) d2 = e2 = 0 elif backward_btype != BondType.AROMATIC: switch = backward_btype == BondType.DOUBLE - d1, e1 = search_and_assign_ring(mol, ring, hetero, i, forward=True, start_switch=switch) + d1, e1 = search_and_assign_ring( + mol, ring, hetero, i, forward=True, start_switch=switch + ) d2 = e2 = 0 else: - d1, e1 = search_and_assign_ring(mol, ring, hetero, i, forward=True, start_switch=True) - d2, e2 = search_and_assign_ring(mol, ring, hetero, i, forward=False, start_switch=False) - n_targetDouble -= (d1 + d2) - n_targetEdit -= (e1 + e2) + d1, e1 = search_and_assign_ring( + mol, ring, hetero, i, forward=True, start_switch=True + ) + d2, e2 = search_and_assign_ring( + mol, ring, hetero, i, forward=False, start_switch=False + ) + n_targetDouble -= d1 + d2 + n_targetEdit -= e1 + e2 else: - d1, e1 = search_and_assign_ring(mol, ring, hetero, i, forward=True, start_switch=True) - d2, e2 = search_and_assign_ring(mol, ring, hetero, i, forward=False, start_switch=True) - n_targetDouble -= (d1 + d2) - n_targetEdit -= (e1 + e2) + d1, e1 = search_and_assign_ring( + mol, ring, hetero, i, forward=True, start_switch=True + ) + d2, e2 = search_and_assign_ring( + mol, ring, hetero, i, forward=False, start_switch=True + ) + n_targetDouble -= d1 + d2 + n_targetEdit -= e1 + e2 for ring in CAr: lring = len(ring) @@ -508,18 +561,22 @@ def 
hetero_priority(idx, mol): Chem.SanitizeMol(mol_edited) return mol_edited except Exception as e: - raise RuntimeError(f"Manual kekulization for aromatic heterocycles failed, below are errors:\n\t {e}") + raise RuntimeError( + f"Manual kekulization for aromatic heterocycles failed, below are errors:\n\t {e}" + ) -def convert_by_obabel(mol, cache_dir=os.path.join(os.getcwd(), '.cache'), obabel_path="obabel"): +def convert_by_obabel( + mol, cache_dir=os.path.join(os.getcwd(), ".cache"), obabel_path="obabel" +): if not os.path.exists(cache_dir): os.mkdir(cache_dir) if mol.HasProp("_Name"): name = mol.GetProp("_Name") else: name = f"mol{int(time.time())}" - mol_file_in = os.path.join(cache_dir, f'{name}.mol') - mol_file_out = os.path.join(cache_dir, f'{name}_obabel.mol') + mol_file_in = os.path.join(cache_dir, f"{name}.mol") + mol_file_out = os.path.join(cache_dir, f"{name}_obabel.mol") Chem.MolToMolFile(mol, mol_file_in, kekulize=False) obConversion = openbabel.OBConversion() obConversion.SetInAndOutFormats("mol", "mol") @@ -551,12 +608,14 @@ def super_sanitize_mol(mol, name=None, verbose=True): try: if verbose: print( - "Hermite procedure failed, maybe due to unsupported representation of hetero aromatic rings, re-try with obabel") + "Hermite procedure failed, maybe due to unsupported representation of hetero aromatic rings, re-try with obabel" + ) print("=====Stage 2: re-try with obabel=====") mol = convert_by_obabel(mol) mol = sanitize_mol(mol, verbose) - mol = kekulize_aromatic_heterocycles(mol, assign_formal_charge=False, - sanitize=False) # aromatic heterocycles + mol = kekulize_aromatic_heterocycles( + mol, assign_formal_charge=False, sanitize=False + ) # aromatic heterocycles mol = regularize_formal_charges(mol, sanitize=False) mol_copy = deepcopy(mol) Chem.SanitizeMol(mol_copy) @@ -571,30 +630,34 @@ def super_sanitize_mol(mol, name=None, verbose=True): class Sanitizer(object): - def __init__(self, level='medium', raise_errors=True, verbose=False): - ''' - Set 
up sanitizer. - -------- - Parameters: - level : 'low', 'medium' or 'high'. - `low` - use rdkit.Chem.SanitizeMol() to sanitize - `medium` - before using rdkit, assign formal charges of each atom first, which requires - the rightness of bond order information - `high` - try to regularize bond order of nitro, phosphate, sulfate, nitrine, guanidine, - pyridine-oxide function groups and aromatic heterocycles. If failed, the program - will call obabel to pre-process the mol object and re-try the procedure. - ''' + def __init__(self, level="medium", raise_errors=True, verbose=False): + """ + Set up sanitizer. + -------- + Parameters: + level : 'low', 'medium' or 'high'. + `low` - use rdkit.Chem.SanitizeMol() to sanitize + `medium` - before using rdkit, assign formal charges of each atom first, which requires + the rightness of bond order information + `high` - try to regularize bond order of nitro, phosphate, sulfate, nitrine, guanidine, + pyridine-oxide function groups and aromatic heterocycles. If failed, the program + will call obabel to pre-process the mol object and re-try the procedure. + """ self._check_level(level) self.level = level self.raise_errors = raise_errors self.verbose = verbose def _check_level(self, level): - if level not in ['low', 'medium', 'high']: - raise ValueError(f"Invalid level '{level}', please set to 'low', 'medium' or 'high'") + if level not in ["low", "medium", "high"]: + raise ValueError( + f"Invalid level '{level}', please set to 'low', 'medium' or 'high'" + ) else: - if level == 'high' and not USE_OBABEL: - raise ModuleNotFoundError("obabel not installed, high level sanitizer cannot work") + if level == "high" and not USE_OBABEL: + raise ModuleNotFoundError( + "obabel not installed, high level sanitizer cannot work" + ) def _handle_exception(self, error_info): if self.raise_errors: @@ -603,9 +666,9 @@ def _handle_exception(self, error_info): print(error_info) def sanitize(self, mol): - ''' - Sanitize mol according to `self.level`. 
If failed, return None. - ''' + """ + Sanitize mol according to `self.level`. If failed, return None. + """ if self.level == "low": try: Chem.SanitizeMol(mol) diff --git a/dpdata/rdkit/utils.py b/dpdata/rdkit/utils.py index 5cf0df323..e5d1c7a8b 100644 --- a/dpdata/rdkit/utils.py +++ b/dpdata/rdkit/utils.py @@ -5,6 +5,7 @@ pass import numpy as np + def mol_to_system_data(mol): if not isinstance(mol, Chem.rdchem.Mol): raise TypeError(f"rdkit.Chem.Mol required, not {type(mol)}") @@ -12,40 +13,55 @@ def mol_to_system_data(mol): num_confs = mol.GetNumConformers() if num_confs: atom_symbols = [at.GetSymbol() for at in mol.GetAtoms()] - atom_names, atom_types, atom_numbs = np.unique(atom_symbols, return_inverse=True, return_counts=True) + atom_names, atom_types, atom_numbs = np.unique( + atom_symbols, return_inverse=True, return_counts=True + ) coords = np.array([conf.GetPositions() for conf in mol.GetConformers()]) - bonds = np.array([[bond.GetBeginAtomIdx(), - bond.GetEndAtomIdx(), - bond.GetBondTypeAsDouble()] for bond in mol.GetBonds()]) - formal_charges = np.array([at.GetFormalCharge() for at in mol.GetAtoms()], dtype=np.int32) + bonds = np.array( + [ + [ + bond.GetBeginAtomIdx(), + bond.GetEndAtomIdx(), + bond.GetBondTypeAsDouble(), + ] + for bond in mol.GetBonds() + ] + ) + formal_charges = np.array( + [at.GetFormalCharge() for at in mol.GetAtoms()], dtype=np.int32 + ) data = {} - data['atom_numbs'] = list(atom_numbs) - data['atom_names'] = list(atom_names) - data['atom_types'] = atom_types - data['cells'] = np.array([[[100., 0., 0.], - [0., 100., 0.], - [0., 0., 100.]] for _ in range(num_confs)]) - data['coords'] = coords - data['bonds'] = bonds - data['formal_charges'] = formal_charges - data['orig'] = np.array([0., 0., 0.]) + data["atom_numbs"] = list(atom_numbs) + data["atom_names"] = list(atom_names) + data["atom_types"] = atom_types + data["cells"] = np.array( + [ + [[100.0, 0.0, 0.0], [0.0, 100.0, 0.0], [0.0, 0.0, 100.0]] + for _ in range(num_confs) + ] + ) 
+ data["coords"] = coords + data["bonds"] = bonds + data["formal_charges"] = formal_charges + data["orig"] = np.array([0.0, 0.0, 0.0]) # other properties if mol.HasProp("_Name"): - data['_name'] = mol.GetProp('_Name') + data["_name"] = mol.GetProp("_Name") return data else: raise ValueError("The moleclue does not contain 3-D conformers") + def system_data_to_mol(data): mol_ed = Chem.RWMol() - atom_symbols = [data['atom_names'][i] for i in data['atom_types']] + atom_symbols = [data["atom_names"][i] for i in data["atom_types"]] # add atoms - for atom_type in data['atom_types']: - symbol = data['atom_names'][atom_type] + for atom_type in data["atom_types"]: + symbol = data["atom_names"][atom_type] atom = Chem.Atom(symbol) mol_ed.AddAtom(atom) # add bonds - for bond_info in data['bonds']: + for bond_info in data["bonds"]: if bond_info[2] == 1: mol_ed.AddBond(int(bond_info[0]), int(bond_info[1]), Chem.BondType.SINGLE) elif bond_info[2] == 2: @@ -55,21 +71,21 @@ def system_data_to_mol(data): elif bond_info[2] == 1.5: mol_ed.AddBond(int(bond_info[0]), int(bond_info[1]), Chem.BondType.AROMATIC) # set conformers - for frame_idx in range(data['coords'].shape[0]): - conf = Chem.rdchem.Conformer(len(data['atom_types'])) - for atom_idx in range(len(data['atom_types'])): - conf.SetAtomPosition(atom_idx, data['coords'][frame_idx][atom_idx]) + for frame_idx in range(data["coords"].shape[0]): + conf = Chem.rdchem.Conformer(len(data["atom_types"])) + for atom_idx in range(len(data["atom_types"])): + conf.SetAtomPosition(atom_idx, data["coords"][frame_idx][atom_idx]) mol_ed.AddConformer(conf, assignId=True) mol = mol_ed.GetMol() # set formal charges for idx, atom in enumerate(mol.GetAtoms()): - atom.SetFormalCharge(int(data['formal_charges'][idx])) + atom.SetFormalCharge(int(data["formal_charges"][idx])) # set mol name - if '_name' in list(data.keys()): - mol.SetProp("_Name", data['_name']) + if "_name" in list(data.keys()): + mol.SetProp("_Name", data["_name"]) # sanitize 
Chem.SanitizeMol(mol_ed) - return mol + return mol def check_same_atom(atom_1, atom_2): @@ -80,6 +96,7 @@ def check_same_atom(atom_1, atom_2): else: return True + def check_same_molecule(mol_1, mol_2): flag = True for bond_1, bond_2 in zip(mol_1.GetBonds(), mol_2.GetBonds()): @@ -93,6 +110,7 @@ def check_same_molecule(mol_1, mol_2): break return flag + def check_molecule_list(mols): flag = True for mol in mols[1:]: @@ -101,6 +119,7 @@ def check_molecule_list(mols): break return flag + def combine_molecules(mols): if check_molecule_list(mols): for mol in mols[1:]: diff --git a/dpdata/siesta/__init__.py b/dpdata/siesta/__init__.py index 8b1378917..e69de29bb 100644 --- a/dpdata/siesta/__init__.py +++ b/dpdata/siesta/__init__.py @@ -1 +0,0 @@ - diff --git a/dpdata/siesta/aiMD_output.py b/dpdata/siesta/aiMD_output.py index 766ced212..32cb081fb 100644 --- a/dpdata/siesta/aiMD_output.py +++ b/dpdata/siesta/aiMD_output.py @@ -7,7 +7,7 @@ #############################read output##################################### def get_single_line_tail(fin, keyword, num=1): - file = open(fin, 'r') + file = open(fin, "r") part_res = [] for value in file: if keyword in value: @@ -17,12 +17,21 @@ def get_single_line_tail(fin, keyword, num=1): file.close() return part_res + ## atomnum: number of atoms, row numbers ## begin_column: begin column num ## read_column_num: read column num ## column_num: the column number in nxet reading line -def extract_keyword(fout, keyword, down_line_num, begin_column, read_column_num, is_repeated_read, column_num): - file = open(fout, 'r') +def extract_keyword( + fout, + keyword, + down_line_num, + begin_column, + read_column_num, + is_repeated_read, + column_num, +): + file = open(fout, "r") ret = [] part_ret = [] flag = 0 @@ -61,17 +70,18 @@ def extract_keyword(fout, keyword, down_line_num, begin_column, read_column_num, file.close() return part_ret + def obtain_nframe(fname): - fp = open(fname, 'r') + fp = open(fname, "r") flag = False idx = 0 temp = 0 for 
ii in fp: - if 'siesta: Stress tensor (static) (eV/Ang**3):' in ii: + if "siesta: Stress tensor (static) (eV/Ang**3):" in ii: flag = True continue if flag: - if not 'siesta: Pressure (static):' in ii: + if not "siesta: Pressure (static):" in ii: if len(ii.split()) == 3: temp += 1 if temp == 3: @@ -82,34 +92,42 @@ def obtain_nframe(fname): fp.close() return idx + def get_atom_types(fout, atomnums): - covert_type = extract_keyword(fout, 'outcoor: Atomic coordinates (Ang):', atomnums, 3, 4, 0, 6)[0] + covert_type = extract_keyword( + fout, "outcoor: Atomic coordinates (Ang):", atomnums, 3, 4, 0, 6 + )[0] atomtype = [] # print(covert_type) for i in range(0, len(covert_type)): atomtype.append(int(covert_type[i]) - 1) return atomtype + def get_atom_name(fout): - file = open(fout, 'r') + file = open(fout, "r") ret = [] for value in file: - if 'Species number:' in value: + if "Species number:" in value: for j in range(len(value.split())): - if value.split()[j] == 'Label:': - ret.append(value.split()[j+1]) - break + if value.split()[j] == "Label:": + ret.append(value.split()[j + 1]) + break file.close() return ret + def get_atom_numbs(atomtypes): atom_numbs = [] for i in set(atomtypes): atom_numbs.append(atomtypes.count(i)) return atom_numbs + def get_virial(fout, cell): - viri = extract_keyword(fout, 'siesta: Stress tensor (static) (eV/Ang**3):', 3, 0, 3, 1, 3) + viri = extract_keyword( + fout, "siesta: Stress tensor (static) (eV/Ang**3):", 3, 0, 3, 1, 3 + ) vols = [] length = obtain_nframe(fout) for ii in range(length): @@ -120,6 +138,7 @@ def get_virial(fout, cell): viri[ii][jj] *= vols[ii] return viri + def covert_dimension(arr, num): arr = np.array(arr) frames = len(arr) @@ -128,23 +147,39 @@ def covert_dimension(arr, num): ret[i] = arr[i].reshape(num, 3) return ret + def get_aiMD_frame(fname): - NumberOfSpecies = int(get_single_line_tail(fname, 'redata: Number of Atomic Species')[0]) + NumberOfSpecies = int( + get_single_line_tail(fname, "redata: Number of Atomic 
Species")[0] + ) atom_names = get_atom_name(fname) - tot_natoms = int(get_single_line_tail(fname, 'Number of atoms', 3)[0]) + tot_natoms = int(get_single_line_tail(fname, "Number of atoms", 3)[0]) atom_types = get_atom_types(fname, tot_natoms) atom_numbs = get_atom_numbs(atom_types) - assert (max(atom_types) + 1 == NumberOfSpecies) - - cell = extract_keyword(fname, 'outcell: Unit cell vectors (Ang):', 3, 0, 3, 1, 3) - coord = extract_keyword(fname, 'outcoor: Atomic coordinates (Ang):', tot_natoms, 0, 3, 1, 6) - energy = get_single_line_tail(fname, 'siesta: E_KS(eV) =') - force = extract_keyword(fname, 'siesta: Atomic forces (eV/Ang):', tot_natoms, 1, 4, 1, 4) + assert max(atom_types) + 1 == NumberOfSpecies + + cell = extract_keyword(fname, "outcell: Unit cell vectors (Ang):", 3, 0, 3, 1, 3) + coord = extract_keyword( + fname, "outcoor: Atomic coordinates (Ang):", tot_natoms, 0, 3, 1, 6 + ) + energy = get_single_line_tail(fname, "siesta: E_KS(eV) =") + force = extract_keyword( + fname, "siesta: Atomic forces (eV/Ang):", tot_natoms, 1, 4, 1, 4 + ) virial = get_virial(fname, cell) cells = covert_dimension(np.array(cell), 3) coords = covert_dimension(np.array(coord), tot_natoms) forces = covert_dimension(np.array(force), tot_natoms) virials = covert_dimension(np.array(virial), 3) - return atom_names, atom_numbs, np.array(atom_types), cells, coords, np.array(energy), forces, virials + return ( + atom_names, + atom_numbs, + np.array(atom_types), + cells, + coords, + np.array(energy), + forces, + virials, + ) diff --git a/dpdata/siesta/output.py b/dpdata/siesta/output.py index e5969b279..b70fe55db 100644 --- a/dpdata/siesta/output.py +++ b/dpdata/siesta/output.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/python3 import numpy as np @@ -8,7 +8,7 @@ #############################read output##################################### def get_single_line_tail(fin, keyword, num=1): - file = open(fin, 'r') + file = open(fin, "r") res = [] for value in file: if keyword in value: 
@@ -23,7 +23,7 @@ def get_single_line_tail(fin, keyword, num=1): ## begin_column: begin column num ## column_num: read column num def extract_keyword(fout, keyword, down_line_num, begin_column, column_num): - file = open(fout, 'r') + file = open(fout, "r") ret = [] flag = 0 idx = 0 @@ -53,24 +53,28 @@ def extract_keyword(fout, keyword, down_line_num, begin_column, column_num): def get_atom_types(fout, atomnums): - covert_type = extract_keyword(fout, 'outcoor: Atomic coordinates (Ang):', atomnums, 3, 4) + covert_type = extract_keyword( + fout, "outcoor: Atomic coordinates (Ang):", atomnums, 3, 4 + ) atomtype = [] for i in range(0, len(covert_type)): atomtype.append(int(covert_type[i]) - 1) return atomtype + def get_atom_name(fout): - file = open(fout, 'r') + file = open(fout, "r") ret = [] for value in file: - if 'Species number:' in value: + if "Species number:" in value: for j in range(len(value.split())): - if value.split()[j] == 'Label:': - ret.append(value.split()[j+1]) - break + if value.split()[j] == "Label:": + ret.append(value.split()[j + 1]) + break file.close() return ret + def get_atom_numbs(atomtypes): atom_numbs = [] for i in set(atomtypes): @@ -83,7 +87,7 @@ def get_virial(fout, cells): for ii in cells: ### calucate vol vols.append(np.linalg.det(ii.reshape([3, 3]))) - ret = extract_keyword(fout, 'siesta: Stress tensor (static) (eV/Ang**3):', 3, 1, 4) + ret = extract_keyword(fout, "siesta: Stress tensor (static) (eV/Ang**3):", 3, 1, 4) ret = np.array([ret]) for idx, ii in enumerate(ret): ## siesta: 1eV/A^3= 1.60217*10^11 Pa , ---> qe: kBar=10^8Pa @@ -93,16 +97,20 @@ def get_virial(fout, cells): def obtain_frame(fname): - NumberOfSpecies = int(get_single_line_tail(fname, 'redata: Number of Atomic Species')[0]) + NumberOfSpecies = int( + get_single_line_tail(fname, "redata: Number of Atomic Species")[0] + ) atom_names = get_atom_name(fname) - tot_natoms = int(get_single_line_tail(fname, 'Number of atoms', 3)[0]) + tot_natoms = 
int(get_single_line_tail(fname, "Number of atoms", 3)[0]) atom_types = get_atom_types(fname, tot_natoms) atom_numbs = get_atom_numbs(atom_types) - assert (max(atom_types) + 1 == NumberOfSpecies) - cell = extract_keyword(fname, 'outcell: Unit cell vectors (Ang):', 3, 0, 3) - coord = extract_keyword(fname, 'outcoor: Atomic coordinates (Ang):', tot_natoms, 0, 3) - energy = get_single_line_tail(fname, 'siesta: E_KS(eV) =') - force = extract_keyword(fname, 'siesta: Atomic forces (eV/Ang):', tot_natoms, 1, 4) + assert max(atom_types) + 1 == NumberOfSpecies + cell = extract_keyword(fname, "outcell: Unit cell vectors (Ang):", 3, 0, 3) + coord = extract_keyword( + fname, "outcoor: Atomic coordinates (Ang):", tot_natoms, 0, 3 + ) + energy = get_single_line_tail(fname, "siesta: E_KS(eV) =") + force = extract_keyword(fname, "siesta: Atomic forces (eV/Ang):", tot_natoms, 1, 4) virial = get_virial(fname, np.array([cell])) cell = np.array(cell).reshape(3, 3) @@ -121,6 +129,13 @@ def obtain_frame(fname): # data['forces'] = np.array([force]) # data['virials'] = virial # return data - return atom_names, atom_numbs, np.array(atom_types), np.array([cell]), np.array([coord]), \ - np.array(energy), np.array([force]), np.array([virial]) - + return ( + atom_names, + atom_numbs, + np.array(atom_types), + np.array([cell]), + np.array([coord]), + np.array(energy), + np.array([force]), + np.array([virial]), + ) diff --git a/dpdata/stat.py b/dpdata/stat.py index 46d0a4a36..1f6193af3 100644 --- a/dpdata/stat.py +++ b/dpdata/stat.py @@ -40,7 +40,7 @@ def rmse(errors: np.ndarray) -> np.float64: class ErrorsBase(metaclass=ABCMeta): """Compute errors (deviations) between two systems. The type of system is assigned by SYSTEM_TYPE. 
- + Parameters ---------- system_1 : object @@ -48,11 +48,16 @@ class ErrorsBase(metaclass=ABCMeta): system_2 : object system 2 """ + SYSTEM_TYPE = object def __init__(self, system_1: SYSTEM_TYPE, system_2: SYSTEM_TYPE) -> None: - assert isinstance(system_1, self.SYSTEM_TYPE), "system_1 should be %s" % self.SYSTEM_TYPE.__name__ - assert isinstance(system_2, self.SYSTEM_TYPE), "system_2 should be %s" % self.SYSTEM_TYPE.__name__ + assert isinstance(system_1, self.SYSTEM_TYPE), ( + "system_1 should be %s" % self.SYSTEM_TYPE.__name__ + ) + assert isinstance(system_2, self.SYSTEM_TYPE), ( + "system_2 should be %s" % self.SYSTEM_TYPE.__name__ + ) self.system_1 = system_1 self.system_2 = system_2 @@ -78,7 +83,7 @@ def e_rmse(self) -> np.float64: def f_mae(self) -> np.float64: """Force MAE.""" return mae(self.f_errors) - + @property def f_rmse(self) -> np.float64: """Force RMSE.""" @@ -102,19 +107,20 @@ class Errors(ErrorsBase): >>> e = dpdata.stat.Errors(system_1, system_2) >>> print("%.4f %.4f %.4f %.4f" % (e.e_mae, e.e_rmse, e.f_mae, e.f_rmse)) """ + SYSTEM_TYPE = LabeledSystem @property @lru_cache() def e_errors(self) -> np.ndarray: """Energy errors.""" - return self.system_1['energies'] - self.system_2['energies'] + return self.system_1["energies"] - self.system_2["energies"] @property @lru_cache() def f_errors(self) -> np.ndarray: """Force errors.""" - return (self.system_1['forces'] - self.system_2['forces']).ravel() + return (self.system_1["forces"] - self.system_2["forces"]).ravel() class MultiErrors(ErrorsBase): @@ -134,6 +140,7 @@ class MultiErrors(ErrorsBase): >>> e = dpdata.stat.MultiErrors(system_1, system_2) >>> print("%.4f %.4f %.4f %.4f" % (e.e_mae, e.e_rmse, e.f_mae, e.f_rmse)) """ + SYSTEM_TYPE = MultiSystems @property diff --git a/dpdata/system.py b/dpdata/system.py index f403b8791..f7ad7b5f8 100644 --- a/dpdata/system.py +++ b/dpdata/system.py @@ -8,7 +8,7 @@ from enum import Enum, unique from typing import Any, Tuple, Union from monty.json import 
MSONable -from monty.serialization import loadfn,dumpfn +from monty.serialization import loadfn, dumpfn from dpdata.periodic_table import Element from dpdata.amber.mask import pick_by_amber_mask, load_param_file import dpdata @@ -26,19 +26,21 @@ add_atom_names, ) + def load_format(fmt): fmt = fmt.lower() formats = Format.get_formats() if fmt in formats: return formats[fmt]() raise NotImplementedError( - "Unsupported data format %s. Supported formats: %s" % ( - fmt, " ".join(formats) - )) + "Unsupported data format %s. Supported formats: %s" % (fmt, " ".join(formats)) + ) + @unique class Axis(Enum): """Data axis.""" + NFRAMES = "nframes" NATOMS = "natoms" NTYPES = "ntypes" @@ -64,7 +66,14 @@ class DataType: required : bool, default=True whether this data is required """ - def __init__(self, name: str, dtype: type, shape: Tuple[int, Axis]=None, required: bool=True) -> None: + + def __init__( + self, + name: str, + dtype: type, + shape: Tuple[int, Axis] = None, + required: bool = True, + ) -> None: self.name = name self.dtype = dtype self.shape = shape @@ -91,12 +100,12 @@ def real_shape(self, system: "System") -> Tuple[int]: def check(self, system: "System"): """Check if a system has correct data of this type. 
- + Parameters ---------- system : System checked system - + Raises ------ DataError @@ -110,28 +119,34 @@ def check(self, system: "System"): if isinstance(data, list) and not len(data): pass elif not isinstance(data, self.dtype): - raise DataError("Type of %s is %s, but expected %s" % (self.name, - type(data).__name__, self.dtype.__name__)) + raise DataError( + "Type of %s is %s, but expected %s" + % (self.name, type(data).__name__, self.dtype.__name__) + ) # check shape if self.shape is not None: shape = self.real_shape(system) # skip checking empty list of np.ndarray if isinstance(data, np.ndarray): if data.size and shape != data.shape: - raise DataError("Shape of %s is %s, but expected %s" % (self.name, - data.shape, shape)) + raise DataError( + "Shape of %s is %s, but expected %s" + % (self.name, data.shape, shape) + ) elif isinstance(data, list): if len(shape) and shape[0] != len(data): - raise DataError("Length of %s is %d, but expected %d" % (self.name, - len(data), shape[0])) + raise DataError( + "Length of %s is %d, but expected %d" + % (self.name, len(data), shape[0]) + ) else: raise RuntimeError("Unsupported type to check shape") elif self.required: raise DataError("%s not found in data" % self.name) -class System (MSONable) : - ''' +class System(MSONable): + """ The data System A data System (a concept used by `deepmd-kit `_) @@ -153,12 +168,13 @@ class System (MSONable) : Restrictions: - `d_example['orig']` is always [0, 0, 0] - `d_example['cells'][ii]` is always lower triangular (lammps cell tensor convention) - + Attributes ---------- DTYPES : tuple[DataType] data types of this class - ''' + """ + DTYPES = ( DataType("atom_numbs", list, (Axis.NTYPES,)), DataType("atom_names", list, (Axis.NTYPES,)), @@ -169,15 +185,17 @@ class System (MSONable) : DataType("nopbc", bool, required=False), ) - def __init__ (self, - file_name = None, - fmt = 'auto', - type_map = None, - begin = 0, - step = 1, - data = None, - convergence_check = True, - **kwargs) : + def 
__init__( + self, + file_name=None, + fmt="auto", + type_map=None, + begin=0, + step=1, + data=None, + convergence_check=True, + **kwargs, + ): """ Constructor @@ -199,7 +217,7 @@ def __init__ (self, - ``vasp/xml``: vasp xml - ``qe/cp/traj``: Quantum Espresso CP trajectory files. should have: file_name+'.in' and file_name+'.pos' - ``qe/pw/scf``: Quantum Espresso PW single point calculations. Both input and output files are required. If file_name is a string, it denotes the output file name. Input file name is obtained by replacing 'out' by 'in' from file_name. Or file_name is a list, with the first element being the input file name and the second element being the output filename. - - ``abacus/scf``: ABACUS pw/lcao scf. The directory containing INPUT file is required. + - ``abacus/scf``: ABACUS pw/lcao scf. The directory containing INPUT file is required. - ``abacus/md``: ABACUS pw/lcao MD. The directory containing INPUT file is required. - ``abacus/relax``: ABACUS pw/lcao relax or cell-relax. The directory containing INPUT file is required. - ``abacus/stru``: abacus stru @@ -249,27 +267,35 @@ def __init__ (self, Whether to request a convergence check. 
""" self.data = {} - self.data['atom_numbs'] = [] - self.data['atom_names'] = [] - self.data['atom_types'] = [] - self.data['orig'] = np.array([0, 0, 0]) - self.data['cells'] = [] - self.data['coords'] = [] + self.data["atom_numbs"] = [] + self.data["atom_names"] = [] + self.data["atom_types"] = [] + self.data["orig"] = np.array([0, 0, 0]) + self.data["cells"] = [] + self.data["coords"] = [] if data: - self.data=data + self.data = data self.check_data() return - if file_name is None : + if file_name is None: return - self.from_fmt(file_name, fmt, type_map=type_map, begin= begin, step=step, convergence_check=convergence_check, **kwargs) + self.from_fmt( + file_name, + fmt, + type_map=type_map, + begin=begin, + step=step, + convergence_check=convergence_check, + **kwargs, + ) if type_map is not None: self.apply_type_map(type_map) def check_data(self): """Check if data is correct. - + Raises ------ DataError @@ -280,16 +306,19 @@ def check_data(self): for dd in self.DTYPES: dd.check(self) if sum(self.get_atom_numbs()) != self.get_natoms(): - raise DataError("Sum of atom_numbs (%d) is not equal to natoms (%d)." % (sum(self.get_atom_numbs()), self.get_natoms())) + raise DataError( + "Sum of atom_numbs (%d) is not equal to natoms (%d)." 
+ % (sum(self.get_atom_numbs()), self.get_natoms()) + ) post_funcs = Plugin() - def from_fmt(self, file_name, fmt='auto', **kwargs): + def from_fmt(self, file_name, fmt="auto", **kwargs): fmt = fmt.lower() - if fmt == 'auto': - fmt = os.path.basename(file_name).split('.')[-1].lower() + if fmt == "auto": + fmt = os.path.basename(file_name).split(".")[-1].lower() return self.from_fmt_obj(load_format(fmt), file_name, **kwargs) - + def from_fmt_obj(self, fmtobj, file_name, **kwargs): data = fmtobj.from_system(file_name, **kwargs) if data: @@ -299,26 +328,26 @@ def from_fmt_obj(self, fmtobj, file_name, **kwargs): else: self.data = {**self.data, **data} self.check_data() - if hasattr(fmtobj.from_system, 'post_func'): + if hasattr(fmtobj.from_system, "post_func"): for post_f in fmtobj.from_system.post_func: self.post_funcs.get_plugin(post_f)(self) return self - def to(self, fmt: str, *args, **kwargs) -> 'System': + def to(self, fmt: str, *args, **kwargs) -> "System": """Dump systems to the specific format. 
- + Parameters ---------- fmt : str format - + Returns ------- System self """ return self.to_fmt_obj(load_format(fmt), *args, **kwargs) - + def to_fmt_obj(self, fmtobj, *args, **kwargs): return fmtobj.to_system(self.data, *args, **kwargs) @@ -326,15 +355,15 @@ def __repr__(self): return self.__str__() def __str__(self): - ret="Data Summary" - ret+="\nUnlabeled System" - ret+="\n-------------------" - ret+="\nFrame Numbers : %d"%self.get_nframes() - ret+="\nAtom Numbers : %d"%self.get_natoms() - ret+="\nElement List :" - ret+="\n-------------------" - ret+="\n"+" ".join(map(str,self.get_atom_names())) - ret+="\n"+" ".join(map(str,self.get_atom_numbs())) + ret = "Data Summary" + ret += "\nUnlabeled System" + ret += "\n-------------------" + ret += "\nFrame Numbers : %d" % self.get_nframes() + ret += "\nAtom Numbers : %d" % self.get_natoms() + ret += "\nElement List :" + ret += "\n-------------------" + ret += "\n" + " ".join(map(str, self.get_atom_names())) + ret += "\n" + " ".join(map(str, self.get_atom_numbs())) return ret def __getitem__(self, key): @@ -343,31 +372,28 @@ def __getitem__(self, key): return self.sub_system(key) return self.data[key] - def __len__(self) : + def __len__(self): """Returns number of frames in the system""" return self.get_nframes() + def __add__(self, others): + """magic method "+" operation""" + self_copy = self.copy() + if isinstance(others, System): + other_copy = others.copy() + self_copy.append(other_copy) + elif isinstance(others, list): + for ii in others: + assert isinstance(ii, System) + ii_copy = ii.copy() + self_copy.append(ii_copy) + else: + raise RuntimeError("Unspported data structure") + return self.__class__.from_dict({"data": self_copy.data}) - def __add__(self,others) : - """magic method "+" operation """ - self_copy=self.copy() - if isinstance(others,System): - other_copy=others.copy() - self_copy.append(other_copy) - elif isinstance(others, list): - for ii in others: - assert(isinstance(ii,System)) - 
ii_copy=ii.copy() - self_copy.append(ii_copy) - else: - raise RuntimeError("Unspported data structure") - return self.__class__.from_dict({'data':self_copy.data}) - - - def dump(self,filename,indent=4): - """dump .json or .yaml file """ - dumpfn(self.as_dict(),filename,indent=indent) - + def dump(self, filename, indent=4): + """dump .json or .yaml file""" + dumpfn(self.as_dict(), filename, indent=indent) def map_atom_types(self, type_map=None) -> np.ndarray: """ @@ -387,75 +413,70 @@ def map_atom_types(self, type_map=None) -> np.ndarray: new_atom_types : np.ndarray The mapped atom types """ - if isinstance(type_map,dict) or type_map is None: - pass - elif isinstance(type_map,list): - type_map=dict(zip(type_map,range(len(type_map)))) + if isinstance(type_map, dict) or type_map is None: + pass + elif isinstance(type_map, list): + type_map = dict(zip(type_map, range(len(type_map)))) else: - raise RuntimeError("Unknown format") + raise RuntimeError("Unknown format") if type_map is None: - type_map=elements_index_map(self.get_atom_names().copy(),standard=True) + type_map = elements_index_map(self.get_atom_names().copy(), standard=True) - _set1=set(self.get_atom_names()) - _set2=set(list(type_map.keys())) + _set1 = set(self.get_atom_names()) + _set2 = set(list(type_map.keys())) assert _set1.issubset(_set2) - atom_types_list=[] - for name, numb in zip(self.get_atom_names(), self.get_atom_numbs()): - atom_types_list.extend([name]*numb) + atom_types_list = [] + for name, numb in zip(self.get_atom_names(), self.get_atom_numbs()): + atom_types_list.extend([name] * numb) new_atom_types = np.array([type_map[ii] for ii in atom_types_list], dtype=int) return new_atom_types @staticmethod def load(filename): - """rebuild System obj. from .json or .yaml file """ + """rebuild System obj. 
from .json or .yaml file""" return loadfn(filename) def as_dict(self): """Returns data dict of System instance""" - d={"@module": self.__class__.__module__, - "@class": self.__class__.__name__, - "data": self.data - } + d = { + "@module": self.__class__.__module__, + "@class": self.__class__.__name__, + "data": self.data, + } return d - def get_atom_names(self): - """Returns name of atoms """ - return self.data['atom_names'] - + """Returns name of atoms""" + return self.data["atom_names"] def get_atom_types(self): - """Returns type of atoms """ - return self.data['atom_types'] - + """Returns type of atoms""" + return self.data["atom_types"] def get_atom_numbs(self): - """Returns number of atoms """ - return self.data['atom_numbs'] - + """Returns number of atoms""" + return self.data["atom_numbs"] - def get_nframes(self) : + def get_nframes(self): """Returns number of frames in the system""" - return len(self.data['cells']) + return len(self.data["cells"]) - - def get_natoms(self) : + def get_natoms(self): """Returns total number of atoms in the system""" - return len(self.data['atom_types']) + return len(self.data["atom_types"]) def get_ntypes(self) -> int: """Returns total number of atom types in the system.""" - return len(self.data['atom_names']) + return len(self.data["atom_names"]) def copy(self): - """Returns a copy of the system. 
""" - return self.__class__.from_dict({'data':deepcopy(self.data)}) - + """Returns a copy of the system.""" + return self.__class__.from_dict({"data": deepcopy(self.data)}) - def sub_system(self, f_idx) : + def sub_system(self, f_idx): """ Construct a subsystem from the system @@ -487,8 +508,7 @@ def sub_system(self, f_idx) : tmp.data[tt.name] = self.data[tt.name] return tmp - - def append(self, system) : + def append(self, system): """ Append a system to this system @@ -497,44 +517,49 @@ def append(self, system) : system : System The system to append """ - if not len(system.data['atom_numbs']): + if not len(system.data["atom_numbs"]): # skip if the system to append is non-converged return False - elif not len(self.data['atom_numbs']): + elif not len(self.data["atom_numbs"]): # this system is non-converged but the system to append is converged self.data = system.data return False if system.uniq_formula != self.uniq_formula: - raise RuntimeError('systems with inconsistent formula could not be append: %s v.s. %s' % (self.uniq_formula, system.uniq_formula)) - if system.data['atom_names'] != self.data['atom_names']: + raise RuntimeError( + "systems with inconsistent formula could not be append: %s v.s. 
%s" + % (self.uniq_formula, system.uniq_formula) + ) + if system.data["atom_names"] != self.data["atom_names"]: # allow to append a system with different atom_names order system.sort_atom_names() self.sort_atom_names() - if (system.data['atom_types'] != self.data['atom_types']).any(): + if (system.data["atom_types"] != self.data["atom_types"]).any(): # allow to append a system with different atom_types order system.sort_atom_types() self.sort_atom_types() - for ii in ['atom_numbs', 'atom_names'] : - assert(system.data[ii] == self.data[ii]) - for ii in ['atom_types','orig'] : - eq = [v1==v2 for v1,v2 in zip(system.data[ii], self.data[ii])] - assert(all(eq)) + for ii in ["atom_numbs", "atom_names"]: + assert system.data[ii] == self.data[ii] + for ii in ["atom_types", "orig"]: + eq = [v1 == v2 for v1, v2 in zip(system.data[ii], self.data[ii])] + assert all(eq) for tt in self.DTYPES: # check if the first shape is nframes if tt.shape is not None and Axis.NFRAMES in tt.shape: if tt.name not in self.data and tt.name in system.data: - raise RuntimeError('system has %s, but this does not' % tt.name) + raise RuntimeError("system has %s, but this does not" % tt.name) elif tt.name in self.data and tt.name not in system.data: - raise RuntimeError('this has %s, but system does not' % tt.name) + raise RuntimeError("this has %s, but system does not" % tt.name) elif tt.name not in self.data and tt.name not in system.data: # skip if both not exist continue # concat any data in nframes axis axis_nframes = tt.shape.index(Axis.NFRAMES) - self.data[tt.name] = np.concatenate((self.data[tt.name], system[tt.name]), axis=axis_nframes) + self.data[tt.name] = np.concatenate( + (self.data[tt.name], system[tt.name]), axis=axis_nframes + ) if self.nopbc and not system.nopbc: # appended system uses PBC, cancel nopbc - self.data['nopbc'] = False + self.data["nopbc"] = False return True def sort_atom_names(self, type_map=None): @@ -560,24 +585,24 @@ def check_type_map(self, type_map): type_map : 
list type_map """ - if type_map is not None and type_map != self.data['atom_names']: + if type_map is not None and type_map != self.data["atom_names"]: self.sort_atom_names(type_map=type_map) - def apply_type_map(self, type_map) : + def apply_type_map(self, type_map): if type_map is not None and type(type_map) is list: self.check_type_map(type_map) else: - raise RuntimeError('invalid type map, cannot be applied') + raise RuntimeError("invalid type map, cannot be applied") def sort_atom_types(self) -> np.ndarray: """Sort atom types. - + Returns ------- idx : np.ndarray new atom index in the Axis.NATOMS """ - idx = np.argsort(self.data['atom_types']) + idx = np.argsort(self.data["atom_types"]) for tt in self.DTYPES: if tt.name not in self.data: # skip optional data @@ -594,8 +619,14 @@ def formula(self): """ Return the formula of this system, like C3H5O2 """ - return ''.join(["{}{}".format(symbol,numb) for symbol,numb in - zip(self.data['atom_names'], self.data['atom_numbs'])]) + return "".join( + [ + "{}{}".format(symbol, numb) + for symbol, numb in zip( + self.data["atom_names"], self.data["atom_numbs"] + ) + ] + ) @property def uniq_formula(self): @@ -604,9 +635,14 @@ def uniq_formula(self): The uniq_formula sort the elements in formula by names. Systems with the same uniq_formula can be append together. 
""" - return ''.join(["{}{}".format(symbol,numb) for symbol,numb in sorted( - zip(self.data['atom_names'], self.data['atom_numbs']))]) - + return "".join( + [ + "{}{}".format(symbol, numb) + for symbol, numb in sorted( + zip(self.data["atom_names"], self.data["atom_numbs"]) + ) + ] + ) def extend(self, systems): """ @@ -621,18 +657,16 @@ def extend(self, systems): for system in systems: self.append(system.copy()) - - def apply_pbc(self) : + def apply_pbc(self): """ Append periodic boundary condition """ - ncoord = dpdata.md.pbc.dir_coord(self.data['coords'], self.data['cells']) + ncoord = dpdata.md.pbc.dir_coord(self.data["coords"], self.data["cells"]) ncoord = ncoord % 1 - self.data['coords'] = np.matmul(ncoord, self.data['cells']) - + self.data["coords"] = np.matmul(ncoord, self.data["cells"]) @post_funcs.register("remove_pbc") - def remove_pbc(self, protect_layer = 9): + def remove_pbc(self, protect_layer=9): """ This method does NOT delete the definition of the cells, it (1) revises the cell to a cubic cell and ensures that the cell @@ -645,47 +679,44 @@ def remove_pbc(self, protect_layer = 9): protect_layer : the protect layer between the atoms and the cell boundary """ - assert(protect_layer >= 0), "the protect_layer should be no less than 0" + assert protect_layer >= 0, "the protect_layer should be no less than 0" remove_pbc(self.data, protect_layer) - def affine_map(self, trans, f_idx = 0) : - assert(np.linalg.det(trans) != 0) - self.data['cells'][f_idx] = np.matmul(self.data['cells'][f_idx], trans) - self.data['coords'][f_idx] = np.matmul(self.data['coords'][f_idx], trans) - + def affine_map(self, trans, f_idx=0): + assert np.linalg.det(trans) != 0 + self.data["cells"][f_idx] = np.matmul(self.data["cells"][f_idx], trans) + self.data["coords"][f_idx] = np.matmul(self.data["coords"][f_idx], trans) @post_funcs.register("shift_orig_zero") - def _shift_orig_zero(self) : - for ff in self.data['coords'] : - for ii in ff : - ii = ii - self.data['orig'] - 
self.data['orig'] = self.data['orig'] - self.data['orig'] - assert((np.zeros([3]) == self.data['orig']).all()) + def _shift_orig_zero(self): + for ff in self.data["coords"]: + for ii in ff: + ii = ii - self.data["orig"] + self.data["orig"] = self.data["orig"] - self.data["orig"] + assert (np.zeros([3]) == self.data["orig"]).all() @post_funcs.register("rot_lower_triangular") - def rot_lower_triangular(self) : - for ii in range(self.get_nframes()) : + def rot_lower_triangular(self): + for ii in range(self.get_nframes()): self.rot_frame_lower_triangular(ii) - - def rot_frame_lower_triangular(self, f_idx = 0) : - qq, rr = np.linalg.qr(self.data['cells'][f_idx].T) - if np.linalg.det(qq) < 0 : + def rot_frame_lower_triangular(self, f_idx=0): + qq, rr = np.linalg.qr(self.data["cells"][f_idx].T) + if np.linalg.det(qq) < 0: qq = -qq rr = -rr - self.affine_map(qq, f_idx = f_idx) + self.affine_map(qq, f_idx=f_idx) rot = np.eye(3) - if self.data['cells'][f_idx][0][0] < 0 : + if self.data["cells"][f_idx][0][0] < 0: rot[0][0] = -1 - if self.data['cells'][f_idx][1][1] < 0 : + if self.data["cells"][f_idx][1][1] < 0: rot[1][1] = -1 - if self.data['cells'][f_idx][2][2] < 0 : + if self.data["cells"][f_idx][2][2] < 0: rot[2][2] = -1 - assert(np.linalg.det(rot) == 1) - self.affine_map(rot, f_idx = f_idx) + assert np.linalg.det(rot) == 1 + self.affine_map(rot, f_idx=f_idx) return np.matmul(qq, rot) - def add_atom_names(self, atom_names): """ Add atom_names that do not exist. @@ -711,71 +742,99 @@ def replicate(self, ncopy): tmp : System The system after replication. 
""" - if len(ncopy) !=3: - raise RuntimeError('ncopy must be a list or tuple with 3 int') + if len(ncopy) != 3: + raise RuntimeError("ncopy must be a list or tuple with 3 int") for ii in ncopy: if type(ii) is not int: - raise RuntimeError('ncopy must be a list or tuple must with 3 int') + raise RuntimeError("ncopy must be a list or tuple must with 3 int") tmp = System() nframes = self.get_nframes() data = self.data - tmp.data['atom_names'] = list(np.copy(data['atom_names'])) - tmp.data['atom_numbs'] = list(np.array(np.copy(data['atom_numbs'])) * np.prod(ncopy)) - tmp.data['atom_types'] = np.sort(np.tile(np.copy(data['atom_types']),np.prod(ncopy))) - tmp.data['cells'] = np.copy(data['cells']) + tmp.data["atom_names"] = list(np.copy(data["atom_names"])) + tmp.data["atom_numbs"] = list( + np.array(np.copy(data["atom_numbs"])) * np.prod(ncopy) + ) + tmp.data["atom_types"] = np.sort( + np.tile(np.copy(data["atom_types"]), np.prod(ncopy)) + ) + tmp.data["cells"] = np.copy(data["cells"]) for ii in range(3): - tmp.data['cells'][:,ii,:] *= ncopy[ii] - tmp.data['coords'] = np.tile(np.copy(data['coords']),tuple(ncopy)+(1,1,1)) + tmp.data["cells"][:, ii, :] *= ncopy[ii] + tmp.data["coords"] = np.tile(np.copy(data["coords"]), tuple(ncopy) + (1, 1, 1)) for xx in range(ncopy[0]): for yy in range(ncopy[1]): for zz in range(ncopy[2]): - tmp.data['coords'][xx,yy,zz,:,:,:] += xx * np.reshape(data['cells'][:,0,:], [-1,1,3])\ - + yy * np.reshape(data['cells'][:,1,:], [-1,1,3])\ - + zz * np.reshape(data['cells'][:,2,:], [-1,1,3]) - tmp.data['coords'] = np.reshape(np.transpose(tmp.data['coords'], [3,4,0,1,2,5]), (nframes, -1 , 3)) + tmp.data["coords"][xx, yy, zz, :, :, :] += ( + xx * np.reshape(data["cells"][:, 0, :], [-1, 1, 3]) + + yy * np.reshape(data["cells"][:, 1, :], [-1, 1, 3]) + + zz * np.reshape(data["cells"][:, 2, :], [-1, 1, 3]) + ) + tmp.data["coords"] = np.reshape( + np.transpose(tmp.data["coords"], [3, 4, 0, 1, 2, 5]), (nframes, -1, 3) + ) return tmp def replace(self, 
initial_atom_type, end_atom_type, replace_num): if type(self) is not dpdata.System: - raise RuntimeError('Must use method replace() of the instance of class dpdata.System') + raise RuntimeError( + "Must use method replace() of the instance of class dpdata.System" + ) if type(replace_num) is not int: - raise ValueError("replace_num must be a integer. Now is {replace_num}".format(replace_num=replace_num)) + raise ValueError( + "replace_num must be a integer. Now is {replace_num}".format( + replace_num=replace_num + ) + ) if replace_num <= 0: - raise ValueError("replace_num must be larger than 0.Now is {replace_num}".format(replace_num=replace_num)) + raise ValueError( + "replace_num must be larger than 0.Now is {replace_num}".format( + replace_num=replace_num + ) + ) try: - initial_atom_index = self.data['atom_names'].index(initial_atom_type) + initial_atom_index = self.data["atom_names"].index(initial_atom_type) except ValueError as e: - raise ValueError("atom_type {initial_atom_type} not in {atom_names}" - .format(initial_atom_type=initial_atom_type, atom_names=self.data['atom_names'])) - max_replace_num = self.data['atom_numbs'][initial_atom_index] + raise ValueError( + "atom_type {initial_atom_type} not in {atom_names}".format( + initial_atom_type=initial_atom_type, + atom_names=self.data["atom_names"], + ) + ) + max_replace_num = self.data["atom_numbs"][initial_atom_index] if replace_num > max_replace_num: - raise RuntimeError("not enough {initial_atom_type} atom, only {max_replace_num} available, less than {replace_num}.Please check." 
- .format(initial_atom_type=initial_atom_type,max_replace_num=max_replace_num, replace_num=replace_num)) + raise RuntimeError( + "not enough {initial_atom_type} atom, only {max_replace_num} available, less than {replace_num}.Please check.".format( + initial_atom_type=initial_atom_type, + max_replace_num=max_replace_num, + replace_num=replace_num, + ) + ) - may_replace_indices = [i for i, x in enumerate(self.data['atom_types']) if x == initial_atom_index] - to_replace_indices = np.random.choice(may_replace_indices, size=replace_num, replace=False) + may_replace_indices = [ + i for i, x in enumerate(self.data["atom_types"]) if x == initial_atom_index + ] + to_replace_indices = np.random.choice( + may_replace_indices, size=replace_num, replace=False + ) - if end_atom_type not in self.data['atom_names']: - self.data['atom_names'].append(end_atom_type) - self.data['atom_numbs'].append(0) + if end_atom_type not in self.data["atom_names"]: + self.data["atom_names"].append(end_atom_type) + self.data["atom_numbs"].append(0) - end_atom_index = self.data['atom_names'].index(end_atom_type) + end_atom_index = self.data["atom_names"].index(end_atom_type) for ii in to_replace_indices: - self.data['atom_types'][ii] = end_atom_index - self.data['atom_numbs'][initial_atom_index] -= replace_num - self.data['atom_numbs'][end_atom_index] += replace_num + self.data["atom_types"][ii] = end_atom_index + self.data["atom_numbs"][initial_atom_index] -= replace_num + self.data["atom_numbs"][end_atom_index] += replace_num self.sort_atom_types() - - def perturb(self, - pert_num, - cell_pert_fraction, - atom_pert_distance, - atom_pert_style='normal'): + def perturb( + self, pert_num, cell_pert_fraction, atom_pert_distance, atom_pert_style="normal" + ): """ Perturb each frame in the system randomly. The cell will be deformed randomly, and atoms will be displaced by a random distance in random direction. 
@@ -812,8 +871,8 @@ def perturb(self, """ if type(self) is not dpdata.System: raise RuntimeError( - f'Using method perturb() of an instance of {type(self)}. ' - f'Must use method perturb() of the instance of class dpdata.System.' + f"Using method perturb() of an instance of {type(self)}. " + f"Must use method perturb() of the instance of class dpdata.System." ) perturbed_system = System() nframes = self.get_nframes() @@ -821,11 +880,17 @@ def perturb(self, for jj in range(pert_num): tmp_system = self[ii].copy() cell_perturb_matrix = get_cell_perturb_matrix(cell_pert_fraction) - tmp_system.data['cells'][0] = np.matmul(tmp_system.data['cells'][0],cell_perturb_matrix) - tmp_system.data['coords'][0] = np.matmul(tmp_system.data['coords'][0],cell_perturb_matrix) - for kk in range(len(tmp_system.data['coords'][0])): - atom_perturb_vector = get_atom_perturb_vector(atom_pert_distance, atom_pert_style) - tmp_system.data['coords'][0][kk] += atom_perturb_vector + tmp_system.data["cells"][0] = np.matmul( + tmp_system.data["cells"][0], cell_perturb_matrix + ) + tmp_system.data["coords"][0] = np.matmul( + tmp_system.data["coords"][0], cell_perturb_matrix + ) + for kk in range(len(tmp_system.data["coords"][0])): + atom_perturb_vector = get_atom_perturb_vector( + atom_pert_distance, atom_pert_style + ) + tmp_system.data["coords"][0][kk] += atom_perturb_vector tmp_system.rot_lower_triangular() perturbed_system.append(tmp_system) return perturbed_system @@ -838,7 +903,7 @@ def nopbc(self): @nopbc.setter def nopbc(self, value): - self.data['nopbc'] = value + self.data["nopbc"] = value def shuffle(self): """Shuffle frames randomly.""" @@ -846,7 +911,7 @@ def shuffle(self): self.data = self.sub_system(idx).data return idx - def predict(self, *args: Any, driver: str="dp", **kwargs: Any) -> "LabeledSystem": + def predict(self, *args: Any, driver: str = "dp", **kwargs: Any) -> "LabeledSystem": """ Predict energies and forces by a driver. 
@@ -874,9 +939,11 @@ def predict(self, *args: Any, driver: str="dp", **kwargs: Any) -> "LabeledSystem data = driver.label(self.data.copy()) return LabeledSystem(data=data) - def minimize(self, *args: Any, minimizer: Union[str, Minimizer], **kwargs: Any) -> "LabeledSystem": + def minimize( + self, *args: Any, minimizer: Union[str, Minimizer], **kwargs: Any + ) -> "LabeledSystem": """Minimize the geometry. - + Parameters ---------- *args : iterable @@ -898,7 +965,7 @@ def minimize(self, *args: Any, minimizer: Union[str, Minimizer], **kwargs: Any) def pick_atom_idx(self, idx, nopbc=None): """Pick atom index - + Parameters ---------- idx: int or list or slice @@ -924,8 +991,10 @@ def pick_atom_idx(self, idx, nopbc=None): new_shape[axis_natoms] = idx new_sys.data[tt.name] = self.data[tt.name][tuple(new_shape)] # recalculate atom_numbs according to atom_types - atom_numbs = np.bincount(new_sys.data['atom_types'], minlength=len(self.get_atom_names())) - new_sys.data['atom_numbs'] = list(atom_numbs) + atom_numbs = np.bincount( + new_sys.data["atom_types"], minlength=len(self.get_atom_names()) + ) + new_sys.data["atom_numbs"] = list(atom_numbs) if nopbc is True or nopbc is False: new_sys.nopbc = nopbc return new_sys @@ -933,30 +1002,32 @@ def pick_atom_idx(self, idx, nopbc=None): def remove_atom_names(self, atom_names): """Remove atom names and all such atoms. For example, you may not remove EP atoms in TIP4P/Ew water, which - is not a real atom. + is not a real atom. 
""" if isinstance(atom_names, str): atom_names = [atom_names] removed_atom_idx = [] for an in atom_names: # get atom name idx - idx = self.data['atom_names'].index(an) - atom_idx = self.data['atom_types'] == idx + idx = self.data["atom_names"].index(an) + atom_idx = self.data["atom_types"] == idx removed_atom_idx.append(atom_idx) picked_atom_idx = ~np.any(removed_atom_idx, axis=0) new_sys = self.pick_atom_idx(picked_atom_idx) # let's remove atom_names # firstly, rearrange atom_names and put these atom_names in the end - new_atom_names = list([xx for xx in new_sys.data['atom_names'] if xx not in atom_names]) + new_atom_names = list( + [xx for xx in new_sys.data["atom_names"] if xx not in atom_names] + ) new_sys.sort_atom_names(type_map=new_atom_names + atom_names) # remove atom_names and atom_numbs - new_sys.data['atom_names'] = new_atom_names - new_sys.data['atom_numbs'] = new_sys.data['atom_numbs'][:len(new_atom_names)] + new_sys.data["atom_names"] = new_atom_names + new_sys.data["atom_numbs"] = new_sys.data["atom_numbs"][: len(new_atom_names)] return new_sys def pick_by_amber_mask(self, param, maskstr, pass_coords=False, nopbc=None): """Pick atoms by amber mask - + Parameters ---------- param: str or parmed.Structure @@ -964,7 +1035,7 @@ def pick_by_amber_mask(self, param, maskstr, pass_coords=False, nopbc=None): maskstr: str Amber masks pass_coords: Boolen (default: False) - If pass_coords is true, the function will pass coordinates and + If pass_coords is true, the function will pass coordinates and return a MultiSystem. Otherwise, the result is coordinate-independent, and the function will return System or LabeledSystem. 
@@ -976,53 +1047,60 @@ def pick_by_amber_mask(self, param, maskstr, pass_coords=False, nopbc=None): ms = MultiSystems() for sub_s in self: # TODO: this can computed in pararrel - idx = pick_by_amber_mask(parm, maskstr, sub_s['coords'][0]) + idx = pick_by_amber_mask(parm, maskstr, sub_s["coords"][0]) ms.append(sub_s.pick_atom_idx(idx, nopbc=nopbc)) return ms else: idx = pick_by_amber_mask(parm, maskstr) return self.pick_atom_idx(idx, nopbc=nopbc) + def get_cell_perturb_matrix(cell_pert_fraction): - if cell_pert_fraction<0: - raise RuntimeError('cell_pert_fraction can not be negative') + if cell_pert_fraction < 0: + raise RuntimeError("cell_pert_fraction can not be negative") e0 = np.random.rand(6) - e = e0 * 2 *cell_pert_fraction - cell_pert_fraction + e = e0 * 2 * cell_pert_fraction - cell_pert_fraction cell_pert_matrix = np.array( - [[1+e[0], 0.5 * e[5], 0.5 * e[4]], - [0.5 * e[5], 1+e[1], 0.5 * e[3]], - [0.5 * e[4], 0.5 * e[3], 1+e[2]]] + [ + [1 + e[0], 0.5 * e[5], 0.5 * e[4]], + [0.5 * e[5], 1 + e[1], 0.5 * e[3]], + [0.5 * e[4], 0.5 * e[3], 1 + e[2]], + ] ) return cell_pert_matrix -def get_atom_perturb_vector(atom_pert_distance, atom_pert_style='normal'): + +def get_atom_perturb_vector(atom_pert_distance, atom_pert_style="normal"): random_vector = None if atom_pert_distance < 0: - raise RuntimeError('atom_pert_distance can not be negative') + raise RuntimeError("atom_pert_distance can not be negative") - if atom_pert_style == 'normal': + if atom_pert_style == "normal": e = np.random.randn(3) - random_vector=(atom_pert_distance/np.sqrt(3))*e - elif atom_pert_style == 'uniform': + random_vector = (atom_pert_distance / np.sqrt(3)) * e + elif atom_pert_style == "uniform": e = np.random.randn(3) while np.linalg.norm(e) < 0.1: e = np.random.randn(3) - random_unit_vector = e/np.linalg.norm(e) + random_unit_vector = e / np.linalg.norm(e) v0 = np.random.rand(1) - v = np.power(v0,1/3) - random_vector = atom_pert_distance*v*random_unit_vector - elif atom_pert_style == 
'const' : + v = np.power(v0, 1 / 3) + random_vector = atom_pert_distance * v * random_unit_vector + elif atom_pert_style == "const": e = np.random.randn(3) while np.linalg.norm(e) < 0.1: e = np.random.randn(3) - random_unit_vector = e/np.linalg.norm(e) - random_vector = atom_pert_distance*random_unit_vector + random_unit_vector = e / np.linalg.norm(e) + random_vector = atom_pert_distance * random_unit_vector else: - raise RuntimeError('unsupported options atom_pert_style={}'.format(atom_pert_style)) + raise RuntimeError( + "unsupported options atom_pert_style={}".format(atom_pert_style) + ) return random_vector -class LabeledSystem (System): - ''' + +class LabeledSystem(System): + """ The labeled data System For example, a labeled water system named `d_example` has two molecules (6 atoms) and `nframes` frames. The labels can be accessed by @@ -1063,7 +1141,7 @@ class LabeledSystem (System): The beginning frame when loading MD trajectory. step : int The number of skipped frames when loading MD trajectory. 
- ''' + """ DTYPES = System.DTYPES + ( DataType("energies", np.ndarray, (Axis.NFRAMES,)), @@ -1083,7 +1161,7 @@ def from_fmt_obj(self, fmtobj, file_name, **kwargs): else: self.data = {**self.data, **data} self.check_data() - if hasattr(fmtobj.from_labeled_system, 'post_func'): + if hasattr(fmtobj.from_labeled_system, "post_func"): for post_f in fmtobj.from_labeled_system.post_func: self.post_funcs.get_plugin(post_f)(self) return self @@ -1092,47 +1170,49 @@ def to_fmt_obj(self, fmtobj, *args, **kwargs): return fmtobj.to_labeled_system(self.data, *args, **kwargs) def __str__(self): - ret="Data Summary" - ret+="\nLabeled System" - ret+="\n-------------------" - ret+="\nFrame Numbers : %d"%self.get_nframes() - ret+="\nAtom Numbers : %d"%self.get_natoms() - status= "Yes" if self.has_virial() else "No" - ret+="\nIncluding Virials : %s"% status - ret+="\nElement List :" - ret+="\n-------------------" - ret+="\n"+" ".join(map(str,self.get_atom_names())) - ret+="\n"+" ".join(map(str,self.get_atom_numbs())) + ret = "Data Summary" + ret += "\nLabeled System" + ret += "\n-------------------" + ret += "\nFrame Numbers : %d" % self.get_nframes() + ret += "\nAtom Numbers : %d" % self.get_natoms() + status = "Yes" if self.has_virial() else "No" + ret += "\nIncluding Virials : %s" % status + ret += "\nElement List :" + ret += "\n-------------------" + ret += "\n" + " ".join(map(str, self.get_atom_names())) + ret += "\n" + " ".join(map(str, self.get_atom_numbs())) return ret - def __add__(self,others) : - """magic method "+" operation """ - self_copy=self.copy() - if isinstance(others,LabeledSystem): - other_copy=others.copy() - self_copy.append(other_copy) - elif isinstance(others, list): - for ii in others: - assert(isinstance(ii,LabeledSystem)) - ii_copy=ii.copy() - self_copy.append(ii_copy) - else: - raise RuntimeError("Unspported data structure") - return self.__class__.from_dict({'data':self_copy.data}) - - def has_virial(self) : + def __add__(self, others): + """magic method 
"+" operation""" + self_copy = self.copy() + if isinstance(others, LabeledSystem): + other_copy = others.copy() + self_copy.append(other_copy) + elif isinstance(others, list): + for ii in others: + assert isinstance(ii, LabeledSystem) + ii_copy = ii.copy() + self_copy.append(ii_copy) + else: + raise RuntimeError("Unspported data structure") + return self.__class__.from_dict({"data": self_copy.data}) + + def has_virial(self): # return ('virials' in self.data) and (len(self.data['virials']) > 0) - return ('virials' in self.data) + return "virials" in self.data - def affine_map_fv(self, trans, f_idx) : - assert(np.linalg.det(trans) != 0) - self.data['forces'][f_idx] = np.matmul(self.data['forces'][f_idx], trans) + def affine_map_fv(self, trans, f_idx): + assert np.linalg.det(trans) != 0 + self.data["forces"][f_idx] = np.matmul(self.data["forces"][f_idx], trans) if self.has_virial(): - self.data['virials'][f_idx] = np.matmul(trans.T, np.matmul(self.data['virials'][f_idx], trans)) + self.data["virials"][f_idx] = np.matmul( + trans.T, np.matmul(self.data["virials"][f_idx], trans) + ) - def rot_frame_lower_triangular(self, f_idx = 0) : - trans = System.rot_frame_lower_triangular(self, f_idx = f_idx) - self.affine_map_fv(trans, f_idx = f_idx) + def rot_frame_lower_triangular(self, f_idx=0): + trans = System.rot_frame_lower_triangular(self, f_idx=f_idx) + self.affine_map_fv(trans, f_idx=f_idx) return trans def correction(self, hl_sys): @@ -1156,17 +1236,19 @@ def correction(self, hl_sys): if not isinstance(hl_sys, LabeledSystem): raise RuntimeError("high_sys should be LabeledSystem") corrected_sys = self.copy() - corrected_sys.data['energies'] = hl_sys.data['energies'] - self.data['energies'] - corrected_sys.data['forces'] = hl_sys.data['forces'] - self.data['forces'] - if 'virials' in self.data and 'virials' in hl_sys.data: - corrected_sys.data['virials'] = hl_sys.data['virials'] - self.data['virials'] + corrected_sys.data["energies"] = hl_sys.data["energies"] - 
self.data["energies"] + corrected_sys.data["forces"] = hl_sys.data["forces"] - self.data["forces"] + if "virials" in self.data and "virials" in hl_sys.data: + corrected_sys.data["virials"] = ( + hl_sys.data["virials"] - self.data["virials"] + ) return corrected_sys class MultiSystems: - '''A set containing several systems.''' + """A set containing several systems.""" - def __init__(self, *systems,type_map=None): + def __init__(self, *systems, type_map=None): """ Parameters ---------- @@ -1193,18 +1275,21 @@ def from_fmt_obj(self, fmtobj, directory, labeled=True, **kwargs): return self def to_fmt_obj(self, fmtobj, directory, *args, **kwargs): - for fn, ss in zip(fmtobj.to_multi_systems(self.systems.keys(), directory, **kwargs), self.systems.values()): + for fn, ss in zip( + fmtobj.to_multi_systems(self.systems.keys(), directory, **kwargs), + self.systems.values(), + ): ss.to_fmt_obj(fmtobj, fn, *args, **kwargs) return self - + def to(self, fmt: str, *args, **kwargs) -> "MultiSystems": """Dump systems to the specific format. 
- + Parameters ---------- fmt : str format - + Returns ------- MultiSystems @@ -1225,41 +1310,46 @@ def __repr__(self): return self.__str__() def __str__(self): - return 'MultiSystems ({} systems containing {} frames)'.format(len(self.systems), self.get_nframes()) - - def __add__(self, others) : - """magic method "+" operation """ - self_copy = deepcopy(self) - if isinstance(others, System) or isinstance(others, MultiSystems): - return self.__class__(self, others) - elif isinstance(others, list): - return self.__class__(self, *others) - raise RuntimeError("Unspported data structure") + return "MultiSystems ({} systems containing {} frames)".format( + len(self.systems), self.get_nframes() + ) + + def __add__(self, others): + """magic method "+" operation""" + self_copy = deepcopy(self) + if isinstance(others, System) or isinstance(others, MultiSystems): + return self.__class__(self, others) + elif isinstance(others, list): + return self.__class__(self, *others) + raise RuntimeError("Unspported data structure") @classmethod - def from_file(cls,file_name,fmt, **kwargs): + def from_file(cls, file_name, fmt, **kwargs): multi_systems = cls() - multi_systems.load_systems_from_file(file_name=file_name,fmt=fmt, **kwargs) + multi_systems.load_systems_from_file(file_name=file_name, fmt=fmt, **kwargs) return multi_systems @classmethod - def from_dir(cls,dir_name, file_name, fmt='auto', type_map=None): + def from_dir(cls, dir_name, file_name, fmt="auto", type_map=None): multi_systems = cls() - target_file_list = sorted(glob.glob('./{}/**/{}'.format(dir_name, file_name), recursive=True)) + target_file_list = sorted( + glob.glob("./{}/**/{}".format(dir_name, file_name), recursive=True) + ) for target_file in target_file_list: - multi_systems.append(LabeledSystem(file_name=target_file, fmt=fmt, type_map=type_map)) + multi_systems.append( + LabeledSystem(file_name=target_file, fmt=fmt, type_map=type_map) + ) return multi_systems - def load_systems_from_file(self, file_name=None, 
fmt=None, **kwargs): fmt = fmt.lower() return self.from_fmt_obj(load_format(fmt), file_name, **kwargs) - def get_nframes(self) : + def get_nframes(self): """Returns number of frames in all systems""" return sum(len(system) for system in self.systems.values()) - def append(self, *systems) : + def append(self, *systems): """ Append systems or MultiSystems to systems @@ -1335,7 +1425,9 @@ def predict(self, *args: Any, driver="dp", **kwargs: Any) -> "MultiSystems": new_multisystems.append(ss.predict(*args, driver=driver, **kwargs)) return new_multisystems - def minimize(self, *args: Any, minimizer: Union[str, Minimizer], **kwargs: Any) -> "MultiSystems": + def minimize( + self, *args: Any, minimizer: Union[str, Minimizer], **kwargs: Any + ) -> "MultiSystems": """ Minimize geometry by a minimizer. @@ -1367,10 +1459,10 @@ def minimize(self, *args: Any, minimizer: Union[str, Minimizer], **kwargs: Any) for ss in self: new_multisystems.append(ss.minimize(*args, minimizer=minimizer, **kwargs)) return new_multisystems - + def pick_atom_idx(self, idx, nopbc=None): """Pick atom index - + Parameters ---------- idx: int or list or slice @@ -1429,12 +1521,12 @@ def correction(self, hl_sys: "MultiSystems"): def get_cls_name(cls: object) -> str: """Returns the fully qualified name of a class, such as `np.ndarray`. - + Parameters ---------- cls : object the class - + Returns ------- str @@ -1442,6 +1534,7 @@ def get_cls_name(cls: object) -> str: """ return ".".join([cls.__module__, cls.__name__]) + def add_format_methods(): """Add format methods to System, LabeledSystem, and MultiSystems. @@ -1459,11 +1552,15 @@ def add_format_methods(): Format.register_to(to_func_name)(formatcls) for method, formatcls in Format.get_from_methods().items(): + def get_func(ff): # ff is not initized when defining from_format so cannot be polluted def from_format(self, file_name, **kwargs): return self.from_fmt_obj(ff(), file_name, **kwargs) - from_format.__doc__ = "Read data from :class:`%s` format." 
% (get_cls_name(ff)) + + from_format.__doc__ = "Read data from :class:`%s` format." % ( + get_cls_name(ff) + ) return from_format setattr(System, method, get_func(formatcls)) @@ -1471,9 +1568,11 @@ def from_format(self, file_name, **kwargs): setattr(MultiSystems, method, get_func(formatcls)) for method, formatcls in Format.get_to_methods().items(): + def get_func(ff): def to_format(self, *args, **kwargs): return self.to_fmt_obj(ff(), *args, **kwargs) + to_format.__doc__ = "Dump data to :class:`%s` format." % (get_cls_name(ff)) return to_format @@ -1481,4 +1580,5 @@ def to_format(self, *args, **kwargs): setattr(LabeledSystem, method, get_func(formatcls)) setattr(MultiSystems, method, get_func(formatcls)) + add_format_methods() diff --git a/dpdata/unit.py b/dpdata/unit.py index 0c612256b..a1dc1c7bf 100644 --- a/dpdata/unit.py +++ b/dpdata/unit.py @@ -1,11 +1,11 @@ from abc import ABC from scipy import constants -AVOGADRO = constants.Avogadro # Avagadro constant -ELE_CHG = constants.elementary_charge # Elementary Charge, in C -BOHR = constants.value("atomic unit of length") # Bohr, in m -HARTREE = constants.value("atomic unit of energy") # Hartree, in Jole -RYDBERG = constants.Rydberg * constants.h * constants.c # Rydberg, in Jole +AVOGADRO = constants.Avogadro # Avagadro constant +ELE_CHG = constants.elementary_charge # Elementary Charge, in C +BOHR = constants.value("atomic unit of length") # Bohr, in m +HARTREE = constants.value("atomic unit of energy") # Hartree, in Jole +RYDBERG = constants.Rydberg * constants.h * constants.c # Rydberg, in Jole # energy conversions econvs = { @@ -15,17 +15,18 @@ "kcal_mol": 1 / (ELE_CHG * AVOGADRO / 1000 / 4.184), "rydberg": RYDBERG / ELE_CHG, "J": 1 / ELE_CHG, - "kJ": 1000 / ELE_CHG + "kJ": 1000 / ELE_CHG, } # length conversions lconvs = { "angstrom": 1.0, - "bohr": BOHR * 1E10, + "bohr": BOHR * 1e10, "nm": 10.0, - "m": 1E10, + "m": 1e10, } + def check_unit(unit): if unit not in econvs.keys() and unit not in lconvs.keys(): try: 
@@ -38,6 +39,7 @@ def check_unit(unit): except Exception: raise RuntimeError(f"Invalid unit: {unit}") + class Conversion(ABC): def __init__(self, unitA, unitB, check=True): """ @@ -48,7 +50,7 @@ def __init__(self, unitA, unitB, check=True): unitA : str, unit to be converted unitB : str, unit which unitA is converted to, i.e. `1 unitA = self._value unitB` check : bool, whether to check unit validity - + Examples -------- >>> conv = Conversion("foo", "bar", check=False) @@ -64,19 +66,20 @@ def __init__(self, unitA, unitB, check=True): self.unitA = unitA self.unitB = unitB self._value = 0.0 - + def value(self): return self._value - + def set_value(self, value): self._value = value - + def __repr__(self): return f"1 {self.unitA} = {self._value} {self.unitB}" - + def __str__(self): return self.__repr__() + class EnergyConversion(Conversion): def __init__(self, unitA, unitB): """ @@ -91,6 +94,7 @@ def __init__(self, unitA, unitB): super().__init__(unitA, unitB) self.set_value(econvs[unitA] / econvs[unitB]) + class LengthConversion(Conversion): def __init__(self, unitA, unitB): """ @@ -105,6 +109,7 @@ def __init__(self, unitA, unitB): super().__init__(unitA, unitB) self.set_value(lconvs[unitA] / lconvs[unitB]) + class ForceConversion(Conversion): def __init__(self, unitA, unitB): """ @@ -125,6 +130,7 @@ def __init__(self, unitA, unitB): lconv = LengthConversion(unitA.split("/")[1], unitB.split("/")[1]).value() self.set_value(econv / lconv) + class PressureConversion(Conversion): def __init__(self, unitA, unitB): """ @@ -148,18 +154,18 @@ def __init__(self, unitA, unitB): econv = EnergyConversion(eunitA, eunitB).value() * factorA / factorB lconv = LengthConversion(lunitA, lunitB).value() self.set_value(econv / lconv**3) - + def _convert_unit(self, unit): if unit == "Pa" or unit == "pa": return "J/m^3", 1 elif unit == "kPa" or unit == "kpa": return "kJ/m^3", 1 elif unit == "GPa" or unit == "Gpa": - return "kJ/m^3", 1E6 + return "kJ/m^3", 1e6 elif unit == "bar": - return 
"J/m^3", 1E5 + return "J/m^3", 1e5 elif unit == "kbar": - return "kJ/m^3", 1E5 + return "kJ/m^3", 1e5 else: return unit, 1 diff --git a/dpdata/utils.py b/dpdata/utils.py index d0ccb26bb..90fef137d 100644 --- a/dpdata/utils.py +++ b/dpdata/utils.py @@ -1,38 +1,44 @@ import numpy as np from dpdata.periodic_table import Element -def elements_index_map(elements,standard=False,inverse=False): + +def elements_index_map(elements, standard=False, inverse=False): if standard: elements.sort(key=lambda x: Element(x).Z) if inverse: - return dict(zip(range(len(elements)),elements)) + return dict(zip(range(len(elements)), elements)) else: - return dict(zip(elements,range(len(elements)))) + return dict(zip(elements, range(len(elements)))) + + # %% -def remove_pbc(system, protect_layer = 9): + +def remove_pbc(system, protect_layer=9): nframes = len(system["coords"]) - natoms = len(system['coords'][0]) + natoms = len(system["coords"][0]) for ff in range(nframes): - tmpcoord = system['coords'][ff] - cog = np.average(tmpcoord, axis = 0) + tmpcoord = system["coords"][ff] + cog = np.average(tmpcoord, axis=0) dist = tmpcoord - np.tile(cog, [natoms, 1]) - max_dist = np.max(np.linalg.norm(dist, axis = 1)) + max_dist = np.max(np.linalg.norm(dist, axis=1)) h_cell_size = max_dist + protect_layer cell_size = h_cell_size * 2 - shift = np.array([1,1,1]) * h_cell_size - cog - system['coords'][ff] = system['coords'][ff] + np.tile(shift, [natoms, 1]) - system['cells'][ff] = cell_size * np.eye(3) + shift = np.array([1, 1, 1]) * h_cell_size - cog + system["coords"][ff] = system["coords"][ff] + np.tile(shift, [natoms, 1]) + system["cells"][ff] = cell_size * np.eye(3) return system + def add_atom_names(data, atom_names): """ Add atom_names that do not exist. 
""" - data['atom_names'].extend(atom_names) - data['atom_numbs'].extend([0 for _ in atom_names]) + data["atom_names"].extend(atom_names) + data["atom_numbs"].extend([0 for _ in atom_names]) return data + def sort_atom_names(data, type_map=None): """ Sort atom_names of the system and reorder atom_numbs and atom_types accoarding @@ -47,7 +53,7 @@ def sort_atom_names(data, type_map=None): if type_map is not None: # assign atom_names index to the specify order # atom_names must be a subset of type_map - assert (set(data['atom_names']).issubset(set(type_map))) + assert set(data["atom_names"]).issubset(set(type_map)) # for the condition that type_map is a proper superset of atom_names # new_atoms = set(type_map) - set(data["atom_names"]) new_atoms = [e for e in type_map if e not in data["atom_names"]] @@ -57,16 +63,17 @@ def sort_atom_names(data, type_map=None): # a[as[a]] == b[as[b]] as == argsort # as[as[b]] == as^{-1}[b] # a[as[a][as[as[b]]]] = b[as[b][as^{-1}[b]]] = b[id] - idx = np.argsort(data['atom_names'])[np.argsort(np.argsort(type_map))] + idx = np.argsort(data["atom_names"])[np.argsort(np.argsort(type_map))] else: # index that will sort an array by alphabetical order - idx = np.argsort(data['atom_names']) + idx = np.argsort(data["atom_names"]) # sort atom_names, atom_numbs, atom_types by idx - data['atom_names'] = list(np.array(data['atom_names'])[idx]) - data['atom_numbs'] = list(np.array(data['atom_numbs'])[idx]) - data['atom_types'] = np.argsort(idx)[data['atom_types']] + data["atom_names"] = list(np.array(data["atom_names"])[idx]) + data["atom_numbs"] = list(np.array(data["atom_numbs"])[idx]) + data["atom_types"] = np.argsort(idx)[data["atom_types"]] return data + def uniq_atom_names(data): """ Make the atom names uniq. 
For example @@ -80,12 +87,14 @@ def uniq_atom_names(data): """ unames = [] uidxmap = [] - for idx,ii in enumerate(data['atom_names']): + for idx, ii in enumerate(data["atom_names"]): if ii not in unames: unames.append(ii) uidxmap.append(unames.index(ii)) - data['atom_names'] = unames - tmp_type = list(data['atom_types']).copy() - data['atom_types'] = np.array([uidxmap[jj] for jj in tmp_type], dtype=int) - data['atom_numbs'] = [sum( ii == data['atom_types'] ) for ii in range(len(data['atom_names'])) ] + data["atom_names"] = unames + tmp_type = list(data["atom_types"]).copy() + data["atom_types"] = np.array([uidxmap[jj] for jj in tmp_type], dtype=int) + data["atom_numbs"] = [ + sum(ii == data["atom_types"]) for ii in range(len(data["atom_names"])) + ] return data diff --git a/dpdata/vasp/outcar.py b/dpdata/vasp/outcar.py index 3e32a1461..ec26a1812 100644 --- a/dpdata/vasp/outcar.py +++ b/dpdata/vasp/outcar.py @@ -2,103 +2,122 @@ import re import warnings -def system_info(lines, type_idx_zero = False): + +def system_info(lines, type_idx_zero=False): atom_names = [] atom_numbs = None nelm = None for ii in lines: - ii_word_list=ii.split() - if 'TITEL' in ii : + ii_word_list = ii.split() + if "TITEL" in ii: # get atom names from POTCAR info, tested only for PAW_PBE ... 
- _ii=ii.split()[3] - if '_' in _ii: + _ii = ii.split()[3] + if "_" in _ii: # for case like : TITEL = PAW_PBE Sn_d 06Sep2000 - atom_names.append(_ii.split('_')[0]) + atom_names.append(_ii.split("_")[0]) else: atom_names.append(_ii) - #a stricker check for "NELM"; compatible with distingct formats in different versions(6 and older, newers_expect-to-work) of vasp + # a stricker check for "NELM"; compatible with distingct formats in different versions(6 and older, newers_expect-to-work) of vasp elif nelm is None: - m = re.search(r'NELM\s*=\s*(\d+)', ii) + m = re.search(r"NELM\s*=\s*(\d+)", ii) if m: nelm = int(m.group(1)) - if 'ions per type' in ii : + if "ions per type" in ii: atom_numbs_ = [int(s) for s in ii.split()[4:]] - if atom_numbs is None : + if atom_numbs is None: atom_numbs = atom_numbs_ - else : - assert (atom_numbs == atom_numbs_), "in consistent numb atoms in OUTCAR" - assert(nelm is not None), "cannot find maximum steps for each SC iteration" - assert(atom_numbs is not None), "cannot find ion type info in OUTCAR" - atom_names = atom_names[:len(atom_numbs)] + else: + assert atom_numbs == atom_numbs_, "in consistent numb atoms in OUTCAR" + assert nelm is not None, "cannot find maximum steps for each SC iteration" + assert atom_numbs is not None, "cannot find ion type info in OUTCAR" + atom_names = atom_names[: len(atom_numbs)] atom_types = [] - for idx,ii in enumerate(atom_numbs): - for jj in range(ii) : - if type_idx_zero : + for idx, ii in enumerate(atom_numbs): + for jj in range(ii): + if type_idx_zero: atom_types.append(idx) - else : - atom_types.append(idx+1) - return atom_names, atom_numbs, np.array(atom_types, dtype = int), nelm + else: + atom_types.append(idx + 1) + return atom_names, atom_numbs, np.array(atom_types, dtype=int), nelm -def get_outcar_block(fp, ml = False): +def get_outcar_block(fp, ml=False): blk = [] - energy_token = ['free energy TOTEN', 'free energy ML TOTEN'] + energy_token = ["free energy TOTEN", "free energy ML TOTEN"] 
ml_index = int(ml) - for ii in fp : - if not ii : + for ii in fp: + if not ii: return blk - blk.append(ii.rstrip('\n')) + blk.append(ii.rstrip("\n")) if energy_token[ml_index] in ii: return blk return blk + # we assume that the force is printed ... -def get_frames(fname, begin = 0, step = 1, ml = False, convergence_check=True): +def get_frames(fname, begin=0, step=1, ml=False, convergence_check=True): fp = open(fname) blk = get_outcar_block(fp) - atom_names, atom_numbs, atom_types, nelm = system_info(blk, type_idx_zero = True) + atom_names, atom_numbs, atom_types, nelm = system_info(blk, type_idx_zero=True) ntot = sum(atom_numbs) all_coords = [] all_cells = [] all_energies = [] all_forces = [] - all_virials = [] + all_virials = [] cc = 0 rec_failed = [] - while len(blk) > 0 : - if cc >= begin and (cc - begin) % step == 0 : - coord, cell, energy, force, virial, is_converge = analyze_block(blk, ntot, nelm, ml) + while len(blk) > 0: + if cc >= begin and (cc - begin) % step == 0: + coord, cell, energy, force, virial, is_converge = analyze_block( + blk, ntot, nelm, ml + ) if len(coord) == 0: break - if is_converge or not convergence_check: + if is_converge or not convergence_check: all_coords.append(coord) all_cells.append(cell) all_energies.append(energy) all_forces.append(force) - if virial is not None : + if virial is not None: all_virials.append(virial) if not is_converge: - rec_failed.append(cc+1) + rec_failed.append(cc + 1) blk = get_outcar_block(fp, ml) cc += 1 - - if len(rec_failed) > 0 : - prt = "so they are not collected." if convergence_check else "but they are still collected due to the requirement for ignoring convergence checks." - warnings.warn(f"The following structures were unconverged: {rec_failed}; "+prt) - - if len(all_virials) == 0 : + + if len(rec_failed) > 0: + prt = ( + "so they are not collected." + if convergence_check + else "but they are still collected due to the requirement for ignoring convergence checks." 
+ ) + warnings.warn( + f"The following structures were unconverged: {rec_failed}; " + prt + ) + + if len(all_virials) == 0: all_virials = None - else : + else: all_virials = np.array(all_virials) fp.close() - return atom_names, atom_numbs, atom_types, np.array(all_cells), np.array(all_coords), np.array(all_energies), np.array(all_forces), all_virials + return ( + atom_names, + atom_numbs, + atom_types, + np.array(all_cells), + np.array(all_coords), + np.array(all_energies), + np.array(all_forces), + all_virials, + ) -def analyze_block(lines, ntot, nelm, ml = False): +def analyze_block(lines, ntot, nelm, ml=False): coord = [] cell = [] energy = None @@ -106,36 +125,39 @@ def analyze_block(lines, ntot, nelm, ml = False): virial = None is_converge = True sc_index = 0 - #select different searching tokens based on the ml label - energy_token = ['free energy TOTEN', 'free energy ML TOTEN'] + # select different searching tokens based on the ml label + energy_token = ["free energy TOTEN", "free energy ML TOTEN"] energy_index = [4, 5] - virial_token = ['FORCE on cell =-STRESS in cart. coord. units', 'ML FORCE'] + virial_token = ["FORCE on cell =-STRESS in cart. coord. 
units", "ML FORCE"] virial_index = [14, 4] - cell_token = ['VOLUME and BASIS', 'ML FORCE'] + cell_token = ["VOLUME and BASIS", "ML FORCE"] cell_index = [5, 12] ml_index = int(ml) - for idx,ii in enumerate(lines): - #if set ml == True, is_converged will always be True - if ('Iteration' in ii) and (not ml): + for idx, ii in enumerate(lines): + # if set ml == True, is_converged will always be True + if ("Iteration" in ii) and (not ml): sc_index = int(ii.split()[3][:-1]) if sc_index >= nelm: is_converge = False elif energy_token[ml_index] in ii: energy = float(ii.split()[energy_index[ml_index]]) - assert((force is not None) and len(coord) > 0 and len(cell) > 0) + assert (force is not None) and len(coord) > 0 and len(cell) > 0 return coord, cell, energy, force, virial, is_converge elif cell_token[ml_index] in ii: - for dd in range(3) : - tmp_l = lines[idx+cell_index[ml_index]+dd] - cell.append([float(ss) - for ss in tmp_l.replace('-',' -').split()[0:3]]) + for dd in range(3): + tmp_l = lines[idx + cell_index[ml_index] + dd] + cell.append([float(ss) for ss in tmp_l.replace("-", " -").split()[0:3]]) elif virial_token[ml_index] in ii: in_kB_index = virial_index[ml_index] - while idx+in_kB_index < len(lines) and (not lines[idx+in_kB_index].split()[0:2] == ["in", "kB"]) : + while idx + in_kB_index < len(lines) and ( + not lines[idx + in_kB_index].split()[0:2] == ["in", "kB"] + ): in_kB_index += 1 - assert(idx+in_kB_index < len(lines)),'ERROR: "in kB" is not found in OUTCAR. Unable to extract virial.' - tmp_v = [float(ss) for ss in lines[idx+in_kB_index].split()[2:8]] - virial = np.zeros([3,3]) + assert idx + in_kB_index < len( + lines + ), 'ERROR: "in kB" is not found in OUTCAR. Unable to extract virial.' 
+ tmp_v = [float(ss) for ss in lines[idx + in_kB_index].split()[2:8]] + virial = np.zeros([3, 3]) virial[0][0] = tmp_v[0] virial[1][1] = tmp_v[1] virial[2][2] = tmp_v[2] @@ -145,8 +167,8 @@ def analyze_block(lines, ntot, nelm, ml = False): virial[2][1] = tmp_v[4] virial[0][2] = tmp_v[5] virial[2][0] = tmp_v[5] - elif 'TOTAL-FORCE' in ii and (("ML" in ii) == ml): - for jj in range(idx+2, idx+2+ntot) : + elif "TOTAL-FORCE" in ii and (("ML" in ii) == ml): + for jj in range(idx + 2, idx + 2 + ntot): tmp_l = lines[jj] info = [float(ss) for ss in tmp_l.split()] coord.append(info[:3]) diff --git a/dpdata/vasp/poscar.py b/dpdata/vasp/poscar.py index 53d93782b..1100eb852 100644 --- a/dpdata/vasp/poscar.py +++ b/dpdata/vasp/poscar.py @@ -1,86 +1,90 @@ -#!/usr/bin/python3 +#!/usr/bin/python3 import numpy as np -def _to_system_data_lower(lines, cartesian = True) : - ''' + +def _to_system_data_lower(lines, cartesian=True): + """ Treat as cartesian poscar - ''' + """ system = {} - system['atom_names'] = [str(ii) for ii in lines[5].split()] - system['atom_numbs'] = [int(ii) for ii in lines[6].split()] + system["atom_names"] = [str(ii) for ii in lines[5].split()] + system["atom_numbs"] = [int(ii) for ii in lines[6].split()] scale = float(lines[1]) cell = [] - for ii in range(2,5) : + for ii in range(2, 5): boxv = [float(jj) for jj in lines[ii].split()] boxv = np.array(boxv) * scale cell.append(boxv) - system['cells'] = [np.array(cell)] - natoms = sum(system['atom_numbs']) + system["cells"] = [np.array(cell)] + natoms = sum(system["atom_numbs"]) coord = [] - for ii in range(8, 8+natoms) : + for ii in range(8, 8 + natoms): tmpv = [float(jj) for jj in lines[ii].split()[:3]] - if cartesian : + if cartesian: tmpv = np.array(tmpv) * scale - else : - tmpv = np.matmul(np.array(tmpv), system['cells'][0]) + else: + tmpv = np.matmul(np.array(tmpv), system["cells"][0]) coord.append(tmpv) - system['coords'] = [np.array(coord)] - system['orig'] = np.zeros(3) + system["coords"] = 
[np.array(coord)] + system["orig"] = np.zeros(3) atom_types = [] - for idx,ii in enumerate(system['atom_numbs']) : - for jj in range(ii) : + for idx, ii in enumerate(system["atom_numbs"]): + for jj in range(ii): atom_types.append(idx) - system['atom_types'] = np.array(atom_types, dtype = int) - system['cells'] = np.array(system['cells']) - system['coords'] = np.array(system['coords']) + system["atom_types"] = np.array(atom_types, dtype=int) + system["cells"] = np.array(system["cells"]) + system["coords"] = np.array(system["coords"]) return system -def to_system_data(lines) : +def to_system_data(lines): # remove the line that has 'selective dynamics' - if lines[7][0] == 'S' or lines[7][0] == 's' : + if lines[7][0] == "S" or lines[7][0] == "s": lines.pop(7) - is_cartesian = (lines[7][0] in ['C', 'c', 'K', 'k']) - if not is_cartesian : - if not (lines[7][0] in ['d', 'D']) : - raise RuntimeError('seem not to be a valid POSCAR of vasp 5.x, may be a POSCAR of vasp 4.x?') + is_cartesian = lines[7][0] in ["C", "c", "K", "k"] + if not is_cartesian: + if not (lines[7][0] in ["d", "D"]): + raise RuntimeError( + "seem not to be a valid POSCAR of vasp 5.x, may be a POSCAR of vasp 4.x?" 
+ ) return _to_system_data_lower(lines, is_cartesian) -def from_system_data(system, f_idx = 0, skip_zeros = True) : - ret = '' - for ii,name in zip(system['atom_numbs'], system['atom_names']) : - if ii == 0: continue - ret += '%s%d ' % (name, ii) - ret += '\n' - ret += '1.0\n' - for ii in system['cells'][f_idx] : - for jj in ii : - ret += '%.16e ' % jj - ret += '\n' - for idx,ii in enumerate(system['atom_names']) : - if system['atom_numbs'][idx] == 0: continue - ret += '%s ' % ii - ret += '\n' - for ii in system['atom_numbs'] : - if ii == 0: continue - ret += '%d ' % ii - ret += '\n' +def from_system_data(system, f_idx=0, skip_zeros=True): + ret = "" + for ii, name in zip(system["atom_numbs"], system["atom_names"]): + if ii == 0: + continue + ret += "%s%d " % (name, ii) + ret += "\n" + ret += "1.0\n" + for ii in system["cells"][f_idx]: + for jj in ii: + ret += "%.16e " % jj + ret += "\n" + for idx, ii in enumerate(system["atom_names"]): + if system["atom_numbs"][idx] == 0: + continue + ret += "%s " % ii + ret += "\n" + for ii in system["atom_numbs"]: + if ii == 0: + continue + ret += "%d " % ii + ret += "\n" # should use Cartesian for VESTA software - ret += 'Cartesian\n' - atype = system['atom_types'] - posis = system['coords'][f_idx] + ret += "Cartesian\n" + atype = system["atom_types"] + posis = system["coords"][f_idx] # atype_idx = [[idx,tt] for idx,tt in enumerate(atype)] # sort_idx = np.argsort(atype, kind = 'mergesort') sort_idx = np.lexsort((np.arange(len(atype)), atype)) atype = atype[sort_idx] posis = posis[sort_idx] posi_list = [] - for ii in posis : - posi_list.append('%15.10f %15.10f %15.10f' % \ - (ii[0], ii[1], ii[2]) - ) - posi_list.append('') - ret += '\n'.join(posi_list) + for ii in posis: + posi_list.append("%15.10f %15.10f %15.10f" % (ii[0], ii[1], ii[2])) + posi_list.append("") + ret += "\n".join(posi_list) return ret diff --git a/dpdata/vasp/xml.py b/dpdata/vasp/xml.py index d5e1fb039..f87b57168 100755 --- a/dpdata/vasp/xml.py +++ 
b/dpdata/vasp/xml.py @@ -3,75 +3,89 @@ import xml.etree.ElementTree as ET import numpy as np -def check_name(item, name) : - assert (item.attrib['name'] == name), "item attrib '%s' dose not math required '%s'" % (item.attrib['name'], name) -def get_varray(varray) : +def check_name(item, name): + assert ( + item.attrib["name"] == name + ), "item attrib '%s' dose not math required '%s'" % (item.attrib["name"], name) + + +def get_varray(varray): array = [] - for vv in varray.findall('v') : - array.append([ float(ii) for ii in vv.text.split()]) + for vv in varray.findall("v"): + array.append([float(ii) for ii in vv.text.split()]) return np.array(array) -def analyze_atominfo(atominfo_xml) : - check_name(atominfo_xml.find('array'), 'atoms') + +def analyze_atominfo(atominfo_xml): + check_name(atominfo_xml.find("array"), "atoms") eles = [] types = [] - for ii in atominfo_xml.find('array').find('set') : - eles .append((ii.findall('c')[0].text.strip())) - types.append(int(ii.findall('c')[1].text)) + for ii in atominfo_xml.find("array").find("set"): + eles.append((ii.findall("c")[0].text.strip())) + types.append(int(ii.findall("c")[1].text)) uniq_ele = [] - for ii in eles : - if not(ii in uniq_ele): + for ii in eles: + if not (ii in uniq_ele): uniq_ele.append(ii) return uniq_ele, types -def analyze_calculation(cc) : - structure_xml = cc.find('structure') - check_name(structure_xml.find('crystal').find('varray'), 'basis') - check_name(structure_xml.find('varray'), 'positions') - cell = get_varray(structure_xml.find('crystal').find('varray')) - posi = get_varray(structure_xml.find('varray')) + +def analyze_calculation(cc): + structure_xml = cc.find("structure") + check_name(structure_xml.find("crystal").find("varray"), "basis") + check_name(structure_xml.find("varray"), "positions") + cell = get_varray(structure_xml.find("crystal").find("varray")) + posi = get_varray(structure_xml.find("varray")) strs = None - for vv in cc.findall('varray') : - if vv.attrib['name'] == 'forces' 
: - forc = get_varray(vv) - elif vv.attrib['name'] == 'stress' : + for vv in cc.findall("varray"): + if vv.attrib["name"] == "forces": + forc = get_varray(vv) + elif vv.attrib["name"] == "stress": strs = get_varray(vv) - for ii in cc.find('energy').findall('i') : - if ii.attrib['name'] == 'e_fr_energy' : + for ii in cc.find("energy").findall("i"): + if ii.attrib["name"] == "e_fr_energy": ener = float(ii.text) # print(ener) # return 'a' return posi, cell, ener, forc, strs -def formulate_config(eles, types, posi, cell, ener, forc, strs_) : + +def formulate_config(eles, types, posi, cell, ener, forc, strs_): strs = strs_ / 1602 natoms = len(types) - ntypes = len(eles) + ntypes = len(eles) ret = "" - ret += "#N %d %d\n" % (natoms, ntypes-1) + ret += "#N %d %d\n" % (natoms, ntypes - 1) ret += "#C " - for ii in eles : - ret += ' ' + ii - ret += '\n' + for ii in eles: + ret += " " + ii + ret += "\n" ret += "##\n" - ret += '#X %13.8f %13.8f %13.8f\n' % (cell[0][0], cell[0][1], cell[0][2]) - ret += '#Y %13.8f %13.8f %13.8f\n' % (cell[1][0], cell[1][1], cell[1][2]) - ret += '#Z %13.8f %13.8f %13.8f\n' % (cell[2][0], cell[2][1], cell[2][2]) + ret += "#X %13.8f %13.8f %13.8f\n" % (cell[0][0], cell[0][1], cell[0][2]) + ret += "#Y %13.8f %13.8f %13.8f\n" % (cell[1][0], cell[1][1], cell[1][2]) + ret += "#Z %13.8f %13.8f %13.8f\n" % (cell[2][0], cell[2][1], cell[2][2]) ret += "#W 1.0\n" ret += "#E %.10f\n" % (ener / natoms) - ret += '#S %.9e %.9e %.9e %.9e %.9e %.9e\n' % \ - (strs[0][0], strs[1][1], strs[2][2], strs[0][1], strs[1][2], strs[0][2]) - ret += '#F\n' - for ii in range(natoms) : + ret += "#S %.9e %.9e %.9e %.9e %.9e %.9e\n" % ( + strs[0][0], + strs[1][1], + strs[2][2], + strs[0][1], + strs[1][2], + strs[0][2], + ) + ret += "#F\n" + for ii in range(natoms): sp = np.matmul(cell.T, posi[ii]) - ret += '%d' % (types[ii]-1) - ret += ' %12.6f %12.6f %12.6f' % (sp[0], sp[1], sp[2]) - ret += ' %12.6f %12.6f %12.6f' % (forc[ii][0], forc[ii][1], forc[ii][2]) - ret += '\n' + ret += 
"%d" % (types[ii] - 1) + ret += " %12.6f %12.6f %12.6f" % (sp[0], sp[1], sp[2]) + ret += " %12.6f %12.6f %12.6f" % (forc[ii][0], forc[ii][1], forc[ii][2]) + ret += "\n" return ret -def analyze (fname, type_idx_zero = False, begin = 0, step = 1) : + +def analyze(fname, type_idx_zero=False, begin=0, step=1): """ can deal with broken xml file """ @@ -83,22 +97,37 @@ def analyze (fname, type_idx_zero = False, begin = 0, step = 1) : cc = 0 try: for event, elem in ET.iterparse(fname): - if elem.tag == 'atominfo' : + if elem.tag == "atominfo": eles, types = analyze_atominfo(elem) - types = np.array(types, dtype = int) - if type_idx_zero : + types = np.array(types, dtype=int) + if type_idx_zero: types = types - 1 - if elem.tag == 'calculation' : + if elem.tag == "calculation": posi, cell, ener, forc, strs = analyze_calculation(elem) - if cc >= begin and (cc - begin) % step == 0 : + if cc >= begin and (cc - begin) % step == 0: all_posi.append(posi) all_cell.append(cell) all_ener.append(ener) all_forc.append(forc) - if strs is not None : - all_strs.append(strs) + if strs is not None: + all_strs.append(strs) cc += 1 except ET.ParseError: - return eles, types, np.array(all_cell), np.array(all_posi), np.array(all_ener), np.array(all_forc), np.array(all_strs) - return eles, types, np.array(all_cell), np.array(all_posi), np.array(all_ener), np.array(all_forc), np.array(all_strs) - + return ( + eles, + types, + np.array(all_cell), + np.array(all_posi), + np.array(all_ener), + np.array(all_forc), + np.array(all_strs), + ) + return ( + eles, + types, + np.array(all_cell), + np.array(all_posi), + np.array(all_ener), + np.array(all_forc), + np.array(all_strs), + ) diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py index e902958ac..ea2d9a776 100644 --- a/dpdata/xyz/quip_gap_xyz.py +++ b/dpdata/xyz/quip_gap_xyz.py @@ -1,24 +1,30 @@ #!/usr/bin/env python3 -#%% +#%% import numpy as np from collections import OrderedDict -import re +import re + + class 
QuipGapxyzSystems(object): """ - deal with QuipGapxyzFile + deal with QuipGapxyzFile """ + def __init__(self, file_name): - self.file_object = open(file_name, 'r') + self.file_object = open(file_name, "r") self.block_generator = self.get_block_generator() + def __iter__(self): return self + def __next__(self): return self.handle_single_xyz_frame(next(self.block_generator)) + def __del__(self): self.file_object.close() - + def get_block_generator(self): - p3 = re.compile(r'^\s*(\d+)\s*') + p3 = re.compile(r"^\s*(\d+)\s*") while True: line = self.file_object.readline() if not line: @@ -27,28 +33,45 @@ def get_block_generator(self): atom_num = int(p3.match(line).group(1)) lines = [] lines.append(line) - for ii in range(atom_num+1): + for ii in range(atom_num + 1): lines.append(self.file_object.readline()) if not lines[-1]: - raise RuntimeError("this xyz file may lack of lines, should be {};lines:{}".format(atom_num+2, lines)) + raise RuntimeError( + "this xyz file may lack of lines, should be {};lines:{}".format( + atom_num + 2, lines + ) + ) yield lines - + @staticmethod def handle_single_xyz_frame(lines): - atom_num = int(lines[0].strip('\n').strip()) + atom_num = int(lines[0].strip("\n").strip()) if len(lines) != atom_num + 2: - raise RuntimeError("format error, atom_num=={}, {}!=atom_num+2".format(atom_num, len(lines))) - data_format_line = lines[1].strip('\n').strip()+str(' ') - field_value_pattern= re.compile(r'(?P\S+)=(?P[\'\"]?)(?P.*?)(?P=quote)\s+') - prop_pattern = re.compile(r'(?P\w+?):(?P[a-zA-Z]):(?P\d+)') + raise RuntimeError( + "format error, atom_num=={}, {}!=atom_num+2".format( + atom_num, len(lines) + ) + ) + data_format_line = lines[1].strip("\n").strip() + str(" ") + field_value_pattern = re.compile( + r"(?P\S+)=(?P[\'\"]?)(?P.*?)(?P=quote)\s+" + ) + prop_pattern = re.compile( + r"(?P\w+?):(?P[a-zA-Z]):(?P\d+)" + ) - data_format_list= [kv_dict.groupdict() for kv_dict in field_value_pattern.finditer(data_format_line)] + data_format_list = [ + 
kv_dict.groupdict() + for kv_dict in field_value_pattern.finditer(data_format_line) + ] field_dict = {} for item in data_format_list: - field_dict[item['key']]=item['value'] + field_dict[item["key"]] = item["value"] - Properties = field_dict['Properties'] - prop_list = [kv_dict.groupdict() for kv_dict in prop_pattern.finditer(Properties)] + Properties = field_dict["Properties"] + prop_list = [ + kv_dict.groupdict() for kv_dict in prop_pattern.finditer(Properties) + ] data_lines = [] for line in lines[2:]: @@ -60,38 +83,58 @@ def handle_single_xyz_frame(lines): coords_array = None Z_array = None force_array = None - virials = None + virials = None for kv_dict in prop_list: - if kv_dict['key'] == 'species': - if kv_dict['datatype'] != 'S': - raise RuntimeError("datatype for species must be 'S' instead of {}".format(kv_dict['datatype'])) - field_length = int(kv_dict['value']) - type_array = data_array[:,used_colomn:used_colomn+field_length].flatten() + if kv_dict["key"] == "species": + if kv_dict["datatype"] != "S": + raise RuntimeError( + "datatype for species must be 'S' instead of {}".format( + kv_dict["datatype"] + ) + ) + field_length = int(kv_dict["value"]) + type_array = data_array[ + :, used_colomn : used_colomn + field_length + ].flatten() used_colomn += field_length continue - elif kv_dict['key'] == 'pos': - if kv_dict['datatype'] != 'R': - raise RuntimeError("datatype for pos must be 'R' instead of {}".format(kv_dict['datatype'])) - field_length = int(kv_dict['value']) - coords_array = data_array[:,used_colomn:used_colomn+field_length] + elif kv_dict["key"] == "pos": + if kv_dict["datatype"] != "R": + raise RuntimeError( + "datatype for pos must be 'R' instead of {}".format( + kv_dict["datatype"] + ) + ) + field_length = int(kv_dict["value"]) + coords_array = data_array[:, used_colomn : used_colomn + field_length] used_colomn += field_length continue - elif kv_dict['key'] == 'Z': - if kv_dict['datatype'] != 'I': - raise RuntimeError("datatype for pos must 
be 'R' instead of {}".format(kv_dict['datatype'])) - field_length = int(kv_dict['value']) - Z_array = data_array[:,used_colomn:used_colomn+field_length].flatten() + elif kv_dict["key"] == "Z": + if kv_dict["datatype"] != "I": + raise RuntimeError( + "datatype for pos must be 'R' instead of {}".format( + kv_dict["datatype"] + ) + ) + field_length = int(kv_dict["value"]) + Z_array = data_array[ + :, used_colomn : used_colomn + field_length + ].flatten() used_colomn += field_length continue - elif kv_dict['key'] == 'force': - if kv_dict['datatype'] != 'R': - raise RuntimeError("datatype for pos must be 'R' instead of {}".format(kv_dict['datatype'])) - field_length = int(kv_dict['value']) - force_array = data_array[:,used_colomn:used_colomn+field_length] + elif kv_dict["key"] == "force": + if kv_dict["datatype"] != "R": + raise RuntimeError( + "datatype for pos must be 'R' instead of {}".format( + kv_dict["datatype"] + ) + ) + field_length = int(kv_dict["value"]) + force_array = data_array[:, used_colomn : used_colomn + field_length] used_colomn += field_length continue else: - raise RuntimeError("unknown field {}".format(kv_dict['key'])) + raise RuntimeError("unknown field {}".format(kv_dict["key"])) type_num_dict = OrderedDict() atom_type_list = [] @@ -111,23 +154,35 @@ def handle_single_xyz_frame(lines): atom_type_list.append(temp_atom_index) type_num_dict[ii] += 1 type_num_list = [] - for atom_type,atom_num in type_num_dict.items(): - type_num_list.append((atom_type,atom_num)) + for atom_type, atom_num in type_num_dict.items(): + type_num_list.append((atom_type, atom_num)) type_num_array = np.array(type_num_list) - if field_dict.get('virial', None): - virials = np.array([np.array(list(filter(bool,field_dict['virial'].split(' ')))).reshape(3,3)]).astype('float32') + if field_dict.get("virial", None): + virials = np.array( + [ + np.array( + list(filter(bool, field_dict["virial"].split(" "))) + ).reshape(3, 3) + ] + ).astype("float32") else: virials = None info_dict = 
{} - info_dict['atom_names'] = list(type_num_array[:,0]) - info_dict['atom_numbs'] = list(type_num_array[:,1].astype(int)) - info_dict['atom_types'] = np.array(atom_type_list).astype(int) - info_dict['cells'] = np.array([np.array(list(filter(bool,field_dict['Lattice'].split(' ')))).reshape(3,3)]).astype('float32') - info_dict['coords'] = np.array([coords_array]).astype('float32') - info_dict['energies'] = np.array([field_dict['energy']]).astype('float32') - info_dict['forces'] = np.array([force_array]).astype('float32') + info_dict["atom_names"] = list(type_num_array[:, 0]) + info_dict["atom_numbs"] = list(type_num_array[:, 1].astype(int)) + info_dict["atom_types"] = np.array(atom_type_list).astype(int) + info_dict["cells"] = np.array( + [ + np.array(list(filter(bool, field_dict["Lattice"].split(" ")))).reshape( + 3, 3 + ) + ] + ).astype("float32") + info_dict["coords"] = np.array([coords_array]).astype("float32") + info_dict["energies"] = np.array([field_dict["energy"]]).astype("float32") + info_dict["forces"] = np.array([force_array]).astype("float32") if virials is not None: - info_dict['virials'] = virials - info_dict['orig'] = np.zeros(3) + info_dict["virials"] = virials + info_dict["orig"] = np.zeros(3) return info_dict diff --git a/dpdata/xyz/xyz.py b/dpdata/xyz/xyz.py index 0ca5ac311..a28bafa06 100644 --- a/dpdata/xyz/xyz.py +++ b/dpdata/xyz/xyz.py @@ -2,21 +2,22 @@ import numpy as np -def coord_to_xyz(coord: np.ndarray, types: list)->str: + +def coord_to_xyz(coord: np.ndarray, types: list) -> str: """Convert coordinates and types to xyz format. 
- + Parameters ---------- coord: np.ndarray coordinates, Nx3 array types: list list of types - + Returns ------- str xyz format string - + Examples -------- >>> coord_to_xyz(np.ones((1,3)), ["C"]) @@ -24,7 +25,7 @@ def coord_to_xyz(coord: np.ndarray, types: list)->str: C 1.000000 1.000000 1.000000 """ - buff = [str(len(types)), ''] + buff = [str(len(types)), ""] for at, cc in zip(types, coord): buff.append("{} {:.6f} {:.6f} {:.6f}".format(at, *cc)) return "\n".join(buff) @@ -47,7 +48,7 @@ def xyz_to_coord(xyz: str) -> Tuple[np.ndarray, list]: """ symbols = [] coords = [] - for ii, line in enumerate(xyz.split('\n')): + for ii, line in enumerate(xyz.split("\n")): if ii == 0: natoms = int(line.strip()) elif 2 <= ii <= 1 + natoms: @@ -56,4 +57,3 @@ def xyz_to_coord(xyz: str) -> Tuple[np.ndarray, list]: coords.append((float(x), float(y), float(z))) symbols.append(symbol) return np.array(coords), symbols - diff --git a/plugin_example/README.md b/plugin_example/README.md index 10aadf040..322756f93 100644 --- a/plugin_example/README.md +++ b/plugin_example/README.md @@ -21,4 +21,4 @@ Element List : ------------------- X 20 -``` \ No newline at end of file +``` diff --git a/plugin_example/dpdata_random/__init__.py b/plugin_example/dpdata_random/__init__.py index b9ce840d7..8e1450c9e 100644 --- a/plugin_example/dpdata_random/__init__.py +++ b/plugin_example/dpdata_random/__init__.py @@ -1,24 +1,31 @@ from dpdata.format import Format import numpy as np + @Format.register("random") class RandomFormat(Format): def from_system(self, N, **kwargs): return { "atom_numbs": [20], - "atom_names": ['X'], + "atom_names": ["X"], "atom_types": [0] * 20, - "cells": np.repeat(np.diag(np.diag(np.ones((3, 3))))[np.newaxis,...], N, axis=0) * 100., - "coords": np.random.rand(N, 20, 3) * 100., + "cells": np.repeat( + np.diag(np.diag(np.ones((3, 3))))[np.newaxis, ...], N, axis=0 + ) + * 100.0, + "coords": np.random.rand(N, 20, 3) * 100.0, } def from_labeled_system(self, N, **kwargs): return { 
"atom_numbs": [20], - "atom_names": ['X'], + "atom_names": ["X"], "atom_types": [0] * 20, - "cells": np.repeat(np.diag(np.diag(np.ones((3, 3))))[np.newaxis,...], N, axis=0) * 100., - "coords": np.random.rand(N, 20, 3) * 100., - "energies": np.random.rand(N) * 100., - "forces": np.random.rand(N, 20, 3) * 100., - } \ No newline at end of file + "cells": np.repeat( + np.diag(np.diag(np.ones((3, 3))))[np.newaxis, ...], N, axis=0 + ) + * 100.0, + "coords": np.random.rand(N, 20, 3) * 100.0, + "energies": np.random.rand(N) * 100.0, + "forces": np.random.rand(N, 20, 3) * 100.0, + } diff --git a/requirements.txt b/requirements.txt index 2a302c7ec..1c7c22936 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ monty==2.0.4 pymatgen==2019.7.2 - diff --git a/tests/comp_sys.py b/tests/comp_sys.py index 94c3e52a0..6618cdee6 100644 --- a/tests/comp_sys.py +++ b/tests/comp_sys.py @@ -1,55 +1,61 @@ import numpy as np -class CompSys : - + +class CompSys: def test_len_func(self): - self.assertEqual(len(self.system_1),len(self.system_2)) + self.assertEqual(len(self.system_1), len(self.system_2)) def test_add_func(self): - self.assertEqual(len(self.system_1+self.system_1), - len(self.system_2+self.system_2)) + self.assertEqual( + len(self.system_1 + self.system_1), len(self.system_2 + self.system_2) + ) def test_atom_numbs(self): - self.assertEqual(self.system_1.data['atom_numbs'], - self.system_2.data['atom_numbs']) + self.assertEqual( + self.system_1.data["atom_numbs"], self.system_2.data["atom_numbs"] + ) def test_atom_names(self): - self.assertEqual(self.system_1.data['atom_names'], - self.system_2.data['atom_names']) + self.assertEqual( + self.system_1.data["atom_names"], self.system_2.data["atom_names"] + ) def test_atom_types(self): - np.testing.assert_array_equal(self.system_1.data['atom_types'], - self.system_2.data['atom_types']) + np.testing.assert_array_equal( + self.system_1.data["atom_types"], self.system_2.data["atom_types"] + ) def test_orig(self): - for 
d0 in range(3) : - self.assertEqual(self.system_1.data['orig'][d0], - self.system_2.data['orig'][d0]) + for d0 in range(3): + self.assertEqual( + self.system_1.data["orig"][d0], self.system_2.data["orig"][d0] + ) def test_nframs(self): - self.assertEqual(self.system_1.get_nframes(), - self.system_2.get_nframes()) + self.assertEqual(self.system_1.get_nframes(), self.system_2.get_nframes()) def test_cell(self): - self.assertEqual(self.system_1.get_nframes(), - self.system_2.get_nframes()) + self.assertEqual(self.system_1.get_nframes(), self.system_2.get_nframes()) if not self.system_1.nopbc and not self.system_2.nopbc: - np.testing.assert_almost_equal(self.system_1.data['cells'], - self.system_2.data['cells'], - decimal = self.places, - err_msg = 'cell failed') - - def test_coord(self): - self.assertEqual(self.system_1.get_nframes(), - self.system_2.get_nframes()) + np.testing.assert_almost_equal( + self.system_1.data["cells"], + self.system_2.data["cells"], + decimal=self.places, + err_msg="cell failed", + ) + + def test_coord(self): + self.assertEqual(self.system_1.get_nframes(), self.system_2.get_nframes()) # think about direct coord - tmp_cell = self.system_1.data['cells'] + tmp_cell = self.system_1.data["cells"] tmp_cell = np.reshape(tmp_cell, [-1, 3]) - tmp_cell_norm = np.reshape(np.linalg.norm(tmp_cell, axis = 1), [-1, 1, 3]) - np.testing.assert_almost_equal(self.system_1.data['coords'] / tmp_cell_norm, - self.system_2.data['coords'] / tmp_cell_norm, - decimal = self.places, - err_msg = 'coord failed') + tmp_cell_norm = np.reshape(np.linalg.norm(tmp_cell, axis=1), [-1, 1, 3]) + np.testing.assert_almost_equal( + self.system_1.data["coords"] / tmp_cell_norm, + self.system_2.data["coords"] / tmp_cell_norm, + decimal=self.places, + err_msg="coord failed", + ) def test_nopbc(self): self.assertEqual(self.system_1.nopbc, self.system_2.nopbc) @@ -59,56 +65,60 @@ def test_data_check(self): self.system_2.check_data() -class CompLabeledSys (CompSys) : - def 
test_energy(self) : - self.assertEqual(self.system_1.get_nframes(), - self.system_2.get_nframes()) - np.testing.assert_almost_equal(self.system_1.data['energies'], - self.system_2.data['energies'], - decimal = self.e_places, - err_msg = 'energies failed') - - def test_force(self) : - self.assertEqual(self.system_1.get_nframes(), - self.system_2.get_nframes()) - np.testing.assert_almost_equal(self.system_1.data['forces'], - self.system_2.data['forces'], - decimal = self.f_places, - err_msg = 'forces failed') - - def test_virial(self) : - self.assertEqual(self.system_1.get_nframes(), - self.system_2.get_nframes()) +class CompLabeledSys(CompSys): + def test_energy(self): + self.assertEqual(self.system_1.get_nframes(), self.system_2.get_nframes()) + np.testing.assert_almost_equal( + self.system_1.data["energies"], + self.system_2.data["energies"], + decimal=self.e_places, + err_msg="energies failed", + ) + + def test_force(self): + self.assertEqual(self.system_1.get_nframes(), self.system_2.get_nframes()) + np.testing.assert_almost_equal( + self.system_1.data["forces"], + self.system_2.data["forces"], + decimal=self.f_places, + err_msg="forces failed", + ) + + def test_virial(self): + self.assertEqual(self.system_1.get_nframes(), self.system_2.get_nframes()) # if len(self.system_1['virials']) == 0: # self.assertEqual(len(self.system_1['virials']), 0) # return - if not 'virials' in self.system_1: - self.assertFalse('virials' in self.system_2) + if not "virials" in self.system_1: + self.assertFalse("virials" in self.system_2) return - np.testing.assert_almost_equal(self.system_1['virials'], - self.system_2['virials'], - decimal = self.v_places, - err_msg = 'virials failed') + np.testing.assert_almost_equal( + self.system_1["virials"], + self.system_2["virials"], + decimal=self.v_places, + err_msg="virials failed", + ) class MultiSystems: def test_systems_name(self): self.assertEqual(set(self.systems.systems), set(self.system_names)) - + def test_systems_size(self): for 
name, size in self.system_sizes.items(): self.assertEqual(self.systems[name].get_nframes(), size) - + def test_atom_names(self): self.assertEqual(self.atom_names, self.systems.atom_names) -class IsPBC: +class IsPBC: def test_is_pbc(self): self.assertFalse(self.system_1.nopbc) self.assertFalse(self.system_2.nopbc) -class IsNoPBC: + +class IsNoPBC: def test_is_nopbc(self): self.assertTrue(self.system_1.nopbc) self.assertTrue(self.system_2.nopbc) diff --git a/tests/context.py b/tests/context.py index 6c828a1bd..e305ff0fe 100644 --- a/tests/context.py +++ b/tests/context.py @@ -1,5 +1,6 @@ -import sys,os -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +import sys, os + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) import dpdata import dpdata.md.water import dpdata.md.msd diff --git a/tests/poscars/poscar_ref_oh.py b/tests/poscars/poscar_ref_oh.py index 9b12c1511..f120183ed 100644 --- a/tests/poscars/poscar_ref_oh.py +++ b/tests/poscars/poscar_ref_oh.py @@ -1,42 +1,50 @@ import numpy as np -class TestPOSCARoh : +class TestPOSCARoh: def test_atom_numbs(self): - self.assertEqual(self.system.data['atom_numbs'], [1,1]) + self.assertEqual(self.system.data["atom_numbs"], [1, 1]) def test_atom_names(self): - self.assertEqual(self.system.data['atom_names'], ['O','H']) + self.assertEqual(self.system.data["atom_names"], ["O", "H"]) def test_atom_types(self): - self.assertEqual(self.system.data['atom_types'][0], 0) - self.assertEqual(self.system.data['atom_types'][1], 1) + self.assertEqual(self.system.data["atom_types"][0], 0) + self.assertEqual(self.system.data["atom_types"][1], 1) def test_orig(self): - for d0 in range(3) : - self.assertEqual(self.system.data['orig'][d0], 0) + for d0 in range(3): + self.assertEqual(self.system.data["orig"][d0], 0) def test_cell(self): - ovito_cell = np.array([[2.5243712, 0.0000000, 0.0000000], - [1.2621856, 2.0430257, 0.0000000], - [1.2874292, 0.7485898, 2.2254033]]) - for 
ii in range(3) : - for jj in range(3) : - self.assertAlmostEqual(self.system.data['cells'][0][ii][jj], - ovito_cell[ii][jj], - places = 6, - msg = 'cell[%d][%d] failed' % (ii,jj)) + ovito_cell = np.array( + [ + [2.5243712, 0.0000000, 0.0000000], + [1.2621856, 2.0430257, 0.0000000], + [1.2874292, 0.7485898, 2.2254033], + ] + ) + for ii in range(3): + for jj in range(3): + self.assertAlmostEqual( + self.system.data["cells"][0][ii][jj], + ovito_cell[ii][jj], + places=6, + msg="cell[%d][%d] failed" % (ii, jj), + ) def test_frame(self): if hasattr(self, "unwrap") and self.unwrap is True: - ovito_posis = np.array([[5.0739861, 2.7916155, 2.2254033], - [6.3361717, 3.4934183, 2.7767918]]) + ovito_posis = np.array( + [[5.0739861, 2.7916155, 2.2254033], [6.3361717, 3.4934183, 2.7767918]] + ) else: - ovito_posis = np.array([[0, 0, 0], - [1.2621856, 0.7018028, 0.5513885]]) - for ii in range(2) : - for jj in range(3) : - self.assertAlmostEqual(self.system.data['coords'][0][ii][jj], - ovito_posis[ii][jj], - places = 6, - msg = 'posis[%d][%d] failed' % (ii,jj)) + ovito_posis = np.array([[0, 0, 0], [1.2621856, 0.7018028, 0.5513885]]) + for ii in range(2): + for jj in range(3): + self.assertAlmostEqual( + self.system.data["coords"][0][ii][jj], + ovito_posis[ii][jj], + places=6, + msg="posis[%d][%d] failed" % (ii, jj), + ) diff --git a/tests/poscars/test_lammps_dump_s_su.py b/tests/poscars/test_lammps_dump_s_su.py index 2370cffc4..5e914ea5f 100644 --- a/tests/poscars/test_lammps_dump_s_su.py +++ b/tests/poscars/test_lammps_dump_s_su.py @@ -2,25 +2,26 @@ import numpy as np import unittest from context import dpdata -from poscars.poscar_ref_oh import TestPOSCARoh +from poscars.poscar_ref_oh import TestPOSCARoh + class TestDump(unittest.TestCase, TestPOSCARoh): - - def setUp(self): - self.system = dpdata.System(os.path.join('poscars', 'conf_s_su.dump'), - type_map = ['O', 'H']) - + def setUp(self): + self.system = dpdata.System( + os.path.join("poscars", "conf_s_su.dump"), 
type_map=["O", "H"] + ) + + class TestDump2(unittest.TestCase, TestPOSCARoh): - - def setUp(self): - self.tmp_system = dpdata.System(os.path.join('poscars', 'conf_s_su.dump'), - type_map = ['O', 'H']) + def setUp(self): + self.tmp_system = dpdata.System( + os.path.join("poscars", "conf_s_su.dump"), type_map=["O", "H"] + ) self.system = self.tmp_system.sub_system([1]) - def test_nframes (self) : + def test_nframes(self): self.assertEqual(self.tmp_system.get_nframes(), 2) - - -if __name__ == '__main__': + + +if __name__ == "__main__": unittest.main() - diff --git a/tests/pwmat/config_ref_ch4.py b/tests/pwmat/config_ref_ch4.py index 6fd658642..71aef7fe1 100644 --- a/tests/pwmat/config_ref_ch4.py +++ b/tests/pwmat/config_ref_ch4.py @@ -1,44 +1,59 @@ import numpy as np -class Testconfigch4 : +class Testconfigch4: def test_atom_numbs(self): - self.assertEqual(self.system.data['atom_numbs'], [4,1]) + self.assertEqual(self.system.data["atom_numbs"], [4, 1]) def test_atom_names(self): - self.assertEqual(self.system.data['atom_names'], ['H','C']) + self.assertEqual(self.system.data["atom_names"], ["H", "C"]) def test_atom_types(self): - self.assertEqual(self.system.data['atom_types'][0], 0) - self.assertEqual(self.system.data['atom_types'][1], 0) - self.assertEqual(self.system.data['atom_types'][2], 0) - self.assertEqual(self.system.data['atom_types'][3], 0) - self.assertEqual(self.system.data['atom_types'][4], 1) + self.assertEqual(self.system.data["atom_types"][0], 0) + self.assertEqual(self.system.data["atom_types"][1], 0) + self.assertEqual(self.system.data["atom_types"][2], 0) + self.assertEqual(self.system.data["atom_types"][3], 0) + self.assertEqual(self.system.data["atom_types"][4], 1) def test_orig(self): - for d0 in range(3) : - self.assertEqual(self.system.data['orig'][d0], 0) + for d0 in range(3): + self.assertEqual(self.system.data["orig"][d0], 0) def test_cell(self): - ovito_cell = np.array([[10.000000, 0.0000000, 0.0000000], - [0.0000000, 10.000000, 
0.0000000], - [0.0000000, 0.0000000, 10.000000]]) - for ii in range(3) : - for jj in range(3) : - self.assertAlmostEqual(self.system.data['cells'][0][ii][jj], - ovito_cell[ii][jj], - places = 6, - msg = 'cell[%d][%d] failed' % (ii,jj)) + ovito_cell = np.array( + [ + [10.000000, 0.0000000, 0.0000000], + [0.0000000, 10.000000, 0.0000000], + [0.0000000, 0.0000000, 10.000000], + ] + ) + for ii in range(3): + for jj in range(3): + self.assertAlmostEqual( + self.system.data["cells"][0][ii][jj], + ovito_cell[ii][jj], + places=6, + msg="cell[%d][%d] failed" % (ii, jj), + ) - def test_frame(self): - ovito_posis = np.array([[0.53815434, 0.40686080, 0.36057301], - [0.39453966, 0.48032057, 0.43846884], - [0.55209243, 0.56545029, 0.44270874], - [0.52818530, 0.41641476, 0.53918266], - [0.50325059, 0.46725516, 0.44523234]])*10 - for ii in range(2) : - for jj in range(3) : - self.assertAlmostEqual(self.system.data['coords'][0][ii][jj], - ovito_posis[ii][jj], - places = 6, - msg = 'posis[%d][%d] failed' % (ii,jj)) + def test_frame(self): + ovito_posis = ( + np.array( + [ + [0.53815434, 0.40686080, 0.36057301], + [0.39453966, 0.48032057, 0.43846884], + [0.55209243, 0.56545029, 0.44270874], + [0.52818530, 0.41641476, 0.53918266], + [0.50325059, 0.46725516, 0.44523234], + ] + ) + * 10 + ) + for ii in range(2): + for jj in range(3): + self.assertAlmostEqual( + self.system.data["coords"][0][ii][jj], + ovito_posis[ii][jj], + places=6, + msg="posis[%d][%d] failed" % (ii, jj), + ) diff --git a/tests/pwmat/config_ref_oh.py b/tests/pwmat/config_ref_oh.py index 7ce36791c..6f3e05619 100644 --- a/tests/pwmat/config_ref_oh.py +++ b/tests/pwmat/config_ref_oh.py @@ -1,37 +1,45 @@ import numpy as np -class Testconfigoh : +class Testconfigoh: def test_atom_numbs(self): - self.assertEqual(self.system.data['atom_numbs'], [1,1]) + self.assertEqual(self.system.data["atom_numbs"], [1, 1]) def test_atom_names(self): - self.assertEqual(self.system.data['atom_names'], ['H','O']) + 
self.assertEqual(self.system.data["atom_names"], ["H", "O"]) def test_atom_types(self): - self.assertEqual(self.system.data['atom_types'][0], 0) - self.assertEqual(self.system.data['atom_types'][1], 1) + self.assertEqual(self.system.data["atom_types"][0], 0) + self.assertEqual(self.system.data["atom_types"][1], 1) def test_orig(self): - for d0 in range(3) : - self.assertEqual(self.system.data['orig'][d0], 0) + for d0 in range(3): + self.assertEqual(self.system.data["orig"][d0], 0) def test_cell(self): - ovito_cell = np.array([[2.5243712, 0.0000000, 0.0000000], - [1.2621856, 2.0430257, 0.0000000], - [1.2874292, 0.7485898, 2.2254033]]) - for ii in range(3) : - for jj in range(3) : - self.assertAlmostEqual(self.system.data['cells'][0][ii][jj], - ovito_cell[ii][jj], - places = 6, - msg = 'cell[%d][%d] failed' % (ii,jj)) + ovito_cell = np.array( + [ + [2.5243712, 0.0000000, 0.0000000], + [1.2621856, 2.0430257, 0.0000000], + [1.2874292, 0.7485898, 2.2254033], + ] + ) + for ii in range(3): + for jj in range(3): + self.assertAlmostEqual( + self.system.data["cells"][0][ii][jj], + ovito_cell[ii][jj], + places=6, + msg="cell[%d][%d] failed" % (ii, jj), + ) - def test_frame(self): - ovito_posis = np.array([[1.2621856, 0.7018028, 0.5513885],[0, 0, 0]]) - for ii in range(2) : - for jj in range(3) : - self.assertAlmostEqual(self.system.data['coords'][0][ii][jj], - ovito_posis[ii][jj], - places = 6, - msg = 'posis[%d][%d] failed' % (ii,jj)) + def test_frame(self): + ovito_posis = np.array([[1.2621856, 0.7018028, 0.5513885], [0, 0, 0]]) + for ii in range(2): + for jj in range(3): + self.assertAlmostEqual( + self.system.data["coords"][0][ii][jj], + ovito_posis[ii][jj], + places=6, + msg="posis[%d][%d] failed" % (ii, jj), + ) diff --git a/tests/test_abacus_md.py b/tests/test_abacus_md.py index 3621242db..89df93b65 100644 --- a/tests/test_abacus_md.py +++ b/tests/test_abacus_md.py @@ -6,147 +6,203 @@ bohr2ang = LengthConversion("bohr", "angstrom").value() -class TestABACUSMD: - def 
test_atom_names(self) : - self.assertEqual(self.system_water.data['atom_names'], ['H', 'O']) - self.assertEqual(self.system_Si.data['atom_names'], ['Si']) - self.assertEqual(self.system_water_unconv.data['atom_names'], ['H', 'O']) +class TestABACUSMD: + def test_atom_names(self): + self.assertEqual(self.system_water.data["atom_names"], ["H", "O"]) + self.assertEqual(self.system_Si.data["atom_names"], ["Si"]) + self.assertEqual(self.system_water_unconv.data["atom_names"], ["H", "O"]) - def test_atom_numbs(self) : - self.assertEqual(self.system_water.data['atom_numbs'], [2, 1]) - self.assertEqual(self.system_Si.data['atom_numbs'], [2]) - self.assertEqual(self.system_water_unconv.data['atom_numbs'], [2, 1]) + def test_atom_numbs(self): + self.assertEqual(self.system_water.data["atom_numbs"], [2, 1]) + self.assertEqual(self.system_Si.data["atom_numbs"], [2]) + self.assertEqual(self.system_water_unconv.data["atom_numbs"], [2, 1]) - def test_atom_types(self) : + def test_atom_types(self): ref_type = [0, 0, 1] - ref_type = np.array(ref_type) + ref_type = np.array(ref_type) ref_type2 = np.array([0, 0]) - ref_type3 = np.array([0,0,1]) - for ii in range(ref_type.shape[0]) : - self.assertEqual(self.system_water.data['atom_types'][ii], ref_type[ii]) - for ii in range(ref_type2.shape[0]) : - self.assertEqual(self.system_Si.data['atom_types'][ii], ref_type2[ii]) - for ii in range(ref_type3.shape[0]) : - self.assertEqual(self.system_water_unconv.data['atom_types'][ii], ref_type3[ii]) - - def test_cell(self) : + ref_type3 = np.array([0, 0, 1]) + for ii in range(ref_type.shape[0]): + self.assertEqual(self.system_water.data["atom_types"][ii], ref_type[ii]) + for ii in range(ref_type2.shape[0]): + self.assertEqual(self.system_Si.data["atom_types"][ii], ref_type2[ii]) + for ii in range(ref_type3.shape[0]): + self.assertEqual( + self.system_water_unconv.data["atom_types"][ii], ref_type3[ii] + ) + + def test_cell(self): cell = bohr2ang * 28 * np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) 
cell2 = bohr2ang * 5.1 * np.array([[1, 1, 0], [1, 0, 1], [0, 1, 1]]) - cell3 = np.array([[1.45245092e+01, 0, 0], - [-1.40550526e-02, 1.51277202e+01, 0], - [-4.42369435e-01, 4.17648184e-01, 1.49535208e+01]]) - for idx in range(np.shape(self.system_water.data['cells'])[0]): - np.testing.assert_almost_equal(cell, self.system_water.data['cells'][idx], decimal = 5) - for idx in range(np.shape(self.system_Si.data['cells'])[0]): - np.testing.assert_almost_equal(self.system_Si.data['cells'][idx], cell2, decimal = 5) - for idx in range(np.shape(self.system_water_unconv.data['cells'])[0]): - np.testing.assert_almost_equal(self.system_water_unconv.data['cells'][idx], cell3, decimal = 5) - - def test_coord(self) : - with open('abacus.md/water_coord') as fp: + cell3 = np.array( + [ + [1.45245092e01, 0, 0], + [-1.40550526e-02, 1.51277202e01, 0], + [-4.42369435e-01, 4.17648184e-01, 1.49535208e01], + ] + ) + for idx in range(np.shape(self.system_water.data["cells"])[0]): + np.testing.assert_almost_equal( + cell, self.system_water.data["cells"][idx], decimal=5 + ) + for idx in range(np.shape(self.system_Si.data["cells"])[0]): + np.testing.assert_almost_equal( + self.system_Si.data["cells"][idx], cell2, decimal=5 + ) + for idx in range(np.shape(self.system_water_unconv.data["cells"])[0]): + np.testing.assert_almost_equal( + self.system_water_unconv.data["cells"][idx], cell3, decimal=5 + ) + + def test_coord(self): + with open("abacus.md/water_coord") as fp: coord = [] - for ii in fp : + for ii in fp: coord.append([float(jj) for jj in ii.split()]) coord = np.array(coord) coord = coord.reshape([5, 3, 3]) - np.testing.assert_almost_equal(self.system_water.data['coords'], coord, decimal = 5) + np.testing.assert_almost_equal( + self.system_water.data["coords"], coord, decimal=5 + ) - with open('abacus.md.nostress/Si_coord') as fp2: + with open("abacus.md.nostress/Si_coord") as fp2: coord = [] - for ii in fp2 : + for ii in fp2: coord.append([float(jj) for jj in ii.split()]) coord = 
np.array(coord) coord = coord.reshape([4, 2, 3]) - np.testing.assert_almost_equal(self.system_Si.data['coords'], coord, decimal = 5) + np.testing.assert_almost_equal( + self.system_Si.data["coords"], coord, decimal=5 + ) - with open('abacus.md.unconv/water_coord') as fp3: + with open("abacus.md.unconv/water_coord") as fp3: coord = [] - for ii in fp3 : + for ii in fp3: coord.append([float(jj) for jj in ii.split()]) coord = np.array(coord) coord = coord.reshape([10, 3, 3]) - np.testing.assert_almost_equal(self.system_water_unconv.data['coords'], coord, decimal = 5) + np.testing.assert_almost_equal( + self.system_water_unconv.data["coords"], coord, decimal=5 + ) - def test_force(self) : - with open('abacus.md/water_force') as fp: + def test_force(self): + with open("abacus.md/water_force") as fp: force = [] - for ii in fp : + for ii in fp: force.append([float(jj) for jj in ii.split()]) force = np.array(force) force = force.reshape([5, 3, 3]) - np.testing.assert_almost_equal(self.system_water.data['forces'], force, decimal=5) + np.testing.assert_almost_equal( + self.system_water.data["forces"], force, decimal=5 + ) - with open('abacus.md.nostress/Si_force') as fp2: + with open("abacus.md.nostress/Si_force") as fp2: force = [] - for ii in fp2 : + for ii in fp2: force.append([float(jj) for jj in ii.split()]) force = np.array(force) force = force.reshape([4, 2, 3]) - np.testing.assert_almost_equal(self.system_Si.data['forces'], force, decimal=5) + np.testing.assert_almost_equal( + self.system_Si.data["forces"], force, decimal=5 + ) - with open('abacus.md.unconv/water_force') as fp3: + with open("abacus.md.unconv/water_force") as fp3: force = [] - for ii in fp3 : + for ii in fp3: force.append([float(jj) for jj in ii.split()]) force = np.array(force) force = force.reshape([10, 3, 3]) - np.testing.assert_almost_equal(self.system_water_unconv.data['forces'], force, decimal=5) + np.testing.assert_almost_equal( + self.system_water_unconv.data["forces"], force, decimal=5 + ) - 
def test_virial(self) : - with open('abacus.md/water_virial') as fp: + def test_virial(self): + with open("abacus.md/water_virial") as fp: virial = [] - for ii in fp : + for ii in fp: virial.append([float(jj) for jj in ii.split()]) virial = np.array(virial) virial = virial.reshape([5, 3, 3]) - np.testing.assert_almost_equal(self.system_water.data['virials'], virial, decimal=5) + np.testing.assert_almost_equal( + self.system_water.data["virials"], virial, decimal=5 + ) - with open('abacus.md.unconv/water_virial') as fp: + with open("abacus.md.unconv/water_virial") as fp: virial = [] - for ii in fp : + for ii in fp: virial.append([float(jj) for jj in ii.split()]) virial = np.array(virial) virial = virial.reshape([10, 3, 3]) - np.testing.assert_almost_equal(self.system_water_unconv.data['virials'], virial, decimal=5) - - def test_energy(self) : - ref_energy = np.array([-466.69285117, -466.69929051, -466.69829826, -466.70364664, - -466.6976083]) - ref_energy2 = np.array([-211.77184603, -211.78111966, -211.79681663, -211.79875524]) - ref_energy3 = np.array([-464.87380991, -465.18489358, -465.97407849, -465.98292836, -465.85528926, - -465.33957501, -464.64886682, -464.61802032, -465.61854656, -466.05660096]) - np.testing.assert_almost_equal(self.system_water.data['energies'], ref_energy) - np.testing.assert_almost_equal(self.system_Si.data['energies'], ref_energy2) - np.testing.assert_almost_equal(self.system_water_unconv.data['energies'], ref_energy3) + np.testing.assert_almost_equal( + self.system_water_unconv.data["virials"], virial, decimal=5 + ) + + def test_energy(self): + ref_energy = np.array( + [-466.69285117, -466.69929051, -466.69829826, -466.70364664, -466.6976083] + ) + ref_energy2 = np.array( + [-211.77184603, -211.78111966, -211.79681663, -211.79875524] + ) + ref_energy3 = np.array( + [ + -464.87380991, + -465.18489358, + -465.97407849, + -465.98292836, + -465.85528926, + -465.33957501, + -464.64886682, + -464.61802032, + -465.61854656, + -466.05660096, + 
] + ) + np.testing.assert_almost_equal(self.system_water.data["energies"], ref_energy) + np.testing.assert_almost_equal(self.system_Si.data["energies"], ref_energy2) + np.testing.assert_almost_equal( + self.system_water_unconv.data["energies"], ref_energy3 + ) def test_to_system(self): - pp_file=["H.upf","O.upf"] - numerical_orbital=["H.upf","O.upf"] - numerical_descriptor="jle.orb" - mass=[1.008,15.994] - self.system_water.to(file_name="abacus.md/water_stru",fmt='abacus/stru',pp_file=pp_file,\ - numerical_orbital=numerical_orbital,numerical_descriptor=numerical_descriptor,\ - mass=mass) - self.assertTrue(os.path.isfile('abacus.md/water_stru')) - if os.path.isfile('abacus.md/water_stru'): - with open('abacus.md/water_stru') as f: - iline=0 - for iline,l in enumerate(f): + pp_file = ["H.upf", "O.upf"] + numerical_orbital = ["H.upf", "O.upf"] + numerical_descriptor = "jle.orb" + mass = [1.008, 15.994] + self.system_water.to( + file_name="abacus.md/water_stru", + fmt="abacus/stru", + pp_file=pp_file, + numerical_orbital=numerical_orbital, + numerical_descriptor=numerical_descriptor, + mass=mass, + ) + self.assertTrue(os.path.isfile("abacus.md/water_stru")) + if os.path.isfile("abacus.md/water_stru"): + with open("abacus.md/water_stru") as f: + iline = 0 + for iline, l in enumerate(f): iline += 1 - self.assertEqual(iline,30) + self.assertEqual(iline, 30) class TestABACUSMDLabeledOutput(unittest.TestCase, TestABACUSMD): - def setUp(self): - self.system_water = dpdata.LabeledSystem('abacus.md',fmt='abacus/md') # system with stress - self.system_Si = dpdata.LabeledSystem('abacus.md.nostress',fmt='abacus/md') # system without stress - self.system_water_unconv = dpdata.LabeledSystem('abacus.md.unconv',fmt='abacus/md') #system with unconverged SCF + self.system_water = dpdata.LabeledSystem( + "abacus.md", fmt="abacus/md" + ) # system with stress + self.system_Si = dpdata.LabeledSystem( + "abacus.md.nostress", fmt="abacus/md" + ) # system without stress + 
self.system_water_unconv = dpdata.LabeledSystem( + "abacus.md.unconv", fmt="abacus/md" + ) # system with unconverged SCF def tearDown(self): - if os.path.isfile('abacus.md/water_stru'): - os.remove('abacus.md/water_stru') + if os.path.isfile("abacus.md/water_stru"): + os.remove("abacus.md/water_stru") + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_abacus_pw_scf.py b/tests/test_abacus_pw_scf.py index f2a2c89ba..fc06f2278 100644 --- a/tests/test_abacus_pw_scf.py +++ b/tests/test_abacus_pw_scf.py @@ -1,37 +1,39 @@ import os import numpy as np -import unittest,shutil +import unittest, shutil from context import dpdata from dpdata.unit import LengthConversion bohr2ang = LengthConversion("bohr", "angstrom").value() -class TestABACUSSinglePointEnergy: - def test_atom_names(self) : - self.assertEqual(self.system_ch4.data['atom_names'], ['C', 'H']) - #self.assertEqual(self.system_h2o.data['atom_names'], ['O','H']) - self.assertEqual(self.system_ch4_unlabeled.data['atom_names'], ['C', 'H']) - def test_atom_numbs(self) : - self.assertEqual(self.system_ch4.data['atom_numbs'], [1, 4]) - #self.assertEqual(self.system_h2o.data['atom_numbs'], [64,128]) - self.assertEqual(self.system_ch4_unlabeled.data['atom_numbs'], [1, 4]) - def test_atom_types(self) : - ref_type = [0,1,1,1,1] - ref_type = np.array(ref_type) - for ii in range(ref_type.shape[0]) : - self.assertEqual(self.system_ch4.data['atom_types'][ii], ref_type[ii]) - self.assertEqual(self.system_ch4_unlabeled['atom_types'][ii], ref_type[ii]) +class TestABACUSSinglePointEnergy: + def test_atom_names(self): + self.assertEqual(self.system_ch4.data["atom_names"], ["C", "H"]) + # self.assertEqual(self.system_h2o.data['atom_names'], ['O','H']) + self.assertEqual(self.system_ch4_unlabeled.data["atom_names"], ["C", "H"]) + + def test_atom_numbs(self): + self.assertEqual(self.system_ch4.data["atom_numbs"], [1, 4]) + # self.assertEqual(self.system_h2o.data['atom_numbs'], [64,128]) + 
self.assertEqual(self.system_ch4_unlabeled.data["atom_numbs"], [1, 4]) + + def test_atom_types(self): + ref_type = [0, 1, 1, 1, 1] + ref_type = np.array(ref_type) + for ii in range(ref_type.shape[0]): + self.assertEqual(self.system_ch4.data["atom_types"][ii], ref_type[ii]) + self.assertEqual(self.system_ch4_unlabeled["atom_types"][ii], ref_type[ii]) # ref_type = [0]*64 + [1]*128 # ref_type = np.array(ref_type) # for ii in range(ref_type.shape[0]) : # self.assertEqual(self.system_h2o.data['atom_types'][ii], ref_type[ii]) - def test_cell(self) : + def test_cell(self): # cell = 5.29177 * np.eye(3) cell = bohr2ang * 10 * np.eye(3) - np.testing.assert_almost_equal(self.system_ch4.data['cells'][0], cell) - np.testing.assert_almost_equal(self.system_ch4_unlabeled.data['cells'][0], cell) + np.testing.assert_almost_equal(self.system_ch4.data["cells"][0], cell) + np.testing.assert_almost_equal(self.system_ch4_unlabeled.data["cells"][0], cell) # fp = open('qe.scf/h2o_cell') # cell = [] # for ii in fp : @@ -42,16 +44,18 @@ def test_cell(self) : # self.assertAlmostEqual(self.system_h2o.data['cells'][0][ii][jj], cell[ii][jj]) # fp.close() - - def test_coord(self) : - with open('abacus.scf/ch4_coord') as fp: + def test_coord(self): + with open("abacus.scf/ch4_coord") as fp: coord = [] - for ii in fp : + for ii in fp: coord.append([float(jj) for jj in ii.split()]) coord = np.array(coord) - np.testing.assert_almost_equal(self.system_ch4.data['coords'][0], coord, decimal=5) - np.testing.assert_almost_equal(self.system_ch4_unlabeled.data['coords'][0], coord, decimal=5) - + np.testing.assert_almost_equal( + self.system_ch4.data["coords"][0], coord, decimal=5 + ) + np.testing.assert_almost_equal( + self.system_ch4_unlabeled.data["coords"][0], coord, decimal=5 + ) # fp = open('qe.scf/h2o_coord') # coord = [] @@ -63,14 +67,13 @@ def test_coord(self) : # self.assertAlmostEqual(self.system_h2o.data['coords'][0][ii][jj], coord[ii][jj]) # fp.close() - def test_force(self) : - with 
open('abacus.scf/ch4_force') as fp: + def test_force(self): + with open("abacus.scf/ch4_force") as fp: force = [] - for ii in fp : + for ii in fp: force.append([float(jj) for jj in ii.split()]) force = np.array(force) - np.testing.assert_almost_equal(self.system_ch4.data['forces'][0], force) - + np.testing.assert_almost_equal(self.system_ch4.data["forces"][0], force) # fp = open('qe.scf/h2o_force') # force = [] @@ -82,14 +85,15 @@ def test_force(self) : # self.assertAlmostEqual(self.system_h2o.data['forces'][0][ii][jj], force[ii][jj]) # fp.close() - def test_virial(self) : - with open('abacus.scf/ch4_virial') as fp: + def test_virial(self): + with open("abacus.scf/ch4_virial") as fp: virial = [] - for ii in fp : + for ii in fp: virial.append([float(jj) for jj in ii.split()]) virial = np.array(virial) - np.testing.assert_almost_equal(self.system_ch4.data['virials'][0], virial, decimal = 3) - + np.testing.assert_almost_equal( + self.system_ch4.data["virials"][0], virial, decimal=3 + ) # fp = open('qe.scf/h2o_virial') # virial = [] @@ -101,41 +105,43 @@ def test_virial(self) : # self.assertAlmostEqual(self.system_h2o.data['virials'][0][ii][jj], virial[ii][jj], places = 2) # fp.close() - def test_energy(self) : + def test_energy(self): ref_energy = -219.64991404276591 - self.assertAlmostEqual(self.system_ch4.data['energies'][0], ref_energy) + self.assertAlmostEqual(self.system_ch4.data["energies"][0], ref_energy) # ref_energy = -30007.651851226798 # self.assertAlmostEqual(self.system_h2o.data['energies'][0], ref_energy) - class TestABACUSLabeledOutput(unittest.TestCase, TestABACUSSinglePointEnergy): - def setUp(self): - shutil.copy('abacus.scf/INPUT.ok','abacus.scf/INPUT') - self.system_ch4 = dpdata.LabeledSystem('abacus.scf',fmt='abacus/scf') + shutil.copy("abacus.scf/INPUT.ok", "abacus.scf/INPUT") + self.system_ch4 = dpdata.LabeledSystem("abacus.scf", fmt="abacus/scf") # self.system_h2o = dpdata.LabeledSystem('qe.scf/02.out',fmt='qe/pw/scf') - 
self.system_ch4_unlabeled = dpdata.System('abacus.scf/STRU.ch4', fmt='abacus/stru') + self.system_ch4_unlabeled = dpdata.System( + "abacus.scf/STRU.ch4", fmt="abacus/stru" + ) + def tearDown(self): if os.path.isfile("abacus.scf/INPUT"): os.remove("abacus.scf/INPUT") class TestABACUSLabeledOutputFail(unittest.TestCase): - def setUp(self): - shutil.copy('abacus.scf/INPUT.fail','abacus.scf/INPUT') - self.system_ch4 = dpdata.LabeledSystem('abacus.scf',fmt='abacus/scf') + shutil.copy("abacus.scf/INPUT.fail", "abacus.scf/INPUT") + self.system_ch4 = dpdata.LabeledSystem("abacus.scf", fmt="abacus/scf") # self.system_h2o = dpdata.LabeledSystem('qe.scf/02.out',fmt='qe/pw/scf') - self.system_ch4_unlabeled = dpdata.System('abacus.scf/STRU.ch4', fmt='abacus/stru') + self.system_ch4_unlabeled = dpdata.System( + "abacus.scf/STRU.ch4", fmt="abacus/stru" + ) + def tearDown(self): if os.path.isfile("abacus.scf/INPUT"): os.remove("abacus.scf/INPUT") - def test_return_zero(self): - self.assertEqual(len(self.system_ch4),0) - + def test_return_zero(self): + self.assertEqual(len(self.system_ch4), 0) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_abacus_relax.py b/tests/test_abacus_relax.py index be331009a..a39249bd7 100644 --- a/tests/test_abacus_relax.py +++ b/tests/test_abacus_relax.py @@ -1,4 +1,4 @@ -import os,shutil +import os, shutil import numpy as np import unittest from context import dpdata @@ -6,91 +6,102 @@ bohr2ang = LengthConversion("bohr", "angstrom").value() -class TestABACUSRelaxLabeledOutput(unittest.TestCase): +class TestABACUSRelaxLabeledOutput(unittest.TestCase): def setUp(self): - shutil.copy('abacus.relax/OUT.abacus/running_cell-relax.log.normal','abacus.relax/OUT.abacus/running_cell-relax.log') - self.system = dpdata.LabeledSystem('abacus.relax',fmt='abacus/relax') + shutil.copy( + "abacus.relax/OUT.abacus/running_cell-relax.log.normal", + "abacus.relax/OUT.abacus/running_cell-relax.log", + ) + self.system = 
dpdata.LabeledSystem("abacus.relax", fmt="abacus/relax") + def tearDown(self): if os.path.isfile("abacus.relax/OUT.abacus/running_cell-relax.log"): os.remove("abacus.relax/OUT.abacus/running_cell-relax.log") - def test_atom_names(self) : - self.assertEqual(self.system.data['atom_names'], ['H','O']) + def test_atom_names(self): + self.assertEqual(self.system.data["atom_names"], ["H", "O"]) - def test_atom_numbs(self) : - self.assertEqual(self.system.data['atom_numbs'], [2,1]) + def test_atom_numbs(self): + self.assertEqual(self.system.data["atom_numbs"], [2, 1]) - def test_atom_types(self) : - ref_type = np.array([0,0,1]) - for ii in range(ref_type.shape[0]) : - self.assertEqual(self.system.data['atom_types'][ii], ref_type[ii]) + def test_atom_types(self): + ref_type = np.array([0, 0, 1]) + for ii in range(ref_type.shape[0]): + self.assertEqual(self.system.data["atom_types"][ii], ref_type[ii]) - def test_cell(self) : + def test_cell(self): cell = bohr2ang * 28.0 * np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) - for idx in range(np.shape(self.system.data['cells'])[0]): - np.testing.assert_almost_equal(cell, self.system.data['cells'][idx], decimal = 5) + for idx in range(np.shape(self.system.data["cells"])[0]): + np.testing.assert_almost_equal( + cell, self.system.data["cells"][idx], decimal=5 + ) - def test_coord(self) : - with open('abacus.relax/coord.ref') as fp: + def test_coord(self): + with open("abacus.relax/coord.ref") as fp: ref = [] - for ii in fp : + for ii in fp: ref.append([float(jj) for jj in ii.split()]) ref = np.array(ref) ref = ref.reshape([5, 3, 3]) - np.testing.assert_almost_equal(self.system.data['coords'], ref, decimal = 5) + np.testing.assert_almost_equal(self.system.data["coords"], ref, decimal=5) - def test_force(self) : - with open('abacus.relax/force.ref') as fp: + def test_force(self): + with open("abacus.relax/force.ref") as fp: ref = [] - for ii in fp : + for ii in fp: ref.append([float(jj) for jj in ii.split()]) ref = np.array(ref) ref = 
ref.reshape([5, 3, 3]) - np.testing.assert_almost_equal(self.system.data['forces'], ref, decimal=5) + np.testing.assert_almost_equal(self.system.data["forces"], ref, decimal=5) - def test_virial(self) : - with open('abacus.relax/virial.ref') as fp: + def test_virial(self): + with open("abacus.relax/virial.ref") as fp: ref = [] - for ii in fp : + for ii in fp: ref.append([float(jj) for jj in ii.split()]) ref = np.array(ref) ref = ref.reshape([5, 3, 3]) - np.testing.assert_almost_equal(self.system.data['virials'], ref, decimal=5) + np.testing.assert_almost_equal(self.system.data["virials"], ref, decimal=5) - def test_stress(self) : - with open('abacus.relax/stress.ref') as fp: + def test_stress(self): + with open("abacus.relax/stress.ref") as fp: ref = [] - for ii in fp : + for ii in fp: ref.append([float(jj) for jj in ii.split()]) ref = np.array(ref) ref = ref.reshape([5, 3, 3]) - np.testing.assert_almost_equal(self.system.data['stress'], ref, decimal=5) + np.testing.assert_almost_equal(self.system.data["stress"], ref, decimal=5) - def test_energy(self) : - ref_energy = np.array([-465.77753104, -464.35757552, -465.79307346, -465.80056811, - -465.81235433]) - np.testing.assert_almost_equal(self.system.data['energies'], ref_energy) + def test_energy(self): + ref_energy = np.array( + [-465.77753104, -464.35757552, -465.79307346, -465.80056811, -465.81235433] + ) + np.testing.assert_almost_equal(self.system.data["energies"], ref_energy) -class TestABACUSRelaxLabeledOutputAbnormal(unittest.TestCase): +class TestABACUSRelaxLabeledOutputAbnormal(unittest.TestCase): def setUp(self): - shutil.copy('abacus.relax/OUT.abacus/running_cell-relax.log.abnormal','abacus.relax/OUT.abacus/running_cell-relax.log') - self.system = dpdata.LabeledSystem('abacus.relax',fmt='abacus/relax') - - def test_result(self): + shutil.copy( + "abacus.relax/OUT.abacus/running_cell-relax.log.abnormal", + "abacus.relax/OUT.abacus/running_cell-relax.log", + ) + self.system = 
dpdata.LabeledSystem("abacus.relax", fmt="abacus/relax") + + def test_result(self): data = self.system.data - self.assertEqual(len(data['coords']),4) - self.assertEqual(len(data['energies']),len(data['coords'])) - self.assertEqual(len(data['cells']),len(data['coords'])) - self.assertEqual(len(data['forces']),len(data['coords'])) - self.assertEqual(len(data['stress']),len(data['coords'])) - self.assertEqual(len(data['virials']),len(data['coords'])) - np.testing.assert_almost_equal(data['energies'][3],-465.81235433) - + self.assertEqual(len(data["coords"]), 4) + self.assertEqual(len(data["energies"]), len(data["coords"])) + self.assertEqual(len(data["cells"]), len(data["coords"])) + self.assertEqual(len(data["forces"]), len(data["coords"])) + self.assertEqual(len(data["stress"]), len(data["coords"])) + self.assertEqual(len(data["virials"]), len(data["coords"])) + np.testing.assert_almost_equal(data["energies"][3], -465.81235433) + def tearDown(self): if os.path.isfile("abacus.relax/OUT.abacus/running_cell-relax.log"): os.remove("abacus.relax/OUT.abacus/running_cell-relax.log") -if __name__ == '__main__': - unittest.main() \ No newline at end of file + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_abacus_stru_dump.py b/tests/test_abacus_stru_dump.py index 20922cbf7..6d6dbeea8 100644 --- a/tests/test_abacus_stru_dump.py +++ b/tests/test_abacus_stru_dump.py @@ -10,13 +10,20 @@ def setUp(self): self.system_ch4 = dpdata.System("abacus.scf/STRU.ch4", fmt="stru") def test_dump_stru(self): - self.system_ch4.to("stru", "STRU_tmp", mass = [12, 1], pp_file = ["C.upf", "H.upf"], numerical_orbital = ["C.orb", "H.orb"], numerical_descriptor = "jle.orb") + self.system_ch4.to( + "stru", + "STRU_tmp", + mass=[12, 1], + pp_file=["C.upf", "H.upf"], + numerical_orbital=["C.orb", "H.orb"], + numerical_descriptor="jle.orb", + ) myfilecmp(self, "abacus.scf/stru_test", "STRU_tmp") - + def tearDown(self): - if os.path.isfile('STRU_tmp'): - os.remove('STRU_tmp') + if 
os.path.isfile("STRU_tmp"): + os.remove("STRU_tmp") + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - \ No newline at end of file diff --git a/tests/test_amber_md.py b/tests/test_amber_md.py index d3189a0d6..5a9cded81 100644 --- a/tests/test_amber_md.py +++ b/tests/test_amber_md.py @@ -3,36 +3,46 @@ import shutil from context import dpdata from comp_sys import CompLabeledSys, IsPBC + try: import parmed except ModuleNotFoundError: - skip_parmed_related_test=True + skip_parmed_related_test = True else: - skip_parmed_related_test=False + skip_parmed_related_test = False + class TestAmberMD(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.system_1 = dpdata.LabeledSystem('amber/02_Heat', fmt = 'amber/md') - self.system_1.to('deepmd/npy','tmp.deepmd.npy') - self.system_2 = dpdata.LabeledSystem('tmp.deepmd.npy', fmt = 'deepmd/npy') + def setUp(self): + self.system_1 = dpdata.LabeledSystem("amber/02_Heat", fmt="amber/md") + self.system_1.to("deepmd/npy", "tmp.deepmd.npy") + self.system_2 = dpdata.LabeledSystem("tmp.deepmd.npy", fmt="deepmd/npy") self.places = 5 self.e_places = 4 self.f_places = 6 self.v_places = 6 - def tearDown(self) : - if os.path.exists('tmp.deepmd.npy'): - shutil.rmtree('tmp.deepmd.npy') + def tearDown(self): + if os.path.exists("tmp.deepmd.npy"): + shutil.rmtree("tmp.deepmd.npy") -@unittest.skipIf(skip_parmed_related_test,"skip parmed related test. install parmed to fix") + +@unittest.skipIf( + skip_parmed_related_test, "skip parmed related test. 
install parmed to fix" +) class TestAmberMDTarget(unittest.TestCase, CompLabeledSys, IsPBC): def setUp(self): - ll="amber/corr/low_level" - ncfile="amber/corr/rc.nc" - parmfile="amber/corr/qmmm.parm7" + ll = "amber/corr/low_level" + ncfile = "amber/corr/rc.nc" + parmfile = "amber/corr/qmmm.parm7" target = ":1" self.system_1 = dpdata.LabeledSystem( - ll, nc_file=ncfile, parm7_file=parmfile, fmt='amber/md', use_element_symbols=target) + ll, + nc_file=ncfile, + parm7_file=parmfile, + fmt="amber/md", + use_element_symbols=target, + ) self.system_2 = dpdata.LabeledSystem("amber/corr/dp_ll", fmt="deepmd/npy") self.places = 5 @@ -40,5 +50,6 @@ def setUp(self): self.f_places = 6 self.v_places = 6 -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_amber_sqm.py b/tests/test_amber_sqm.py index c8f762ba8..f9ca80f8d 100644 --- a/tests/test_amber_sqm.py +++ b/tests/test_amber_sqm.py @@ -11,64 +11,70 @@ else: skip_bond_order_system = False + class TestAmberSqmOut(unittest.TestCase, CompSys, IsNoPBC): - def setUp (self) : - self.system_1 = dpdata.System('amber/sqm_no_forces.out', fmt = 'sqm/out') - self.system_1.to('deepmd/npy','tmp.sqm.noforces') - self.system_2 = dpdata.System('tmp.sqm.noforces', fmt = 'deepmd/npy') + def setUp(self): + self.system_1 = dpdata.System("amber/sqm_no_forces.out", fmt="sqm/out") + self.system_1.to("deepmd/npy", "tmp.sqm.noforces") + self.system_2 = dpdata.System("tmp.sqm.noforces", fmt="deepmd/npy") self.places = 5 self.e_places = 4 self.f_places = 6 self.v_places = 6 - def tearDown(self) : - if os.path.exists('tmp.sqm.noforces'): - shutil.rmtree('tmp.sqm.noforces') + def tearDown(self): + if os.path.exists("tmp.sqm.noforces"): + shutil.rmtree("tmp.sqm.noforces") + class TestAmberSqmOutLabeled(unittest.TestCase, CompLabeledSys, IsNoPBC): - def setUp(self) : - self.system_1 = dpdata.LabeledSystem('amber/sqm_forces.out', fmt = 'sqm/out') - self.system_1.to('deepmd/npy','tmp.sqm.forces') - self.system_2 = 
dpdata.LabeledSystem('tmp.sqm.forces', fmt = 'deepmd/npy') + def setUp(self): + self.system_1 = dpdata.LabeledSystem("amber/sqm_forces.out", fmt="sqm/out") + self.system_1.to("deepmd/npy", "tmp.sqm.forces") + self.system_2 = dpdata.LabeledSystem("tmp.sqm.forces", fmt="deepmd/npy") self.places = 5 self.e_places = 4 self.f_places = 6 self.v_places = 6 - def tearDown(self) : - if os.path.exists('tmp.sqm.forces'): - shutil.rmtree('tmp.sqm.forces') + def tearDown(self): + if os.path.exists("tmp.sqm.forces"): + shutil.rmtree("tmp.sqm.forces") class TestAmberSqmOutOpt(unittest.TestCase, CompLabeledSys, IsNoPBC): - def setUp(self) : - self.system_1 = dpdata.LabeledSystem('amber/sqm_opt.out', fmt = 'sqm/out') - self.system_1.to('deepmd/npy','tmp.sqm.opt') - self.system_2 = dpdata.LabeledSystem('tmp.sqm.opt', fmt = 'deepmd/npy') + def setUp(self): + self.system_1 = dpdata.LabeledSystem("amber/sqm_opt.out", fmt="sqm/out") + self.system_1.to("deepmd/npy", "tmp.sqm.opt") + self.system_2 = dpdata.LabeledSystem("tmp.sqm.opt", fmt="deepmd/npy") self.places = 5 self.e_places = 4 self.f_places = 6 self.v_places = 6 - def tearDown(self) : - if os.path.exists('tmp.sqm.opt'): - shutil.rmtree('tmp.sqm.opt') + def tearDown(self): + if os.path.exists("tmp.sqm.opt"): + shutil.rmtree("tmp.sqm.opt") -@unittest.skipIf(skip_bond_order_system, "dpdata does not have BondOrderSystem. One may install rdkit to fix.") +@unittest.skipIf( + skip_bond_order_system, + "dpdata does not have BondOrderSystem. 
One may install rdkit to fix.", +) class TestAmberSqmIn(unittest.TestCase): def setUp(self): - self.system = dpdata.BondOrderSystem("amber/methane.mol", fmt='mol', type_map=['H','C']) - with open('amber/sqm.in', 'r') as f: + self.system = dpdata.BondOrderSystem( + "amber/methane.mol", fmt="mol", type_map=["H", "C"] + ) + with open("amber/sqm.in", "r") as f: self.sqm_in = f.read() - + def test_sqm_in(self): - self.system.to("sqm/in", 'amber/sqm_test.in') - with open('amber/sqm_test.in', 'r') as f: + self.system.to("sqm/in", "amber/sqm_test.in") + with open("amber/sqm_test.in", "r") as f: self.sqm_in_test = f.read() self.assertEqual(self.sqm_in, self.sqm_in_test) - + def tearDown(self): if os.path.isfile("amber/sqm_test.in"): os.remove("amber/sqm_test.in") - diff --git a/tests/test_ase_traj.py b/tests/test_ase_traj.py index 6c37a31ce..6f957f848 100644 --- a/tests/test_ase_traj.py +++ b/tests/test_ase_traj.py @@ -3,6 +3,7 @@ import unittest from context import dpdata from comp_sys import CompLabeledSys, IsPBC + try: import ase except ModuleNotFoundError: @@ -10,31 +11,37 @@ else: skip_ase = False -@unittest.skipIf(skip_ase,"skip ase related test. install ase to fix") + +@unittest.skipIf(skip_ase, "skip ase related test. install ase to fix") class TestASEtraj1(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.multi_systems = dpdata.MultiSystems.from_file('ase_traj/HeAlO.traj', fmt='ase_traj/structure') - self.system_1 = self.multi_systems.systems['Al0He4O0'] - self.system_2 = dpdata.LabeledSystem('ase_traj/Al0He4O0', fmt='deepmd') + def setUp(self): + self.multi_systems = dpdata.MultiSystems.from_file( + "ase_traj/HeAlO.traj", fmt="ase_traj/structure" + ) + self.system_1 = self.multi_systems.systems["Al0He4O0"] + self.system_2 = dpdata.LabeledSystem("ase_traj/Al0He4O0", fmt="deepmd") self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 -@unittest.skipIf(skip_ase,"skip ase related test. 
install ase to fix") + +@unittest.skipIf(skip_ase, "skip ase related test. install ase to fix") class TestASEtraj1(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.system_temp0 = dpdata.MultiSystems.from_file(file_name='ase_traj/HeAlO.traj', fmt='ase/structure') - self.system_1 = self.system_temp0.systems['Al2He1O3'] # .sort_atom_types() - self.system_temp1 = dpdata.LabeledSystem('ase_traj/Al2He1O3', fmt='deepmd') - self.system_temp2 = dpdata.LabeledSystem('ase_traj/Al4He4O6', fmt='deepmd') + def setUp(self): + self.system_temp0 = dpdata.MultiSystems.from_file( + file_name="ase_traj/HeAlO.traj", fmt="ase/structure" + ) + self.system_1 = self.system_temp0.systems["Al2He1O3"] # .sort_atom_types() + self.system_temp1 = dpdata.LabeledSystem("ase_traj/Al2He1O3", fmt="deepmd") + self.system_temp2 = dpdata.LabeledSystem("ase_traj/Al4He4O6", fmt="deepmd") self.system_temp3 = dpdata.MultiSystems(self.system_temp2, self.system_temp1) - self.system_2 = self.system_temp3.systems['Al2He1O3'] + self.system_2 = self.system_temp3.systems["Al2He1O3"] self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_bond_order_system.py b/tests/test_bond_order_system.py index 95c595fee..d0b8fbd07 100644 --- a/tests/test_bond_order_system.py +++ b/tests/test_bond_order_system.py @@ -2,6 +2,7 @@ import unittest from context import dpdata import glob + try: from rdkit import Chem from rdkit.Chem import AllChem @@ -16,9 +17,11 @@ from copy import deepcopy -@unittest.skipIf(skip_bond_order_system, "dpdata does not have BondOrderSystem. One may install rdkit to fix.") +@unittest.skipIf( + skip_bond_order_system, + "dpdata does not have BondOrderSystem. 
One may install rdkit to fix.", +) class TestBondOrderSystem(unittest.TestCase): - def test_from_rdkit_mol(self): mol = Chem.MolFromSmiles("CC") mol = Chem.AddHs(mol) @@ -28,44 +31,66 @@ def test_from_rdkit_mol(self): self.assertEqual(system.get_nbonds(), 7) def test_from_mol_file(self): - syst = dpdata.BondOrderSystem("bond_order/CH3OH.mol", fmt='mol', type_map=['O','C','H']) + syst = dpdata.BondOrderSystem( + "bond_order/CH3OH.mol", fmt="mol", type_map=["O", "C", "H"] + ) self.assertEqual(syst.get_nframes(), 1) self.assertEqual(syst.get_nbonds(), 5) self.assertEqual(syst.get_natoms(), 6) - self.assertEqual(syst['atom_names'], ['O','C','H']) - self.assertAlmostEqual(syst['coords'][0][0][0], -0.3858) - + self.assertEqual(syst["atom_names"], ["O", "C", "H"]) + self.assertAlmostEqual(syst["coords"][0][0][0], -0.3858) + def test_from_sdf_file(self): - syst = dpdata.BondOrderSystem("bond_order/methane.sdf", type_map=['C','H']) + syst = dpdata.BondOrderSystem("bond_order/methane.sdf", type_map=["C", "H"]) self.assertEqual(syst.get_nframes(), 4) self.assertEqual(syst.get_nbonds(), 4) self.assertEqual(syst.get_natoms(), 5) - self.assertEqual(syst['atom_names'], ['C','H']) - self.assertAlmostEqual(syst['coords'][0][0][0], 0.0059) - self.assertAlmostEqual(syst['coords'][1][0][0], 0.0043) - self.assertAlmostEqual(syst['coords'][2][0][0], 0.0071) - self.assertAlmostEqual(syst['coords'][3][0][0], 0.0032) - + self.assertEqual(syst["atom_names"], ["C", "H"]) + self.assertAlmostEqual(syst["coords"][0][0][0], 0.0059) + self.assertAlmostEqual(syst["coords"][1][0][0], 0.0043) + self.assertAlmostEqual(syst["coords"][2][0][0], 0.0071) + self.assertAlmostEqual(syst["coords"][3][0][0], 0.0032) + def test_from_sdf_file_err(self): - self.assertRaises(ValueError, dpdata.BondOrderSystem, "bond_order/methane_ethane.sdf") + self.assertRaises( + ValueError, dpdata.BondOrderSystem, "bond_order/methane_ethane.sdf" + ) def test_regularize_formal_charges(self): - non_regular = 
Chem.MolFromMolFile("bond_order/formal_charge.mol", removeHs=False) + non_regular = Chem.MolFromMolFile( + "bond_order/formal_charge.mol", removeHs=False + ) regular = dpdata.BondOrderSystem("bond_order/formal_charge.mol", fmt="mol") self.assertFalse(non_regular) self.assertTrue(isinstance(regular.rdkit_mol, Chem.rdchem.Mol)) - + def test_formal_charge(self): - names = ["C5H5-", "CH3CC-", "CH3NC", "CH3NH3+", "CH3NO2", "OCH3+", - "gly", "arg", "oxpy", "CH3OPO3_2-", "CH3PH3+", "CH3OAsO3_2-", - "CH3SH", "CH3_2SO", "CH3_2SO2", "CH3SO3-", "BOH4-"] + names = [ + "C5H5-", + "CH3CC-", + "CH3NC", + "CH3NH3+", + "CH3NO2", + "OCH3+", + "gly", + "arg", + "oxpy", + "CH3OPO3_2-", + "CH3PH3+", + "CH3OAsO3_2-", + "CH3SH", + "CH3_2SO", + "CH3_2SO2", + "CH3SO3-", + "BOH4-", + ] charges = [-1, -1, 0, 1, 0, 1, 0, 1, 0, -2, 1, -2, 0, 0, 0, -1, -1] mols = [dpdata.BondOrderSystem(f"bond_order/{name}.mol") for name in names] self.assertEqual(charges, [mol.get_charge() for mol in mols]) def test_read_other_format_without_bond_info(self): self.assertRaises(RuntimeError, dpdata.BondOrderSystem, "gromacs/1h.gro") - + def test_dump_to_deepmd_raw(self): syst = dpdata.BondOrderSystem("bond_order/methane.sdf", fmt="sdf") syst.to_deepmd_raw("bond_order/methane") @@ -74,9 +99,9 @@ def test_dump_to_deepmd_raw(self): bonds = np.loadtxt("bond_order/methane/bonds.raw") for bond_idx in range(4): for ii in range(3): - self.assertEqual(syst['bonds'][bond_idx][ii], bonds[bond_idx][ii]) + self.assertEqual(syst["bonds"][bond_idx][ii], bonds[bond_idx][ii]) shutil.rmtree("bond_order/methane") - + def test_dump_to_deepmd_npy(self): syst = dpdata.BondOrderSystem("bond_order/methane.sdf", fmt="sdf") syst.to_deepmd_npy("bond_order/methane") @@ -85,9 +110,9 @@ def test_dump_to_deepmd_npy(self): bonds = np.loadtxt("bond_order/methane/bonds.raw") for bond_idx in range(4): for ii in range(3): - self.assertEqual(syst['bonds'][bond_idx][ii], bonds[bond_idx][ii]) + self.assertEqual(syst["bonds"][bond_idx][ii], 
bonds[bond_idx][ii]) shutil.rmtree("bond_order/methane") - + def test_dump_to_sdf_file(self): s1 = dpdata.BondOrderSystem("bond_order/methane.sdf", fmt="sdf") s2 = deepcopy(s1) @@ -97,26 +122,31 @@ def test_dump_to_sdf_file(self): nsyst = dpdata.BondOrderSystem("bond_order/test.sdf", fmt="sdf") self.assertEqual(nsyst["coords"][0, 0, 0] - s1["coords"][0, 0, 0], 1.0) os.remove("bond_order/test.sdf") - + def test_sanitize_mol_obabel(self): cnt = 0 for sdf_file in glob.glob("bond_order/refined-set-ligands/obabel/*sdf"): - syst = dpdata.BondOrderSystem(sdf_file, sanitize_level='high', verbose=False) + syst = dpdata.BondOrderSystem( + sdf_file, sanitize_level="high", verbose=False + ) if syst.rdkit_mol is None: cnt += 1 self.assertEqual(cnt, 0) - + def test_sanitize_mol_origin(self): cnt = 0 for sdf_file in glob.glob("bond_order/refined-set-ligands/origin/*sdf"): - syst = dpdata.BondOrderSystem(sdf_file, sanitize_level='high', verbose=False) + syst = dpdata.BondOrderSystem( + sdf_file, sanitize_level="high", verbose=False + ) if syst.rdkit_mol is None: cnt += 1 self.assertEqual(cnt, 0) - + def tearDown(self): if os.path.exists("tests/.cache"): shutil.rmtree("tests/.cache") -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_cell_to_low_triangle.py b/tests/test_cell_to_low_triangle.py index ca60b35d1..6696e1710 100644 --- a/tests/test_cell_to_low_triangle.py +++ b/tests/test_cell_to_low_triangle.py @@ -3,47 +3,65 @@ import unittest from context import dpdata + class TestCellToLowTriangle(unittest.TestCase): def test_func1(self): - cell_1 = dpdata.cp2k.cell.cell_to_low_triangle(6,6,6,np.pi*1/2, np.pi*1/2, np.pi*1/2) - cell_2 = np.asarray([[6,0,0],[0,6,0],[0,0,6]]) + cell_1 = dpdata.cp2k.cell.cell_to_low_triangle( + 6, 6, 6, np.pi * 1 / 2, np.pi * 1 / 2, np.pi * 1 / 2 + ) + cell_2 = np.asarray([[6, 0, 0], [0, 6, 0], [0, 0, 6]]) for ii in range(3): for jj in range(3): - self.assertAlmostEqual(cell_1[ii,jj], cell_2[ii,jj], 
places=6) + self.assertAlmostEqual(cell_1[ii, jj], cell_2[ii, jj], places=6) def test_func2(self): - cell_1 = dpdata.cp2k.cell.cell_to_low_triangle(6,6,6,np.pi*1/3, np.pi*1/3, np.pi*1/3) - cell_2 = np.asarray([ - [6,0,0], - [3,3*np.sqrt(3),0], - [3,np.sqrt(3),2*np.sqrt(6)]]) + cell_1 = dpdata.cp2k.cell.cell_to_low_triangle( + 6, 6, 6, np.pi * 1 / 3, np.pi * 1 / 3, np.pi * 1 / 3 + ) + cell_2 = np.asarray( + [[6, 0, 0], [3, 3 * np.sqrt(3), 0], [3, np.sqrt(3), 2 * np.sqrt(6)]] + ) for ii in range(3): for jj in range(3): - self.assertAlmostEqual(cell_1[ii,jj], cell_2[ii,jj], places=6) + self.assertAlmostEqual(cell_1[ii, jj], cell_2[ii, jj], places=6) def test_func3(self): - cell_1 = dpdata.cp2k.cell.cell_to_low_triangle(6,7,8,np.pi*133/180,np.pi*84/180,np.pi*69/180) - cell_2 = np.asarray([[ 6.0, 0.0, 0.0], - [ 2.5085757, 6.535063 , 0.0], - [ 0.8362277, -6.1651506, 5.0290794]], dtype='float32') + cell_1 = dpdata.cp2k.cell.cell_to_low_triangle( + 6, 7, 8, np.pi * 133 / 180, np.pi * 84 / 180, np.pi * 69 / 180 + ) + cell_2 = np.asarray( + [ + [6.0, 0.0, 0.0], + [2.5085757, 6.535063, 0.0], + [0.8362277, -6.1651506, 5.0290794], + ], + dtype="float32", + ) for ii in range(3): for jj in range(3): - self.assertAlmostEqual(cell_1[ii,jj], cell_2[ii,jj], places=6) + self.assertAlmostEqual(cell_1[ii, jj], cell_2[ii, jj], places=6) def test_func4(self): with self.assertRaises(Exception) as c: - dpdata.cp2k.cell.cell_to_low_triangle(0.1,6,6,np.pi*1/2,np.pi*1/2,np.pi*1/2) + dpdata.cp2k.cell.cell_to_low_triangle( + 0.1, 6, 6, np.pi * 1 / 2, np.pi * 1 / 2, np.pi * 1 / 2 + ) self.assertTrue("A==0.1" in str(c.exception)) def test_func5(self): with self.assertRaises(Exception) as c: - dpdata.cp2k.cell.cell_to_low_triangle(6,6,6,np.pi*3/180,np.pi*1/2,np.pi*1/2) + dpdata.cp2k.cell.cell_to_low_triangle( + 6, 6, 6, np.pi * 3 / 180, np.pi * 1 / 2, np.pi * 1 / 2 + ) self.assertTrue("alpha" in str(c.exception)) def test_func6(self): with self.assertRaises(Exception) as c: - 
dpdata.cp2k.cell.cell_to_low_triangle(6,7,8,np.pi*153/180,np.pi*84/180,np.pi*69/180) + dpdata.cp2k.cell.cell_to_low_triangle( + 6, 7, 8, np.pi * 153 / 180, np.pi * 84 / 180, np.pi * 69 / 180 + ) self.assertTrue("lz^2" in str(c.exception)) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_cli.py b/tests/test_cli.py index 3d6d29e47..7275237a7 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -5,12 +5,22 @@ class TestCli(unittest.TestCase, TestPOSCARoh): - @classmethod def setUpClass(cls) -> None: - sp.check_output(["dpdata", "poscars/conf.lmp", "--type-map", "O", "H", "-olammps/lmp", "-O", "tmp.lmp", "--no-labeled"]) - cls.system = dpdata.System('tmp.lmp', fmt='lammps/lmp', - type_map = ['O', 'H']) + sp.check_output( + [ + "dpdata", + "poscars/conf.lmp", + "--type-map", + "O", + "H", + "-olammps/lmp", + "-O", + "tmp.lmp", + "--no-labeled", + ] + ) + cls.system = dpdata.System("tmp.lmp", fmt="lammps/lmp", type_map=["O", "H"]) @classmethod def tearDownClass(cls) -> None: diff --git a/tests/test_corr.py b/tests/test_corr.py index fd8fa2e78..74fb2a103 100644 --- a/tests/test_corr.py +++ b/tests/test_corr.py @@ -3,6 +3,7 @@ from comp_sys import CompLabeledSys from comp_sys import IsPBC + class TestCorr(unittest.TestCase, CompLabeledSys, IsPBC): """Make a test to get a correction of two systems. @@ -10,19 +11,20 @@ class TestCorr(unittest.TestCase, CompLabeledSys, IsPBC): --------- https://doi.org/10.26434/chemrxiv.14120447 """ + def setUp(self): - ll="amber/corr/low_level" - hl="amber/corr/high_level" - ncfile="amber/corr/rc.nc" - parmfile="amber/corr/qmmm.parm7" - ep = r'@%EP' + ll = "amber/corr/low_level" + hl = "amber/corr/high_level" + ncfile = "amber/corr/rc.nc" + parmfile = "amber/corr/qmmm.parm7" + ep = r"@%EP" target = ":1" - cutoff = 6. 
+ cutoff = 6.0 interactwith = "(%s)<:%f&!%s" % (target, cutoff, ep) s_ll = dpdata.LabeledSystem("amber/corr/dp_ll", fmt="deepmd/npy") s_hl = dpdata.LabeledSystem("amber/corr/dp_hl", fmt="deepmd/npy") self.system_1 = s_ll.correction(s_hl) - self.system_2 = dpdata.LabeledSystem("amber/corr/dp_corr" ,fmt="deepmd/npy") + self.system_2 = dpdata.LabeledSystem("amber/corr/dp_corr", fmt="deepmd/npy") self.places = 5 self.e_places = 4 self.f_places = 6 @@ -31,16 +33,21 @@ def setUp(self): class TestCorr(unittest.TestCase, CompLabeledSys, IsPBC): """Make a test to get a correction of two MultiSystems.""" + def setUp(self): - s_ll = dpdata.MultiSystems(dpdata.LabeledSystem("amber/corr/dp_ll", fmt="deepmd/npy")) - s_hl = dpdata.MultiSystems(dpdata.LabeledSystem("amber/corr/dp_hl", fmt="deepmd/npy")) + s_ll = dpdata.MultiSystems( + dpdata.LabeledSystem("amber/corr/dp_ll", fmt="deepmd/npy") + ) + s_hl = dpdata.MultiSystems( + dpdata.LabeledSystem("amber/corr/dp_hl", fmt="deepmd/npy") + ) self.system_1 = tuple(s_ll.correction(s_hl).systems.values())[0] - self.system_2 = dpdata.LabeledSystem("amber/corr/dp_corr" ,fmt="deepmd/npy") + self.system_2 = dpdata.LabeledSystem("amber/corr/dp_corr", fmt="deepmd/npy") self.places = 5 self.e_places = 4 self.f_places = 6 self.v_places = 6 -if __name__ == '__main__': - unittest.main() \ No newline at end of file +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_cp2k_aimd_output.py b/tests/test_cp2k_aimd_output.py index 8e2bdd561..471153b10 100644 --- a/tests/test_cp2k_aimd_output.py +++ b/tests/test_cp2k_aimd_output.py @@ -1,30 +1,34 @@ - #%% import os import numpy as np import unittest from context import dpdata from comp_sys import CompLabeledSys + #%% class TestCp2kAimdOutput(unittest.TestCase, CompLabeledSys): def setUp(self): - self.system_1 = dpdata.LabeledSystem('cp2k/aimd',fmt='cp2k/aimd_output') - self.system_2 = dpdata.LabeledSystem('cp2k/aimd/deepmd', fmt='deepmd/npy') + self.system_1 = 
dpdata.LabeledSystem("cp2k/aimd", fmt="cp2k/aimd_output") + self.system_2 = dpdata.LabeledSystem("cp2k/aimd/deepmd", fmt="deepmd/npy") self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestCp2kAimdStressOutput(unittest.TestCase, CompLabeledSys): def setUp(self): - self.system_1 = dpdata.LabeledSystem('cp2k/aimd_stress',fmt='cp2k/aimd_output') - self.system_2 = dpdata.LabeledSystem('cp2k/aimd_stress/deepmd', fmt='deepmd/raw') + self.system_1 = dpdata.LabeledSystem("cp2k/aimd_stress", fmt="cp2k/aimd_output") + self.system_2 = dpdata.LabeledSystem( + "cp2k/aimd_stress/deepmd", fmt="deepmd/raw" + ) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 -#class TestCp2kAimdRestartOutput(unittest.TestCase, CompLabeledSys): + +# class TestCp2kAimdRestartOutput(unittest.TestCase, CompLabeledSys): # def setUp(self): # self.system_1 = dpdata.LabeledSystem('cp2k/restart_aimd',fmt='cp2k/aimd_output', restart=True) # self.system_2 = dpdata.LabeledSystem('cp2k/restart_aimd/deepmd', fmt='deepmd/raw') @@ -33,7 +37,7 @@ def setUp(self): # self.f_places = 6 # self.v_places = 4 # -#class TestCp2kAimdOutputError(unittest.TestCase): +# class TestCp2kAimdOutputError(unittest.TestCase): # def setUp(self): # pass # @@ -41,7 +45,7 @@ def setUp(self): # with self.assertRaises(AssertionError): # dpdata.LabeledSystem('cp2k/restart_aimd', fmt='cp2k/aimd_output', restart=False) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_cp2k_output.py b/tests/test_cp2k_output.py index eac3b182c..37c639c60 100644 --- a/tests/test_cp2k_output.py +++ b/tests/test_cp2k_output.py @@ -4,56 +4,71 @@ from context import dpdata from comp_sys import CompLabeledSys + class TestCp2kNormalOutput(unittest.TestCase, CompLabeledSys): def setUp(self): - self.system_1 = dpdata.LabeledSystem('cp2k/cp2k_normal_output/cp2k_output',fmt='cp2k/output') - self.system_2 = dpdata.LabeledSystem('cp2k/cp2k_normal_output/deepmd', 
fmt='deepmd/npy') + self.system_1 = dpdata.LabeledSystem( + "cp2k/cp2k_normal_output/cp2k_output", fmt="cp2k/output" + ) + self.system_2 = dpdata.LabeledSystem( + "cp2k/cp2k_normal_output/deepmd", fmt="deepmd/npy" + ) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 - + + class TestCP2KDuplicateHeader(unittest.TestCase, CompLabeledSys): def setUp(self): - self.system_1 = dpdata.LabeledSystem('cp2k/cp2k_duplicate_header/cp2k_output_duplicate_header',fmt='cp2k/output') - self.system_2 = dpdata.LabeledSystem('cp2k/cp2k_duplicate_header/deepmd', fmt='deepmd/npy') + self.system_1 = dpdata.LabeledSystem( + "cp2k/cp2k_duplicate_header/cp2k_output_duplicate_header", fmt="cp2k/output" + ) + self.system_2 = dpdata.LabeledSystem( + "cp2k/cp2k_duplicate_header/deepmd", fmt="deepmd/npy" + ) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestCp2kReplaceElementOutput(unittest.TestCase, CompLabeledSys): def setUp(self): - self.system_1 = dpdata.LabeledSystem('cp2k/cp2k_element_replace/cp2k_output_element_replace',fmt='cp2k/output') - self.system_2 = dpdata.LabeledSystem('cp2k/cp2k_element_replace/deepmd', fmt='deepmd/npy') + self.system_1 = dpdata.LabeledSystem( + "cp2k/cp2k_element_replace/cp2k_output_element_replace", fmt="cp2k/output" + ) + self.system_2 = dpdata.LabeledSystem( + "cp2k/cp2k_element_replace/deepmd", fmt="deepmd/npy" + ) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestNonCoveragedCP2KOutput(unittest.TestCase): - def setUp (self) : - self.system = dpdata.LabeledSystem('cp2k/cp2k_nocon_output', - fmt = 'cp2k/output') - def test_atom_types(self) : - self.assertEqual(self.system.data['atom_types'], []) + def setUp(self): + self.system = dpdata.LabeledSystem("cp2k/cp2k_nocon_output", fmt="cp2k/output") - def test_cells(self) : - self.assertEqual(self.system.data['cells'], []) + def test_atom_types(self): + self.assertEqual(self.system.data["atom_types"], []) - def test_coords(self) : 
- self.assertEqual(self.system.data['coords'], []) + def test_cells(self): + self.assertEqual(self.system.data["cells"], []) - def test_energies(self) : - self.assertEqual(self.system.data['energies'], []) + def test_coords(self): + self.assertEqual(self.system.data["coords"], []) - def test_forces(self) : - self.assertEqual(self.system.data['forces'], []) + def test_energies(self): + self.assertEqual(self.system.data["energies"], []) - def test_virials(self) : - self.assertFalse('virials' in self.system.data) + def test_forces(self): + self.assertEqual(self.system.data["forces"], []) + def test_virials(self): + self.assertFalse("virials" in self.system.data) -if __name__ == '__main__': - unittest.main() +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_deepmd_comp.py b/tests/test_deepmd_comp.py index 840712af4..3b8068597 100644 --- a/tests/test_deepmd_comp.py +++ b/tests/test_deepmd_comp.py @@ -1,82 +1,79 @@ -import os,shutil +import os, shutil import numpy as np import unittest from context import dpdata from comp_sys import CompLabeledSys, CompSys, IsPBC + class TestDeepmdLoadDumpComp(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - self.system_1.to_deepmd_npy('tmp.deepmd.npy', - prec = np.float64, - set_size = 2) - - self.system_2 = dpdata.LabeledSystem('tmp.deepmd.npy', - fmt = 'deepmd/npy', - type_map = ['O', 'H']) + def setUp(self): + self.system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + self.system_1.to_deepmd_npy("tmp.deepmd.npy", prec=np.float64, set_size=2) + + self.system_2 = dpdata.LabeledSystem( + "tmp.deepmd.npy", fmt="deepmd/npy", type_map=["O", "H"] + ) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - def tearDown(self) : - if os.path.exists('tmp.deepmd.npy'): - shutil.rmtree('tmp.deepmd.npy') - - -class TestDeepmdCompNoLabels(unittest.TestCase, CompSys, IsPBC) : - def setUp 
(self) : - self.system_1 = dpdata.System('poscars/POSCAR.h2o.md', - fmt = 'vasp/poscar') - self.system_1.to_deepmd_npy('tmp.deepmd.npy', - prec = np.float64, - set_size = 2) - self.system_2 = dpdata.System('tmp.deepmd.npy', - fmt = 'deepmd/npy', - type_map = ['O', 'H']) + def tearDown(self): + if os.path.exists("tmp.deepmd.npy"): + shutil.rmtree("tmp.deepmd.npy") + + +class TestDeepmdCompNoLabels(unittest.TestCase, CompSys, IsPBC): + def setUp(self): + self.system_1 = dpdata.System("poscars/POSCAR.h2o.md", fmt="vasp/poscar") + self.system_1.to_deepmd_npy("tmp.deepmd.npy", prec=np.float64, set_size=2) + self.system_2 = dpdata.System( + "tmp.deepmd.npy", fmt="deepmd/npy", type_map=["O", "H"] + ) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - def tearDown(self) : - if os.path.exists('tmp.deepmd.npy'): - shutil.rmtree('tmp.deepmd.npy') - + def tearDown(self): + if os.path.exists("tmp.deepmd.npy"): + shutil.rmtree("tmp.deepmd.npy") -class TestDeepmdCompNoLabels(unittest.TestCase, CompSys, IsPBC) : - def setUp(self) : - self.dir_name = 'tmp.deepmd.npy.nol' + +class TestDeepmdCompNoLabels(unittest.TestCase, CompSys, IsPBC): + def setUp(self): + self.dir_name = "tmp.deepmd.npy.nol" natoms = 3 - atom_names = ['O', 'H'] + atom_names = ["O", "H"] atom_numbs = [1, 2] - atom_types = np.array([0, 1, 1], dtype = np.int32) + atom_types = np.array([0, 1, 1], dtype=np.int32) nframes = 11 half_n = 6 idx = [range(0, half_n), range(half_n, nframes)] - os.makedirs(self.dir_name, exist_ok = True) - os.makedirs(os.path.join(self.dir_name, 'set.000'), exist_ok = True) - os.makedirs(os.path.join(self.dir_name, 'set.001'), exist_ok = True) - np.savetxt(os.path.join(self.dir_name, 'type.raw'), atom_types, fmt = '%d') - + os.makedirs(self.dir_name, exist_ok=True) + os.makedirs(os.path.join(self.dir_name, "set.000"), exist_ok=True) + os.makedirs(os.path.join(self.dir_name, "set.001"), exist_ok=True) + np.savetxt(os.path.join(self.dir_name, "type.raw"), atom_types, 
fmt="%d") + coords = np.random.random([nframes, natoms, 3]) cells = np.random.random([nframes, 3, 3]) - np.save(os.path.join(self.dir_name, 'set.000', 'coord.npy'), coords[idx[0]]) - np.save(os.path.join(self.dir_name, 'set.000', 'box.npy'), cells [idx[0]]) - np.save(os.path.join(self.dir_name, 'set.001', 'coord.npy'), coords[idx[1]]) - np.save(os.path.join(self.dir_name, 'set.001', 'box.npy'), cells [idx[1]]) - + np.save(os.path.join(self.dir_name, "set.000", "coord.npy"), coords[idx[0]]) + np.save(os.path.join(self.dir_name, "set.000", "box.npy"), cells[idx[0]]) + np.save(os.path.join(self.dir_name, "set.001", "coord.npy"), coords[idx[1]]) + np.save(os.path.join(self.dir_name, "set.001", "box.npy"), cells[idx[1]]) + data = { - 'atom_names' : atom_names, - 'atom_types' : atom_types, - 'atom_numbs' : atom_numbs, - 'coords' : coords, - 'cells' : cells, - 'orig' : np.zeros(3), + "atom_names": atom_names, + "atom_types": atom_types, + "atom_numbs": atom_numbs, + "coords": coords, + "cells": cells, + "orig": np.zeros(3), } - self.system_1 = dpdata.System(self.dir_name, fmt = 'deepmd/npy', type_map = ['O', 'H']) + self.system_1 = dpdata.System( + self.dir_name, fmt="deepmd/npy", type_map=["O", "H"] + ) self.system_2 = dpdata.System() self.system_2.data = data @@ -85,11 +82,10 @@ def setUp(self) : self.f_places = 6 self.v_places = 6 - - def tearDown(self) : + def tearDown(self): if os.path.exists(self.dir_name): shutil.rmtree(self.dir_name) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_deepmd_hdf5.py b/tests/test_deepmd_hdf5.py index 08d25730e..24ed4f0dd 100644 --- a/tests/test_deepmd_hdf5.py +++ b/tests/test_deepmd_hdf5.py @@ -4,67 +4,68 @@ from context import dpdata from comp_sys import CompLabeledSys, CompSys, IsNoPBC, IsPBC, MultiSystems + class TestDeepmdLoadDumpHDF5(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - 
self.system_1.to_deepmd_hdf5('tmp.deepmd.hdf5', - prec = np.float64, - set_size = 2) + def setUp(self): + self.system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + self.system_1.to_deepmd_hdf5("tmp.deepmd.hdf5", prec=np.float64, set_size=2) - self.system_2 = dpdata.LabeledSystem('tmp.deepmd.hdf5', - fmt = 'deepmd/hdf5', - type_map = ['O', 'H']) + self.system_2 = dpdata.LabeledSystem( + "tmp.deepmd.hdf5", fmt="deepmd/hdf5", type_map=["O", "H"] + ) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - def tearDown(self) : - if os.path.exists('tmp.deepmd.hdf5'): - os.remove('tmp.deepmd.hdf5') + def tearDown(self): + if os.path.exists("tmp.deepmd.hdf5"): + os.remove("tmp.deepmd.hdf5") -class TestDeepmdHDF5NoLabels(unittest.TestCase, CompSys, IsPBC) : - def setUp (self) : - self.system_1 = dpdata.System('poscars/POSCAR.h2o.md', - fmt = 'vasp/poscar') - self.system_1.to_deepmd_hdf5('tmp.deepmd.hdf5', - prec = np.float64, - set_size = 2) - self.system_2 = dpdata.System('tmp.deepmd.hdf5', - fmt = 'deepmd/hdf5', - type_map = ['O', 'H']) +class TestDeepmdHDF5NoLabels(unittest.TestCase, CompSys, IsPBC): + def setUp(self): + self.system_1 = dpdata.System("poscars/POSCAR.h2o.md", fmt="vasp/poscar") + self.system_1.to_deepmd_hdf5("tmp.deepmd.hdf5", prec=np.float64, set_size=2) + self.system_2 = dpdata.System( + "tmp.deepmd.hdf5", fmt="deepmd/hdf5", type_map=["O", "H"] + ) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - def tearDown(self) : - if os.path.exists('tmp.deepmd.hdf5'): - os.remove('tmp.deepmd.hdf5') + def tearDown(self): + if os.path.exists("tmp.deepmd.hdf5"): + os.remove("tmp.deepmd.hdf5") class TestHDF5Multi(unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC): - def setUp (self): + def setUp(self): self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - system_1 = dpdata.LabeledSystem('gaussian/methane.gaussianlog', fmt='gaussian/log') - system_2 = 
dpdata.LabeledSystem('gaussian/methane_reordered.gaussianlog', fmt='gaussian/log') - system_3 = dpdata.LabeledSystem('gaussian/methane_sub.gaussianlog', fmt='gaussian/log') + system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + system_2 = dpdata.LabeledSystem( + "gaussian/methane_reordered.gaussianlog", fmt="gaussian/log" + ) + system_3 = dpdata.LabeledSystem( + "gaussian/methane_sub.gaussianlog", fmt="gaussian/log" + ) systems = dpdata.MultiSystems(system_1, system_2, system_3) systems.to_deepmd_hdf5("tmp.deepmd.hdf5") self.systems = dpdata.MultiSystems().from_deepmd_hdf5("tmp.deepmd.hdf5") - self.system_names = ['C1H4', 'C1H3'] - self.system_sizes = {'C1H4':2, 'C1H3':1} - self.atom_names = ['C', 'H'] - self.system_1 = self.systems['C1H3'] + self.system_names = ["C1H4", "C1H3"] + self.system_sizes = {"C1H4": 2, "C1H3": 1} + self.atom_names = ["C", "H"] + self.system_1 = self.systems["C1H3"] self.system_2 = system_3 - def tearDown(self) : - if os.path.exists('tmp.deepmd.hdf5'): - os.remove('tmp.deepmd.hdf5') + def tearDown(self): + if os.path.exists("tmp.deepmd.hdf5"): + os.remove("tmp.deepmd.hdf5") diff --git a/tests/test_deepmd_raw.py b/tests/test_deepmd_raw.py index 241da7167..5ba54877d 100644 --- a/tests/test_deepmd_raw.py +++ b/tests/test_deepmd_raw.py @@ -1,16 +1,16 @@ -import os,shutil +import os, shutil import numpy as np import unittest from context import dpdata from comp_sys import CompLabeledSys, CompSys, IsPBC + class TestDeepmdLoadRaw(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - self.system_2 = dpdata.LabeledSystem('poscars/deepmd.h2o.md', - fmt = 'deepmd/raw', - type_map = ['O', 'H']) + def setUp(self): + self.system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + self.system_2 = dpdata.LabeledSystem( + "poscars/deepmd.h2o.md", fmt="deepmd/raw", type_map=["O", "H"] + ) self.places = 
6 self.e_places = 6 self.f_places = 6 @@ -18,163 +18,162 @@ def setUp (self) : class TestDeepmdDumpRaw(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - self.system_1.to_deepmd_raw('tmp.deepmd') - self.system_2 = dpdata.LabeledSystem('tmp.deepmd', type_map = ['O', 'H']) + def setUp(self): + self.system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + self.system_1.to_deepmd_raw("tmp.deepmd") + self.system_2 = dpdata.LabeledSystem("tmp.deepmd", type_map=["O", "H"]) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - def tearDown(self) : - if os.path.exists('tmp.deepmd'): - shutil.rmtree('tmp.deepmd') + def tearDown(self): + if os.path.exists("tmp.deepmd"): + shutil.rmtree("tmp.deepmd") class TestDeepmdTypeMap(unittest.TestCase): - def tearDown(self) : - if os.path.exists('tmp.deepmd'): - shutil.rmtree('tmp.deepmd') - - def test_type_map (self) : - system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - system_1.to_deepmd_raw('tmp.deepmd') - with open(os.path.join('tmp.deepmd', 'type_map.raw')) as fp: + def tearDown(self): + if os.path.exists("tmp.deepmd"): + shutil.rmtree("tmp.deepmd") + + def test_type_map(self): + system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + system_1.to_deepmd_raw("tmp.deepmd") + with open(os.path.join("tmp.deepmd", "type_map.raw")) as fp: tm = fp.read().split() - self.assertEqual(tm, ['O', 'H']) - self.assertEqual(system_1['atom_names'], ['O', 'H']) - self.assertEqual(system_1['atom_types'][0], 0) - self.assertEqual(system_1['atom_types'][1], 0) - self.assertEqual(system_1['atom_types'][2], 1) - self.assertEqual(system_1['atom_types'][3], 1) - self.assertEqual(system_1['atom_types'][4], 1) - self.assertEqual(system_1['atom_types'][5], 1) - - def test_type_map_load (self) : - system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 
'vasp/outcar') - system_1.to_deepmd_raw('tmp.deepmd') - system_2 = dpdata.LabeledSystem('tmp.deepmd') - self.assertEqual(system_2['atom_names'], ['O', 'H']) - self.assertEqual(system_2['atom_types'][0], 0) - self.assertEqual(system_2['atom_types'][1], 0) - self.assertEqual(system_2['atom_types'][2], 1) - self.assertEqual(system_2['atom_types'][3], 1) - self.assertEqual(system_2['atom_types'][4], 1) - self.assertEqual(system_2['atom_types'][5], 1) - self.assertEqual(system_2['atom_numbs'][0], 2) - self.assertEqual(system_2['atom_numbs'][1], 4) - - def test_type_map_enforce (self) : - system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - system_1.to_deepmd_raw('tmp.deepmd') - system_2 = dpdata.LabeledSystem('tmp.deepmd', type_map = ['H', 'O']) - self.assertEqual(system_2['atom_names'], ['H', 'O']) - self.assertEqual(system_2['atom_types'][0], 1) - self.assertEqual(system_2['atom_types'][1], 1) - self.assertEqual(system_2['atom_types'][2], 0) - self.assertEqual(system_2['atom_types'][3], 0) - self.assertEqual(system_2['atom_types'][4], 0) - self.assertEqual(system_2['atom_types'][5], 0) - self.assertEqual(system_2['atom_numbs'][0], 4) - self.assertEqual(system_2['atom_numbs'][1], 2) - - def test_npy_type_map (self) : - system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - system_1.to_deepmd_npy('tmp.deepmd') - with open(os.path.join('tmp.deepmd', 'type_map.raw')) as fp: + self.assertEqual(tm, ["O", "H"]) + self.assertEqual(system_1["atom_names"], ["O", "H"]) + self.assertEqual(system_1["atom_types"][0], 0) + self.assertEqual(system_1["atom_types"][1], 0) + self.assertEqual(system_1["atom_types"][2], 1) + self.assertEqual(system_1["atom_types"][3], 1) + self.assertEqual(system_1["atom_types"][4], 1) + self.assertEqual(system_1["atom_types"][5], 1) + + def test_type_map_load(self): + system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + system_1.to_deepmd_raw("tmp.deepmd") + system_2 = 
dpdata.LabeledSystem("tmp.deepmd") + self.assertEqual(system_2["atom_names"], ["O", "H"]) + self.assertEqual(system_2["atom_types"][0], 0) + self.assertEqual(system_2["atom_types"][1], 0) + self.assertEqual(system_2["atom_types"][2], 1) + self.assertEqual(system_2["atom_types"][3], 1) + self.assertEqual(system_2["atom_types"][4], 1) + self.assertEqual(system_2["atom_types"][5], 1) + self.assertEqual(system_2["atom_numbs"][0], 2) + self.assertEqual(system_2["atom_numbs"][1], 4) + + def test_type_map_enforce(self): + system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + system_1.to_deepmd_raw("tmp.deepmd") + system_2 = dpdata.LabeledSystem("tmp.deepmd", type_map=["H", "O"]) + self.assertEqual(system_2["atom_names"], ["H", "O"]) + self.assertEqual(system_2["atom_types"][0], 1) + self.assertEqual(system_2["atom_types"][1], 1) + self.assertEqual(system_2["atom_types"][2], 0) + self.assertEqual(system_2["atom_types"][3], 0) + self.assertEqual(system_2["atom_types"][4], 0) + self.assertEqual(system_2["atom_types"][5], 0) + self.assertEqual(system_2["atom_numbs"][0], 4) + self.assertEqual(system_2["atom_numbs"][1], 2) + + def test_npy_type_map(self): + system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + system_1.to_deepmd_npy("tmp.deepmd") + with open(os.path.join("tmp.deepmd", "type_map.raw")) as fp: tm = fp.read().split() - self.assertEqual(tm, ['O', 'H']) - self.assertEqual(system_1['atom_names'], ['O', 'H']) - self.assertEqual(system_1['atom_types'][0], 0) - self.assertEqual(system_1['atom_types'][1], 0) - self.assertEqual(system_1['atom_types'][2], 1) - self.assertEqual(system_1['atom_types'][3], 1) - self.assertEqual(system_1['atom_types'][4], 1) - self.assertEqual(system_1['atom_types'][5], 1) - - def test_npy_type_map_load (self) : - system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - system_1.to_deepmd_npy('tmp.deepmd') - system_2 = dpdata.LabeledSystem('tmp.deepmd', fmt = 
'deepmd/npy') - self.assertEqual(system_2['atom_names'], ['O', 'H']) - self.assertEqual(system_2['atom_types'][0], 0) - self.assertEqual(system_2['atom_types'][1], 0) - self.assertEqual(system_2['atom_types'][2], 1) - self.assertEqual(system_2['atom_types'][3], 1) - self.assertEqual(system_2['atom_types'][4], 1) - self.assertEqual(system_2['atom_types'][5], 1) - self.assertEqual(system_2['atom_numbs'][0], 2) - self.assertEqual(system_2['atom_numbs'][1], 4) - - def test_npy_type_map_enforce (self) : - system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - system_1.to_deepmd_npy('tmp.deepmd') - system_2 = dpdata.LabeledSystem('tmp.deepmd', type_map = ['H', 'O'], fmt = 'deepmd/npy') - self.assertEqual(system_2['atom_names'], ['H', 'O']) - self.assertEqual(system_2['atom_types'][0], 1) - self.assertEqual(system_2['atom_types'][1], 1) - self.assertEqual(system_2['atom_types'][2], 0) - self.assertEqual(system_2['atom_types'][3], 0) - self.assertEqual(system_2['atom_types'][4], 0) - self.assertEqual(system_2['atom_types'][5], 0) - self.assertEqual(system_2['atom_numbs'][0], 4) - self.assertEqual(system_2['atom_numbs'][1], 2) - - - - -class TestDeepmdRawNoLabels(unittest.TestCase, CompSys, IsPBC) : - def setUp (self) : - self.system_1 = dpdata.System('poscars/POSCAR.h2o.md', - fmt = 'vasp/poscar') - self.system_1.to_deepmd_raw('tmp.deepmd') - self.system_2 = dpdata.System('tmp.deepmd', - fmt = 'deepmd/raw', - type_map = ['O', 'H']) + self.assertEqual(tm, ["O", "H"]) + self.assertEqual(system_1["atom_names"], ["O", "H"]) + self.assertEqual(system_1["atom_types"][0], 0) + self.assertEqual(system_1["atom_types"][1], 0) + self.assertEqual(system_1["atom_types"][2], 1) + self.assertEqual(system_1["atom_types"][3], 1) + self.assertEqual(system_1["atom_types"][4], 1) + self.assertEqual(system_1["atom_types"][5], 1) + + def test_npy_type_map_load(self): + system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + 
system_1.to_deepmd_npy("tmp.deepmd") + system_2 = dpdata.LabeledSystem("tmp.deepmd", fmt="deepmd/npy") + self.assertEqual(system_2["atom_names"], ["O", "H"]) + self.assertEqual(system_2["atom_types"][0], 0) + self.assertEqual(system_2["atom_types"][1], 0) + self.assertEqual(system_2["atom_types"][2], 1) + self.assertEqual(system_2["atom_types"][3], 1) + self.assertEqual(system_2["atom_types"][4], 1) + self.assertEqual(system_2["atom_types"][5], 1) + self.assertEqual(system_2["atom_numbs"][0], 2) + self.assertEqual(system_2["atom_numbs"][1], 4) + + def test_npy_type_map_enforce(self): + system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + system_1.to_deepmd_npy("tmp.deepmd") + system_2 = dpdata.LabeledSystem( + "tmp.deepmd", type_map=["H", "O"], fmt="deepmd/npy" + ) + self.assertEqual(system_2["atom_names"], ["H", "O"]) + self.assertEqual(system_2["atom_types"][0], 1) + self.assertEqual(system_2["atom_types"][1], 1) + self.assertEqual(system_2["atom_types"][2], 0) + self.assertEqual(system_2["atom_types"][3], 0) + self.assertEqual(system_2["atom_types"][4], 0) + self.assertEqual(system_2["atom_types"][5], 0) + self.assertEqual(system_2["atom_numbs"][0], 4) + self.assertEqual(system_2["atom_numbs"][1], 2) + + +class TestDeepmdRawNoLabels(unittest.TestCase, CompSys, IsPBC): + def setUp(self): + self.system_1 = dpdata.System("poscars/POSCAR.h2o.md", fmt="vasp/poscar") + self.system_1.to_deepmd_raw("tmp.deepmd") + self.system_2 = dpdata.System( + "tmp.deepmd", fmt="deepmd/raw", type_map=["O", "H"] + ) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - def tearDown(self) : - if os.path.exists('tmp.deepmd'): - shutil.rmtree('tmp.deepmd') + def tearDown(self): + if os.path.exists("tmp.deepmd"): + shutil.rmtree("tmp.deepmd") -class TestDeepmdCompNoLabels(unittest.TestCase, CompSys, IsPBC) : - def setUp(self) : - self.dir_name = 'tmp.deepmd.nol' +class TestDeepmdCompNoLabels(unittest.TestCase, CompSys, IsPBC): + def setUp(self): 
+ self.dir_name = "tmp.deepmd.nol" natoms = 3 - atom_names = ['O', 'H'] + atom_names = ["O", "H"] atom_numbs = [1, 2] - atom_types = np.array([0, 1, 1], dtype = np.int32) + atom_types = np.array([0, 1, 1], dtype=np.int32) nframes = 11 - os.makedirs(self.dir_name, exist_ok = True) - np.savetxt(os.path.join(self.dir_name, 'type.raw'), atom_types, fmt = '%d') - + os.makedirs(self.dir_name, exist_ok=True) + np.savetxt(os.path.join(self.dir_name, "type.raw"), atom_types, fmt="%d") + coords = np.random.random([nframes, natoms, 3]) cells = np.random.random([nframes, 3, 3]) - np.savetxt(os.path.join(self.dir_name, '', 'coord.raw'), np.reshape(coords, [nframes, -1])) - np.savetxt(os.path.join(self.dir_name, '', 'box.raw'), np.reshape(cells, [nframes, -1])) - + np.savetxt( + os.path.join(self.dir_name, "", "coord.raw"), + np.reshape(coords, [nframes, -1]), + ) + np.savetxt( + os.path.join(self.dir_name, "", "box.raw"), np.reshape(cells, [nframes, -1]) + ) + data = { - 'atom_names' : atom_names, - 'atom_types' : atom_types, - 'atom_numbs' : atom_numbs, - 'coords' : coords, - 'cells' : cells, - 'orig' : np.zeros(3), + "atom_names": atom_names, + "atom_types": atom_types, + "atom_numbs": atom_numbs, + "coords": coords, + "cells": cells, + "orig": np.zeros(3), } - self.system_1 = dpdata.System(self.dir_name, fmt = 'deepmd/raw', type_map = ['O', 'H']) + self.system_1 = dpdata.System( + self.dir_name, fmt="deepmd/raw", type_map=["O", "H"] + ) self.system_2 = dpdata.System() self.system_2.data = data @@ -183,11 +182,10 @@ def setUp(self) : self.f_places = 6 self.v_places = 6 - - def tearDown(self) : + def tearDown(self): if os.path.exists(self.dir_name): shutil.rmtree(self.dir_name) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_elements_index.py b/tests/test_elements_index.py index 23edd5e24..6b924548b 100644 --- a/tests/test_elements_index.py +++ b/tests/test_elements_index.py @@ -3,25 +3,28 @@ import unittest from dpdata.system 
import elements_index_map + class ElementIndexMap(unittest.TestCase): - def test_func1(self): - element=["C","N","H"] - ref={'C': 0, 'N': 1, 'H': 2} - self.assertEqual(ref,elements_index_map(element)) + def test_func1(self): + element = ["C", "N", "H"] + ref = {"C": 0, "N": 1, "H": 2} + self.assertEqual(ref, elements_index_map(element)) + + def test_func2(self): + element = ["C", "N", "H"] + ref = {"H": 0, "C": 1, "N": 2} + self.assertEqual(ref, elements_index_map(element, standard=True)) + + def test_func3(self): + element = ["C", "N", "H"] + ref = {0: "H", 1: "C", 2: "N"} + self.assertEqual(ref, elements_index_map(element, standard=True, inverse=True)) - def test_func2(self): - element=["C","N","H"] - ref={'H': 0, 'C': 1, 'N': 2} - self.assertEqual(ref,elements_index_map(element,standard=True)) + def test_func4(self): + element = ["C", "N", "H"] + ref = {0: "C", 1: "N", 2: "H"} + self.assertEqual(ref, elements_index_map(element, inverse=True)) - def test_func3(self): - element=["C","N","H"] - ref={0: 'H', 1: 'C', 2: 'N'} - self.assertEqual(ref,elements_index_map(element,standard=True,inverse=True)) - def test_func4(self): - element=["C","N","H"] - ref={0: 'C', 1: 'N', 2: 'H'} - self.assertEqual(ref,elements_index_map(element,inverse=True)) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_empty.py b/tests/test_empty.py index 2a9fd252d..0fd84ca0f 100644 --- a/tests/test_empty.py +++ b/tests/test_empty.py @@ -3,29 +3,31 @@ import unittest from context import dpdata + class TestEmptySystem(unittest.TestCase): def test_empty(self): - sys1 = dpdata.System(type_map = ['A', 'H', 'B', 'O', 'D']) - sys2 = dpdata.LabeledSystem(type_map = ['A', 'H', 'B', 'O', 'D']) + sys1 = dpdata.System(type_map=["A", "H", "B", "O", "D"]) + sys2 = dpdata.LabeledSystem(type_map=["A", "H", "B", "O", "D"]) def test_data_empty(self): - data = {'atom_names' : ['A', 'B'], - 'atom_numbs' : [0,0], - 'atom_types' : np.array([], dtype = int), - 'orig': 
np.array([0, 0, 0]), - 'cells': np.array([]), - 'coords': np.array([]), + data = { + "atom_names": ["A", "B"], + "atom_numbs": [0, 0], + "atom_types": np.array([], dtype=int), + "orig": np.array([0, 0, 0]), + "cells": np.array([]), + "coords": np.array([]), } - sys1 = dpdata.System(data = data) - data = {'atom_names' : ['A', 'B'], - 'atom_numbs' : [0,0], - 'atom_types' : np.array([], dtype = int), - 'orig': np.array([0, 0, 0]), - 'cells': np.array([]), - 'coords': np.array([]), - 'forces': np.array([]), - 'energies': np.array([]), - 'virials': np.array([]), + sys1 = dpdata.System(data=data) + data = { + "atom_names": ["A", "B"], + "atom_numbs": [0, 0], + "atom_types": np.array([], dtype=int), + "orig": np.array([0, 0, 0]), + "cells": np.array([]), + "coords": np.array([]), + "forces": np.array([]), + "energies": np.array([]), + "virials": np.array([]), } - sys2 = dpdata.LabeledSystem(data = data) - + sys2 = dpdata.LabeledSystem(data=data) diff --git a/tests/test_fhi_md_multi_elem_output.py b/tests/test_fhi_md_multi_elem_output.py index 39cc4fb7a..dc4cbfcf2 100644 --- a/tests/test_fhi_md_multi_elem_output.py +++ b/tests/test_fhi_md_multi_elem_output.py @@ -5,50 +5,57 @@ class TestFhi_aims_MD: def test_atom_names(self): - self.assertEqual(self.system.data['atom_names'], ["C","H","O","N"]) + self.assertEqual(self.system.data["atom_names"], ["C", "H", "O", "N"]) def test_atom_numbs(self): - self.assertEqual(self.system.data['atom_numbs'], [32,36,8,4]) + self.assertEqual(self.system.data["atom_numbs"], [32, 36, 8, 4]) def test_atom_types(self): - ref_type = [0, 1, 1,] + ref_type = [ + 0, + 1, + 1, + ] ref_type = np.array(ref_type) for ii in range(ref_type.shape[0]): - self.assertAlmostEqual(self.system.data['atom_types'][ii], ref_type[ii]) + self.assertAlmostEqual(self.system.data["atom_types"][ii], ref_type[ii]) def test_cell(self): - ref_cell=np.loadtxt('fhi_aims/ref_cell_md_m.txt') - ref_cell=ref_cell.flatten() - cells = self.system.data['cells'].flatten() + ref_cell 
= np.loadtxt("fhi_aims/ref_cell_md_m.txt") + ref_cell = ref_cell.flatten() + cells = self.system.data["cells"].flatten() idx = 0 for ii in range(len(cells)): self.assertAlmostEqual(cells[ii], float(ref_cell[ii])) def test_coord(self): - ref_coord=np.loadtxt('fhi_aims/ref_coord_md_m.txt') - ref_coord=ref_coord.flatten() - coords = self.system.data['coords'].flatten() + ref_coord = np.loadtxt("fhi_aims/ref_coord_md_m.txt") + ref_coord = ref_coord.flatten() + coords = self.system.data["coords"].flatten() for ii in range(len(coords)): self.assertAlmostEqual(coords[ii], float(ref_coord[ii])) def test_force(self): - ref_force=np.loadtxt('fhi_aims/ref_force_md_m.txt') - ref_force=ref_force.flatten() - forces = self.system.data['forces'].flatten() + ref_force = np.loadtxt("fhi_aims/ref_force_md_m.txt") + ref_force = ref_force.flatten() + forces = self.system.data["forces"].flatten() for ii in range(len(forces)): self.assertAlmostEqual(forces[ii], float(ref_force[ii])) def test_energy(self): - ref_energy=np.loadtxt('fhi_aims/ref_energy_md_m.txt') - ref_energy=ref_energy.flatten() - energy = self.system.data['energies'] + ref_energy = np.loadtxt("fhi_aims/ref_energy_md_m.txt") + ref_energy = ref_energy.flatten() + energy = self.system.data["energies"] for ii in range(len(energy)): self.assertAlmostEqual(energy[ii], ref_energy[ii]) class TestFhi_aims_Output(unittest.TestCase, TestFhi_aims_MD): def setUp(self): - self.system = dpdata.LabeledSystem('fhi_aims/output_multi_elements', fmt='fhi_aims/md') + self.system = dpdata.LabeledSystem( + "fhi_aims/output_multi_elements", fmt="fhi_aims/md" + ) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_fhi_md_output.py b/tests/test_fhi_md_output.py index 2b0751809..3f945b318 100644 --- a/tests/test_fhi_md_output.py +++ b/tests/test_fhi_md_output.py @@ -5,50 +5,55 @@ class TestFhi_aims_MD: def test_atom_names(self): - self.assertEqual(self.system.data['atom_names'], ["B","N"]) + 
self.assertEqual(self.system.data["atom_names"], ["B", "N"]) def test_atom_numbs(self): - self.assertEqual(self.system.data['atom_numbs'], [1,2]) + self.assertEqual(self.system.data["atom_numbs"], [1, 2]) def test_atom_types(self): - ref_type = [0, 1, 1,] + ref_type = [ + 0, + 1, + 1, + ] ref_type = np.array(ref_type) for ii in range(ref_type.shape[0]): - self.assertAlmostEqual(self.system.data['atom_types'][ii], ref_type[ii]) + self.assertAlmostEqual(self.system.data["atom_types"][ii], ref_type[ii]) def test_cell(self): - ref_cell=np.loadtxt('fhi_aims/ref_cell_md.txt') - ref_cell=ref_cell.flatten() - cells = self.system.data['cells'].flatten() + ref_cell = np.loadtxt("fhi_aims/ref_cell_md.txt") + ref_cell = ref_cell.flatten() + cells = self.system.data["cells"].flatten() idx = 0 for ii in range(len(cells)): self.assertAlmostEqual(cells[ii], float(ref_cell[ii])) def test_coord(self): - ref_coord=np.loadtxt('fhi_aims/ref_coord_md.txt') - ref_coord=ref_coord.flatten() - coords = self.system.data['coords'].flatten() + ref_coord = np.loadtxt("fhi_aims/ref_coord_md.txt") + ref_coord = ref_coord.flatten() + coords = self.system.data["coords"].flatten() for ii in range(len(coords)): self.assertAlmostEqual(coords[ii], float(ref_coord[ii])) def test_force(self): - ref_force=np.loadtxt('fhi_aims/ref_force_md.txt') - ref_force=ref_force.flatten() - forces = self.system.data['forces'].flatten() + ref_force = np.loadtxt("fhi_aims/ref_force_md.txt") + ref_force = ref_force.flatten() + forces = self.system.data["forces"].flatten() for ii in range(len(forces)): self.assertAlmostEqual(forces[ii], float(ref_force[ii])) def test_energy(self): - ref_energy=np.loadtxt('fhi_aims/ref_energy_md.txt') - ref_energy=ref_energy.flatten() - energy = self.system.data['energies'] + ref_energy = np.loadtxt("fhi_aims/ref_energy_md.txt") + ref_energy = ref_energy.flatten() + energy = self.system.data["energies"] for ii in range(len(energy)): self.assertAlmostEqual(energy[ii], ref_energy[ii]) class 
TestFhi_aims_Output(unittest.TestCase, TestFhi_aims_MD): def setUp(self): - self.system = dpdata.LabeledSystem('fhi_aims/out_md', fmt='fhi_aims/md') + self.system = dpdata.LabeledSystem("fhi_aims/out_md", fmt="fhi_aims/md") -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_fhi_output.py b/tests/test_fhi_output.py index b1ccb730e..7c8307ea1 100644 --- a/tests/test_fhi_output.py +++ b/tests/test_fhi_output.py @@ -3,72 +3,74 @@ import unittest from context import dpdata + class TestFhi_aims: - def test_atom_names(self) : - self.assertEqual(self.system.data['atom_names'], ['B','N']) - def test_atom_numbs(self) : - self.assertEqual(self.system.data['atom_numbs'], [1, 1]) - def test_atom_types(self) : - ref_type = [0,1] + def test_atom_names(self): + self.assertEqual(self.system.data["atom_names"], ["B", "N"]) + + def test_atom_numbs(self): + self.assertEqual(self.system.data["atom_numbs"], [1, 1]) + + def test_atom_types(self): + ref_type = [0, 1] ref_type = np.array(ref_type) - for ii in range(ref_type.shape[0]) : - self.assertAlmostEqual(self.system.data['atom_types'][ii], ref_type[ii]) + for ii in range(ref_type.shape[0]): + self.assertAlmostEqual(self.system.data["atom_types"][ii], ref_type[ii]) - def test_cell(self) : - cell = np.loadtxt('fhi_aims/ref_cell.txt').flatten() - res = self.system.data['cells'][0].flatten() + def test_cell(self): + cell = np.loadtxt("fhi_aims/ref_cell.txt").flatten() + res = self.system.data["cells"][0].flatten() for ii in range(len(cell)): self.assertAlmostEqual(res[ii], cell[ii]) - def test_coord(self) : - coord = np.loadtxt('fhi_aims/ref_coord.txt').flatten() - res = self.system.data['coords'][0].flatten() - for ii in range(len(coord)) : + def test_coord(self): + coord = np.loadtxt("fhi_aims/ref_coord.txt").flatten() + res = self.system.data["coords"][0].flatten() + for ii in range(len(coord)): self.assertAlmostEqual(res[ii], float(coord[ii])) - def test_force(self) : - force = 
np.loadtxt('fhi_aims/ref_force.txt').flatten() - res = self.system.data['forces'][0].flatten() + def test_force(self): + force = np.loadtxt("fhi_aims/ref_force.txt").flatten() + res = self.system.data["forces"][0].flatten() for ii in range(len(force)): self.assertAlmostEqual(res[ii], float(force[ii])) - # def test_viriale(self) : - # toViri = 1 - # fp = open('fhi_aims/ref_cell') - # cell = [] - # for ii in fp: - # for jj in ii.split(): - # cell.append(float(jj)) - # cell = np.array(cell) - # cells = cell.reshape(3,3) - # fp.close() + # def test_viriale(self) : + # toViri = 1 + # fp = open('fhi_aims/ref_cell') + # cell = [] + # for ii in fp: + # for jj in ii.split(): + # cell.append(float(jj)) + # cell = np.array(cell) + # cells = cell.reshape(3,3) + # fp.close() - # toVol = [] - # for ii in cells: - # ### calucate vol - # toVol.append(np.linalg.det(cells)) + # toVol = [] + # for ii in cells: + # ### calucate vol + # toVol.append(np.linalg.det(cells)) - # fp = open('fhi_aims/ref_virial') - # virial = [] - # for ii in fp: - # for jj in ii.split(): - # virial.append(float(jj) * toViri * toVol[0]) - # virial = np.array(virial) - # fp.close() - # res = self.system.data['virials'][0].flatten() - # for ii in range(len(virial)): - # self.assertAlmostEqual(res[ii], float(virial[ii])) + # fp = open('fhi_aims/ref_virial') + # virial = [] + # for ii in fp: + # for jj in ii.split(): + # virial.append(float(jj) * toViri * toVol[0]) + # virial = np.array(virial) + # fp.close() + # res = self.system.data['virials'][0].flatten() + # for ii in range(len(virial)): + # self.assertAlmostEqual(res[ii], float(virial[ii])) - def test_energy(self) : - ref_energy = -0.215215685892915E+04 - self.assertAlmostEqual(self.system.data['energies'][0], ref_energy,places = 6) + def test_energy(self): + ref_energy = -0.215215685892915e04 + self.assertAlmostEqual(self.system.data["energies"][0], ref_energy, places=6) class TestFhiOutput(unittest.TestCase, TestFhi_aims): - def setUp(self): - 
self.system = dpdata.LabeledSystem('fhi_aims/out_scf', fmt = 'fhi_aims/scf') + self.system = dpdata.LabeledSystem("fhi_aims/out_scf", fmt="fhi_aims/scf") -if __name__ == '__main__': - unittest.main() +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_gaussian_driver.py b/tests/test_gaussian_driver.py index 69d0c42ec..f1029ec11 100644 --- a/tests/test_gaussian_driver.py +++ b/tests/test_gaussian_driver.py @@ -9,78 +9,97 @@ @unittest.skipIf(shutil.which("g16") is None, "g16 is not installed") -@unittest.skipIf(importlib.util.find_spec("openbabel") is None, "openbabel is not installed") +@unittest.skipIf( + importlib.util.find_spec("openbabel") is None, "openbabel is not installed" +) class TestGaussianDriver(unittest.TestCase, CompSys, IsNoPBC): """Test Gaussian with a hydrogen ion.""" + @classmethod def setUpClass(cls): - cls.system_1 = dpdata.System(data={ - "atom_names": ["H"], - "atom_numbs": [1], - "atom_types": np.zeros((1,), dtype=int), - "coords": np.zeros((1, 1, 3), dtype=np.float32), - "cells": np.zeros((1, 3, 3), dtype=np.float32), - "orig": np.zeros(3, dtype=np.float32), - "nopbc": True, - }) - cls.system_2 = cls.system_1.predict(keywords="force B3LYP", charge=1, driver="gaussian") + cls.system_1 = dpdata.System( + data={ + "atom_names": ["H"], + "atom_numbs": [1], + "atom_types": np.zeros((1,), dtype=int), + "coords": np.zeros((1, 1, 3), dtype=np.float32), + "cells": np.zeros((1, 3, 3), dtype=np.float32), + "orig": np.zeros(3, dtype=np.float32), + "nopbc": True, + } + ) + cls.system_2 = cls.system_1.predict( + keywords="force B3LYP", charge=1, driver="gaussian" + ) cls.places = 6 - + def test_energy(self): - self.assertAlmostEqual(self.system_2['energies'].ravel()[0], 0.) 
- + self.assertAlmostEqual(self.system_2["energies"].ravel()[0], 0.0) + def test_forces(self): - forces = self.system_2['forces'] + forces = self.system_2["forces"] np.testing.assert_allclose(forces, np.zeros_like(forces)) class TestMakeGaussian(unittest.TestCase): """This class will not check if the output is correct, but only see if there is any errors.""" + def setUp(self): - self.system = dpdata.System(data={ - "atom_names": ["H"], - "atom_numbs": [1], - "atom_types": np.zeros((1,), dtype=int), - "coords": np.zeros((1, 1, 3), dtype=np.float32), - "cells": np.zeros((1, 3, 3), dtype=np.float32), - "orig": np.zeros(3, dtype=np.float32), - "nopbc": True, - }) - - @unittest.skipIf(importlib.util.find_spec("openbabel") is None, "requires openbabel") + self.system = dpdata.System( + data={ + "atom_names": ["H"], + "atom_numbs": [1], + "atom_types": np.zeros((1,), dtype=int), + "coords": np.zeros((1, 1, 3), dtype=np.float32), + "cells": np.zeros((1, 3, 3), dtype=np.float32), + "orig": np.zeros(3, dtype=np.float32), + "nopbc": True, + } + ) + + @unittest.skipIf( + importlib.util.find_spec("openbabel") is None, "requires openbabel" + ) def test_make_fp_gaussian(self): self.system.to_gaussian_gjf("gaussian/tmp.gjf", keywords="wb97x/6-31g* force") def test_make_fp_gaussian_multiplicity_one(self): - self.system.to_gaussian_gjf("gaussian/tmp.gjf", keywords="wb97x/6-31g* force", multiplicity=1) + self.system.to_gaussian_gjf( + "gaussian/tmp.gjf", keywords="wb97x/6-31g* force", multiplicity=1 + ) def test_detect_multiplicity(self): # oxygen O2 3 - self._check_multiplicity(['O', 'O'], 3) + self._check_multiplicity(["O", "O"], 3) # methane CH4 1 - self._check_multiplicity(['C', 'H', 'H', 'H', 'H'], 1) + self._check_multiplicity(["C", "H", "H", "H", "H"], 1) # CH3 2 - self._check_multiplicity(['C', 'H', 'H', 'H'], 2) + self._check_multiplicity(["C", "H", "H", "H"], 2) # CH2 1 - self._check_multiplicity(['C', 'H', 'H'], 1) + self._check_multiplicity(["C", "H", "H"], 1) # CH 2 - 
self._check_multiplicity(['C', 'H'], 2) + self._check_multiplicity(["C", "H"], 2) def _check_multiplicity(self, symbols, multiplicity): - self.assertEqual(dpdata.gaussian.gjf.detect_multiplicity(np.array(symbols)), multiplicity) + self.assertEqual( + dpdata.gaussian.gjf.detect_multiplicity(np.array(symbols)), multiplicity + ) def tearDown(self): - if os.path.exists('gaussian/tmp.gjf'): - os.remove('gaussian/tmp.gjf') + if os.path.exists("gaussian/tmp.gjf"): + os.remove("gaussian/tmp.gjf") class TestDumpGaussianGjf(unittest.TestCase): def setUp(self): - self.system = dpdata.LabeledSystem('gaussian/methane.gaussianlog', - fmt='gaussian/log') + self.system = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) def test_dump_to_gjf(self): - self.system.to_gaussian_gjf("gaussian/tmp.gjf", keywords="force B3LYP/6-31G(d)", multiplicity=1) + self.system.to_gaussian_gjf( + "gaussian/tmp.gjf", keywords="force B3LYP/6-31G(d)", multiplicity=1 + ) with open("gaussian/tmp.gjf") as f: f.readline() header = f.readline().strip() @@ -99,11 +118,11 @@ def test_dump_to_gjf(self): self.assertEqual(title, self.system.formula) self.assertEqual(charge, 0) self.assertEqual(mult, 1) - self.assertEqual(atoms, ['C', 'H', 'H', 'H', 'H']) - for i in range(self.system['coords'].shape[1]): + self.assertEqual(atoms, ["C", "H", "H", "H", "H"]) + for i in range(self.system["coords"].shape[1]): for j in range(3): - self.assertAlmostEqual(coords[i][j], self.system['coords'][0][i][j]) + self.assertAlmostEqual(coords[i][j], self.system["coords"][0][i][j]) def tearDown(self): - if os.path.exists('gaussian/tmp.gjf'): - os.remove('gaussian/tmp.gjf') + if os.path.exists("gaussian/tmp.gjf"): + os.remove("gaussian/tmp.gjf") diff --git a/tests/test_gaussian_gjf.py b/tests/test_gaussian_gjf.py index 350b5025d..24cb56bd8 100644 --- a/tests/test_gaussian_gjf.py +++ b/tests/test_gaussian_gjf.py @@ -5,10 +5,9 @@ class TestGaussianGJF(unittest.TestCase): - def setUp (self) : - self.system = 
dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - + def setUp(self): + self.system = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + def test_dump_gaussian_gjf(self): - self.system.to_gaussian_gjf('tmp.gjf', keywords="force b3lyp/6-31g*") - os.remove('tmp.gjf') + self.system.to_gaussian_gjf("tmp.gjf", keywords="force b3lyp/6-31g*") + os.remove("tmp.gjf") diff --git a/tests/test_gaussian_log.py b/tests/test_gaussian_log.py index e52f93071..8d7bec818 100644 --- a/tests/test_gaussian_log.py +++ b/tests/test_gaussian_log.py @@ -3,94 +3,105 @@ import unittest from context import dpdata -class TestGaussianLog : - def test_atom_names(self) : - self.assertEqual(self.system.data['atom_names'], self.atom_names) - def test_atom_numbs(self) : - self.assertEqual(self.system.data['atom_numbs'], self.atom_numbs) - +class TestGaussianLog: + def test_atom_names(self): + self.assertEqual(self.system.data["atom_names"], self.atom_names) + + def test_atom_numbs(self): + self.assertEqual(self.system.data["atom_numbs"], self.atom_numbs) + def test_nframes(self): self.assertEqual(len(self.system), self.nframes) - def test_atom_types(self) : - for ii in range(len(self.atom_types)) : - self.assertEqual(self.system.data['atom_types'][ii], self.atom_types[ii]) + def test_atom_types(self): + for ii in range(len(self.atom_types)): + self.assertEqual(self.system.data["atom_types"][ii], self.atom_types[ii]) def test_nopbc(self): self.assertEqual(self.system.nopbc, True) + class TestGaussianLoadLog(unittest.TestCase, TestGaussianLog): - def setUp (self) : - self.system = dpdata.LabeledSystem('gaussian/methane.gaussianlog', - fmt = 'gaussian/log') - self.atom_names = ['C','H'] + def setUp(self): + self.system = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + self.atom_names = ["C", "H"] self.atom_numbs = [1, 4] self.nframes = 1 self.atom_types = [0, 1, 1, 1, 1] + class TestGaussianLoadLargeForceLog(unittest.TestCase, 
TestGaussianLog): - def setUp (self) : - self.system = dpdata.LabeledSystem('gaussian/largeforce.gaussianlog', - fmt = 'gaussian/log') - self.atom_names = ['C','H','O','S'] - self.atom_numbs = [33 , 65, 22, 6] + def setUp(self): + self.system = dpdata.LabeledSystem( + "gaussian/largeforce.gaussianlog", fmt="gaussian/log" + ) + self.atom_names = ["C", "H", "O", "S"] + self.atom_numbs = [33, 65, 22, 6] self.nframes = 1 self.atom_types = [0] * 33 + [2] * 22 + [1] * 65 + [3] * 6 - + + class TestGaussianLoadMD(unittest.TestCase, TestGaussianLog): - def setUp (self) : - self.system = dpdata.LabeledSystem('gaussian/aimd_gaussian_CH4_output', - fmt = 'gaussian/md') - self.atom_names = ['C','H'] + def setUp(self): + self.system = dpdata.LabeledSystem( + "gaussian/aimd_gaussian_CH4_output", fmt="gaussian/md" + ) + self.atom_names = ["C", "H"] self.atom_numbs = [1, 4] self.nframes = 22 self.atom_types = [1, 1, 1, 1, 0] class TestNonCoveragedGaussianLoadLog(unittest.TestCase, TestGaussianLog): - def setUp (self) : - self.system = dpdata.LabeledSystem('gaussian/noncoveraged.gaussianlog', - fmt = 'gaussian/log') + def setUp(self): + self.system = dpdata.LabeledSystem( + "gaussian/noncoveraged.gaussianlog", fmt="gaussian/log" + ) self.atom_names = [] self.atom_numbs = [] self.nframes = 0 - - def test_atom_types(self) : - self.assertEqual(self.system.data['atom_types'], []) - def test_cells(self) : - self.assertEqual(self.system.data['cells'], []) + def test_atom_types(self): + self.assertEqual(self.system.data["atom_types"], []) + + def test_cells(self): + self.assertEqual(self.system.data["cells"], []) - def test_coords(self) : - self.assertEqual(self.system.data['coords'], []) + def test_coords(self): + self.assertEqual(self.system.data["coords"], []) - def test_energies(self) : - self.assertEqual(self.system.data['energies'], []) + def test_energies(self): + self.assertEqual(self.system.data["energies"], []) - def test_forces(self) : - 
self.assertEqual(self.system.data['forces'], []) + def test_forces(self): + self.assertEqual(self.system.data["forces"], []) - def test_virials(self) : - self.assertFalse('virials' in self.system.data) + def test_virials(self): + self.assertFalse("virials" in self.system.data) class TestGaussianLoadPBCLog(unittest.TestCase, TestGaussianLog): """PBC.""" - def setUp (self) : - self.system = dpdata.LabeledSystem('gaussian/h2pbc.gaussianlog', - fmt = 'gaussian/log') - self.atom_names = ['H'] + + def setUp(self): + self.system = dpdata.LabeledSystem( + "gaussian/h2pbc.gaussianlog", fmt="gaussian/log" + ) + self.atom_names = ["H"] self.atom_numbs = [2] self.nframes = 1 self.atom_types = [0, 0] self.cells = (np.eye(3) * 10.0).reshape(1, 3, 3) - def test_cells(self) : - self.assertTrue(np.allclose(self.system.data['cells'], self.cells)) + def test_cells(self): + self.assertTrue(np.allclose(self.system.data["cells"], self.cells)) def test_nopbc(self): self.assertEqual(self.system.nopbc, False) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_gromacs_gro.py b/tests/test_gromacs_gro.py index 16070448b..d2553809d 100644 --- a/tests/test_gromacs_gro.py +++ b/tests/test_gromacs_gro.py @@ -3,111 +3,152 @@ import unittest from context import dpdata + class TestGromacsGro(unittest.TestCase): def test_read_file(self): - system = dpdata.System('gromacs/1h.gro', type_map=['H', 'O']) - self.assertTrue('H' in system['atom_names']) - self.assertTrue('O' in system['atom_names']) - self.assertEqual(system['atom_numbs'], [6, 3]) - for cc,ii in enumerate([1, 0, 0, 1, 0, 0, 1, 0, 0]): - self.assertEqual(system['atom_types'][cc], ii) - self.assertEqual(len(system['cells']), 1) - self.assertEqual(len(system['coords']), 1) + system = dpdata.System("gromacs/1h.gro", type_map=["H", "O"]) + self.assertTrue("H" in system["atom_names"]) + self.assertTrue("O" in system["atom_names"]) + self.assertEqual(system["atom_numbs"], [6, 3]) + for cc, ii in 
enumerate([1, 0, 0, 1, 0, 0, 1, 0, 0]): + self.assertEqual(system["atom_types"][cc], ii) + self.assertEqual(len(system["cells"]), 1) + self.assertEqual(len(system["coords"]), 1) for ii in range(3): for jj in range(3): if ii != jj: - self.assertAlmostEqual(system['cells'][0][ii][jj], 0) - self.assertAlmostEqual(system['cells'][0][0][0], 7.822838765564372) - self.assertAlmostEqual(system['cells'][0][1][1], 7.353572647182051) - self.assertAlmostEqual(system['cells'][0][2][2], 9.036518515423753) - self.assertAlmostEqual(system['coords'][0][8][0], 7.43) - self.assertAlmostEqual(system['coords'][0][8][1], 5.12) - self.assertAlmostEqual(system['coords'][0][8][2], 3.36) + self.assertAlmostEqual(system["cells"][0][ii][jj], 0) + self.assertAlmostEqual(system["cells"][0][0][0], 7.822838765564372) + self.assertAlmostEqual(system["cells"][0][1][1], 7.353572647182051) + self.assertAlmostEqual(system["cells"][0][2][2], 9.036518515423753) + self.assertAlmostEqual(system["coords"][0][8][0], 7.43) + self.assertAlmostEqual(system["coords"][0][8][1], 5.12) + self.assertAlmostEqual(system["coords"][0][8][2], 3.36) def test_read_file_tri(self): - system = dpdata.System('gromacs/1h.tri.gro', type_map=['H', 'O']) - self.assertTrue('H' in system['atom_names']) - self.assertTrue('O' in system['atom_names']) - self.assertEqual(system['atom_numbs'], [6, 3]) - for cc,ii in enumerate([1, 0, 0, 1, 0, 0, 1, 0, 0]): - self.assertEqual(system['atom_types'][cc], ii) - self.assertEqual(len(system['cells']), 1) - self.assertEqual(len(system['coords']), 1) + system = dpdata.System("gromacs/1h.tri.gro", type_map=["H", "O"]) + self.assertTrue("H" in system["atom_names"]) + self.assertTrue("O" in system["atom_names"]) + self.assertEqual(system["atom_numbs"], [6, 3]) + for cc, ii in enumerate([1, 0, 0, 1, 0, 0, 1, 0, 0]): + self.assertEqual(system["atom_types"][cc], ii) + self.assertEqual(len(system["cells"]), 1) + self.assertEqual(len(system["coords"]), 1) count = 0 for ii in range(3): for jj in range(3): 
if ii != jj: - self.assertAlmostEqual(system['cells'][0][ii][jj], count) + self.assertAlmostEqual(system["cells"][0][ii][jj], count) count += 1 - self.assertAlmostEqual(system['cells'][0][0][0], 7.822838765564372) - self.assertAlmostEqual(system['cells'][0][1][1], 7.353572647182051) - self.assertAlmostEqual(system['cells'][0][2][2], 9.036518515423753) - self.assertAlmostEqual(system['coords'][0][8][0], 7.43) - self.assertAlmostEqual(system['coords'][0][8][1], 5.12) - self.assertAlmostEqual(system['coords'][0][8][2], 3.36) - system.to('vasp/poscar', 'POSCAR') + self.assertAlmostEqual(system["cells"][0][0][0], 7.822838765564372) + self.assertAlmostEqual(system["cells"][0][1][1], 7.353572647182051) + self.assertAlmostEqual(system["cells"][0][2][2], 9.036518515423753) + self.assertAlmostEqual(system["coords"][0][8][0], 7.43) + self.assertAlmostEqual(system["coords"][0][8][1], 5.12) + self.assertAlmostEqual(system["coords"][0][8][2], 3.36) + system.to("vasp/poscar", "POSCAR") + class TestGromacsGroMultiFrames(unittest.TestCase): def test_read_file(self): - system = dpdata.System('gromacs/multi_frames.gro', type_map=['H', 'O']) - self.assertTrue('H' in system['atom_names']) - self.assertTrue('O' in system['atom_names']) - self.assertEqual(system['atom_numbs'], [6, 3]) - for cc,ii in enumerate([1, 0, 0, 1, 0, 0, 1, 0, 0]): - self.assertEqual(system['atom_types'][cc], ii) - self.assertEqual(len(system['cells']), 2) - self.assertEqual(len(system['coords']), 2) + system = dpdata.System("gromacs/multi_frames.gro", type_map=["H", "O"]) + self.assertTrue("H" in system["atom_names"]) + self.assertTrue("O" in system["atom_names"]) + self.assertEqual(system["atom_numbs"], [6, 3]) + for cc, ii in enumerate([1, 0, 0, 1, 0, 0, 1, 0, 0]): + self.assertEqual(system["atom_types"][cc], ii) + self.assertEqual(len(system["cells"]), 2) + self.assertEqual(len(system["coords"]), 2) for ii in range(3): for jj in range(3): if ii != jj: - self.assertAlmostEqual(system['cells'][0][ii][jj], 0) # 
frame no.1 - self.assertAlmostEqual(system['cells'][1][ii][jj], 0) # frame no.2 + self.assertAlmostEqual(system["cells"][0][ii][jj], 0) # frame no.1 + self.assertAlmostEqual(system["cells"][1][ii][jj], 0) # frame no.2 # frame no.1 - self.assertAlmostEqual(system['cells'][0][0][0], 7.822838765564372) - self.assertAlmostEqual(system['cells'][0][1][1], 7.353572647182051) - self.assertAlmostEqual(system['cells'][0][2][2], 9.036518515423753) - self.assertAlmostEqual(system['coords'][0][8][0], 7.43) - self.assertAlmostEqual(system['coords'][0][8][1], 5.12) - self.assertAlmostEqual(system['coords'][0][8][2], 3.36) + self.assertAlmostEqual(system["cells"][0][0][0], 7.822838765564372) + self.assertAlmostEqual(system["cells"][0][1][1], 7.353572647182051) + self.assertAlmostEqual(system["cells"][0][2][2], 9.036518515423753) + self.assertAlmostEqual(system["coords"][0][8][0], 7.43) + self.assertAlmostEqual(system["coords"][0][8][1], 5.12) + self.assertAlmostEqual(system["coords"][0][8][2], 3.36) # frame no.2 - self.assertAlmostEqual(system['cells'][1][0][0], 7.822838765564372) - self.assertAlmostEqual(system['cells'][1][1][1], 7.353572647182051) - self.assertAlmostEqual(system['cells'][1][2][2], 9.036518515423753) - self.assertAlmostEqual(system['coords'][1][8][0], 7.43) - self.assertAlmostEqual(system['coords'][1][8][1], 5.12) - self.assertAlmostEqual(system['coords'][1][8][2], 3.36) + self.assertAlmostEqual(system["cells"][1][0][0], 7.822838765564372) + self.assertAlmostEqual(system["cells"][1][1][1], 7.353572647182051) + self.assertAlmostEqual(system["cells"][1][2][2], 9.036518515423753) + self.assertAlmostEqual(system["coords"][1][8][0], 7.43) + self.assertAlmostEqual(system["coords"][1][8][1], 5.12) + self.assertAlmostEqual(system["coords"][1][8][2], 3.36) class TestFormatAtomName(unittest.TestCase): def test_format_atom_name(self): - system = dpdata.System("gromacs/case_for_format_atom_name.gro", fmt='gromacs/gro', type_map=['H','C','N','O','Cl']) + system = 
dpdata.System( + "gromacs/case_for_format_atom_name.gro", + fmt="gromacs/gro", + type_map=["H", "C", "N", "O", "Cl"], + ) self.assertEqual(system.formula, "H11C14N3O2Cl2") - + def test_no_format_atom_name(self): - system = dpdata.System("gromacs/case_for_format_atom_name.gro", fmt='gromacs/gro', format_atom_name=False) - atoms = ['CL1', 'H6', 'C4', 'C3', 'C6', 'C11', 'H10', 'C2', 'N3', 'C14', - 'H7', 'H8', 'C13', 'H2', 'H1', 'H4', 'O2', 'H9', 'O1', 'N2', 'C9', - 'H3', 'C5', 'H11', 'N1', 'C7', 'C10', 'CL2', 'H5', 'C1', 'C8','C12'] + system = dpdata.System( + "gromacs/case_for_format_atom_name.gro", + fmt="gromacs/gro", + format_atom_name=False, + ) + atoms = [ + "CL1", + "H6", + "C4", + "C3", + "C6", + "C11", + "H10", + "C2", + "N3", + "C14", + "H7", + "H8", + "C13", + "H2", + "H1", + "H4", + "O2", + "H9", + "O1", + "N2", + "C9", + "H3", + "C5", + "H11", + "N1", + "C7", + "C10", + "CL2", + "H5", + "C1", + "C8", + "C12", + ] for at in atoms: - self.assertTrue(at in system['atom_names']) + self.assertTrue(at in system["atom_names"]) class TestDumpGromacsGro(unittest.TestCase): def setUp(self): - self.system = dpdata.System('gromacs/multi_frames.gro', type_map=['H', 'O']) - + self.system = dpdata.System("gromacs/multi_frames.gro", type_map=["H", "O"]) + def test_dump_single_frame(self): - self.system.to_gromacs_gro('gromacs/tmp_1.gro', frame_idx=0) - tmp = dpdata.System('gromacs/tmp_1.gro', type_map=['H', 'O']) - self.assertEqual(tmp.get_nframes(), 1) + self.system.to_gromacs_gro("gromacs/tmp_1.gro", frame_idx=0) + tmp = dpdata.System("gromacs/tmp_1.gro", type_map=["H", "O"]) + self.assertEqual(tmp.get_nframes(), 1) def test_dump_multi_frames(self): - self.system.to_gromacs_gro('gromacs/tmp_2.gro') - tmp = dpdata.System('gromacs/tmp_2.gro', type_map=['H', 'O']) + self.system.to_gromacs_gro("gromacs/tmp_2.gro") + tmp = dpdata.System("gromacs/tmp_2.gro", type_map=["H", "O"]) self.assertEqual(tmp.get_nframes(), 2) - + def tearDown(self): - if 
os.path.exists('gromacs/tmp_1.gro'): - os.remove('gromacs/tmp_1.gro') - if os.path.exists('gromacs/tmp_2.gro'): - os.remove('gromacs/tmp_2.gro') + if os.path.exists("gromacs/tmp_1.gro"): + os.remove("gromacs/tmp_1.gro") + if os.path.exists("gromacs/tmp_2.gro"): + os.remove("gromacs/tmp_2.gro") diff --git a/tests/test_json.py b/tests/test_json.py index 98be14040..7337d6820 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -4,25 +4,26 @@ from context import dpdata from comp_sys import CompLabeledSys, IsPBC + class TestJsonLoad(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') - self.system_2 = dpdata.LabeledSystem.load('poscars/h2o.md.json') + def setUp(self): + self.system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") + self.system_2 = dpdata.LabeledSystem.load("poscars/h2o.md.json") self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestAsDict(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') + def setUp(self): + self.system_1 = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") self.system_2 = dpdata.LabeledSystem.from_dict(self.system_1.as_dict()) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_lammps_dump_idx.py b/tests/test_lammps_dump_idx.py index 7f834e818..110f4bc64 100644 --- a/tests/test_lammps_dump_idx.py +++ b/tests/test_lammps_dump_idx.py @@ -5,17 +5,22 @@ import unittest from context import dpdata + class TestLmpDumpIdx(unittest.TestCase): def setUp(self): - self.system = dpdata.System(os.path.join('poscars', 'conf2.dump')) - - def test_coords(self): - np.testing.assert_allclose(self.system['coords'], np.array( - [[[0., 0., 0.], - [1.2621856, 0.7018028, 
0.5513885]]] - )) - + self.system = dpdata.System(os.path.join("poscars", "conf2.dump")) + + def test_coords(self): + np.testing.assert_allclose( + self.system["coords"], + np.array([[[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]]]), + ) + def test_type(self): - np.testing.assert_allclose(self.system.get_atom_types(), np.array( - [1, 0], dtype=int, - )) + np.testing.assert_allclose( + self.system.get_atom_types(), + np.array( + [1, 0], + dtype=int, + ), + ) diff --git a/tests/test_lammps_dump_shift_origin.py b/tests/test_lammps_dump_shift_origin.py index 00754d1a5..eec6b4f32 100644 --- a/tests/test_lammps_dump_shift_origin.py +++ b/tests/test_lammps_dump_shift_origin.py @@ -4,11 +4,13 @@ from context import dpdata from comp_sys import CompSys, IsPBC + class TestLammpsDumpShiftOrigin(unittest.TestCase, CompSys, IsPBC): - def setUp (self): - self.system_1 = dpdata.System('poscars/shift_origin.dump', fmt = 'lammps/dump')[0] - self.system_2 = dpdata.System('poscars/shift_origin.dump', fmt = 'lammps/dump')[1] + def setUp(self): + self.system_1 = dpdata.System("poscars/shift_origin.dump", fmt="lammps/dump")[0] + self.system_2 = dpdata.System("poscars/shift_origin.dump", fmt="lammps/dump")[1] self.places = 6 -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_lammps_dump_skipload.py b/tests/test_lammps_dump_skipload.py index 9e6502f61..d604607a4 100644 --- a/tests/test_lammps_dump_skipload.py +++ b/tests/test_lammps_dump_skipload.py @@ -4,18 +4,15 @@ from context import dpdata from comp_sys import CompSys, IsPBC + class TestLmpDumpSkip(unittest.TestCase, CompSys, IsPBC): - - def setUp(self): - self.system_1 = dpdata.System(os.path.join('poscars', 'conf.5.dump'), - type_map = ['O', 'H'], - begin = 1, - step = 2) - self.system_2 = dpdata.System(os.path.join('poscars', 'conf.5.dump'), - type_map = ['O', 'H'], - begin = 0, - step = 1) \ - .sub_system(np.arange(1,5,2)) + def setUp(self): + self.system_1 = dpdata.System( + 
os.path.join("poscars", "conf.5.dump"), type_map=["O", "H"], begin=1, step=2 + ) + self.system_2 = dpdata.System( + os.path.join("poscars", "conf.5.dump"), type_map=["O", "H"], begin=0, step=1 + ).sub_system(np.arange(1, 5, 2)) self.places = 6 self.e_places = 6 self.f_places = 6 diff --git a/tests/test_lammps_dump_to_system.py b/tests/test_lammps_dump_to_system.py index b63b3af80..739f4e439 100644 --- a/tests/test_lammps_dump_to_system.py +++ b/tests/test_lammps_dump_to_system.py @@ -2,25 +2,26 @@ import numpy as np import unittest from context import dpdata -from poscars.poscar_ref_oh import TestPOSCARoh +from poscars.poscar_ref_oh import TestPOSCARoh + class TestDump(unittest.TestCase, TestPOSCARoh): - - def setUp(self): - self.system = dpdata.System(os.path.join('poscars', 'conf.dump'), - type_map = ['O', 'H']) - + def setUp(self): + self.system = dpdata.System( + os.path.join("poscars", "conf.dump"), type_map=["O", "H"] + ) + + class TestDump2(unittest.TestCase, TestPOSCARoh): - - def setUp(self): - self.tmp_system = dpdata.System(os.path.join('poscars', 'conf.dump'), - type_map = ['O', 'H']) + def setUp(self): + self.tmp_system = dpdata.System( + os.path.join("poscars", "conf.dump"), type_map=["O", "H"] + ) self.system = self.tmp_system.sub_system([1]) - def test_nframes (self) : + def test_nframes(self): self.assertEqual(self.tmp_system.get_nframes(), 2) - - -if __name__ == '__main__': + + +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_lammps_dump_unfold.py b/tests/test_lammps_dump_unfold.py index 68aa2c8b7..bce95f7ba 100644 --- a/tests/test_lammps_dump_unfold.py +++ b/tests/test_lammps_dump_unfold.py @@ -2,35 +2,36 @@ import numpy as np import unittest from context import dpdata -from poscars.poscar_ref_oh import TestPOSCARoh +from poscars.poscar_ref_oh import TestPOSCARoh + class TestDump(unittest.TestCase, TestPOSCARoh): - - def setUp(self): - self.system = dpdata.System(os.path.join('poscars', 'conf_unfold.dump'), - type_map = ['O', 
'H']) - + def setUp(self): + self.system = dpdata.System( + os.path.join("poscars", "conf_unfold.dump"), type_map=["O", "H"] + ) + + class TestDump2(unittest.TestCase, TestPOSCARoh): - - def setUp(self): - self.tmp_system = dpdata.System(os.path.join('poscars', 'conf_unfold.dump'), - type_map = ['O', 'H']) + def setUp(self): + self.tmp_system = dpdata.System( + os.path.join("poscars", "conf_unfold.dump"), type_map=["O", "H"] + ) self.system = self.tmp_system.sub_system([1]) - def test_nframes (self) : + def test_nframes(self): self.assertEqual(self.tmp_system.get_nframes(), 2) - - + + class TestDumpUnwrap(unittest.TestCase, TestPOSCARoh): def setUp(self): self.unwrap = True self.system = dpdata.System( - os.path.join('poscars', 'conf_unfold.dump'), - type_map=['O', 'H'], + os.path.join("poscars", "conf_unfold.dump"), + type_map=["O", "H"], unwrap=self.unwrap, ) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_lammps_lmp_dump.py b/tests/test_lammps_lmp_dump.py index f33546485..2ded5e448 100644 --- a/tests/test_lammps_lmp_dump.py +++ b/tests/test_lammps_lmp_dump.py @@ -2,28 +2,28 @@ import numpy as np import unittest from context import dpdata -from poscars.poscar_ref_oh import TestPOSCARoh +from poscars.poscar_ref_oh import TestPOSCARoh + class TestLmpDump(unittest.TestCase, TestPOSCARoh): - - def setUp(self): - tmp_system = dpdata.System(os.path.join('poscars', 'conf.lmp'), - type_map = ['O', 'H']) - tmp_system.to_lammps_lmp('tmp.lmp') + def setUp(self): + tmp_system = dpdata.System( + os.path.join("poscars", "conf.lmp"), type_map=["O", "H"] + ) + tmp_system.to_lammps_lmp("tmp.lmp") self.system = dpdata.System() - self.system.from_lammps_lmp('tmp.lmp', - type_map = ['O', 'H']) + self.system.from_lammps_lmp("tmp.lmp", type_map=["O", "H"]) + class TestToFunc(unittest.TestCase, TestPOSCARoh): - - def setUp(self): - tmp_system = dpdata.System(os.path.join('poscars', 'conf.lmp'), - type_map = ['O', 'H']) - 
tmp_system.to('lammps/lmp', 'tmp.lmp') + def setUp(self): + tmp_system = dpdata.System( + os.path.join("poscars", "conf.lmp"), type_map=["O", "H"] + ) + tmp_system.to("lammps/lmp", "tmp.lmp") self.system = dpdata.System() - self.system.from_fmt('tmp.lmp', fmt='lammps/lmp', - type_map = ['O', 'H']) + self.system.from_fmt("tmp.lmp", fmt="lammps/lmp", type_map=["O", "H"]) + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_lammps_lmp_to_system.py b/tests/test_lammps_lmp_to_system.py index 534027235..ea8d21574 100644 --- a/tests/test_lammps_lmp_to_system.py +++ b/tests/test_lammps_lmp_to_system.py @@ -2,15 +2,16 @@ import numpy as np import unittest from context import dpdata -from poscars.poscar_ref_oh import TestPOSCARoh +from poscars.poscar_ref_oh import TestPOSCARoh + class TestLmp(unittest.TestCase, TestPOSCARoh): - - def setUp(self): + def setUp(self): self.system = dpdata.System() - self.system.from_lammps_lmp(os.path.join('poscars', 'conf.lmp'), - type_map = ['O', 'H']) - -if __name__ == '__main__': + self.system.from_lammps_lmp( + os.path.join("poscars", "conf.lmp"), type_map=["O", "H"] + ) + + +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_lammps_read_from_trajs.py b/tests/test_lammps_read_from_trajs.py index 338ea1c3d..128aedb71 100644 --- a/tests/test_lammps_read_from_trajs.py +++ b/tests/test_lammps_read_from_trajs.py @@ -3,43 +3,130 @@ import unittest from context import dpdata + class TestLmpReadFromTrajsWithRandomTypeId(unittest.TestCase): - - def setUp(self): - self.system = \ - dpdata.System(os.path.join('lammps', 'traj_with_random_type_id.dump'), fmt = 'lammps/dump', type_map = ["Ta","Nb","W","Mo","V","Al"]) - - def test_nframes (self) : + def setUp(self): + self.system = dpdata.System( + os.path.join("lammps", "traj_with_random_type_id.dump"), + fmt="lammps/dump", + type_map=["Ta", "Nb", "W", "Mo", "V", "Al"], + ) + + def test_nframes(self): self.system.sort_atom_types() - atype 
= self.system['atom_types'].tolist() + atype = self.system["atom_types"].tolist() self.assertTrue(atype == [1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5]) - - coord = self.system['coords'].reshape([2, -1]) - - coord0_std = np.array([6.69832 , 3.39136 , 3.34005 , 1.80744 , 5.08708 , 5.01099 , - 5.10512 , 5.08007 , 5.01272 , 1.70086 , 1.69544 , 1.66979 , - 3.48873 , 0.0697213, 6.67774 , 3.38621 , 0.033338 , 3.34239 , - 1.79424 , 1.7281 , 5.01015 , 3.48973 , 3.42896 , 6.67795 , - 3.40064 , 3.39148 , 3.34188 , 5.09069 , 1.72876 , 5.00917 , - 0.119885 , 6.74841 , 3.33869 , 4.99379 , 1.69262 , 1.67183 , - 0.199838 , 3.4185 , 6.67565 , 1.7213 , 5.05235 , 1.66373 , - 0.21494 , 6.77616 , 6.67623 , 5.00691 , 5.05 , 1.66532 ]) + + coord = self.system["coords"].reshape([2, -1]) + + coord0_std = np.array( + [ + 6.69832, + 3.39136, + 3.34005, + 1.80744, + 5.08708, + 5.01099, + 5.10512, + 5.08007, + 5.01272, + 1.70086, + 1.69544, + 1.66979, + 3.48873, + 0.0697213, + 6.67774, + 3.38621, + 0.033338, + 3.34239, + 1.79424, + 1.7281, + 5.01015, + 3.48973, + 3.42896, + 6.67795, + 3.40064, + 3.39148, + 3.34188, + 5.09069, + 1.72876, + 5.00917, + 0.119885, + 6.74841, + 3.33869, + 4.99379, + 1.69262, + 1.67183, + 0.199838, + 3.4185, + 6.67565, + 1.7213, + 5.05235, + 1.66373, + 0.21494, + 6.77616, + 6.67623, + 5.00691, + 5.05, + 1.66532, + ] + ) self.assertTrue(np.allclose(coord[0, ...], coord0_std)) - coord1_std = np.array([4.85582828e+00, 5.12324490e+00, 1.55763728e+00, 1.82031828e+00, - 1.61210490e+00, 4.91329728e+00, 5.15568828e+00, 4.91296490e+00, - 5.02114728e+00, 1.67640828e+00, 1.62756490e+00, 1.61183728e+00, - 3.41785828e+00, 6.54050490e+00, 3.42793728e+00, 3.39324828e+00, - 3.47558490e+00, 6.50564728e+00, 3.43286828e+00, 3.44029490e+00, - 3.37871728e+00, 6.60497828e+00, 3.46782490e+00, 3.42949728e+00, - 1.82021828e+00, 5.08114490e+00, 4.93158728e+00, 5.20431828e+00, - 1.80972490e+00, 5.00061728e+00, 6.56278828e+00, 6.62718490e+00, - 3.35101728e+00, 4.97045828e+00, 
1.80536490e+00, 1.73358728e+00, - 6.61765828e+00, 3.43486490e+00, 6.48447728e+00, 1.57899828e+00, - 4.89261490e+00, 1.63632728e+00, 6.59585828e+00, 1.40657901e-01, - 6.51767728e+00, 3.30914005e+00, 7.86399766e-02, 6.66581642e-04]) + coord1_std = np.array( + [ + 4.85582828e00, + 5.12324490e00, + 1.55763728e00, + 1.82031828e00, + 1.61210490e00, + 4.91329728e00, + 5.15568828e00, + 4.91296490e00, + 5.02114728e00, + 1.67640828e00, + 1.62756490e00, + 1.61183728e00, + 3.41785828e00, + 6.54050490e00, + 3.42793728e00, + 3.39324828e00, + 3.47558490e00, + 6.50564728e00, + 3.43286828e00, + 3.44029490e00, + 3.37871728e00, + 6.60497828e00, + 3.46782490e00, + 3.42949728e00, + 1.82021828e00, + 5.08114490e00, + 4.93158728e00, + 5.20431828e00, + 1.80972490e00, + 5.00061728e00, + 6.56278828e00, + 6.62718490e00, + 3.35101728e00, + 4.97045828e00, + 1.80536490e00, + 1.73358728e00, + 6.61765828e00, + 3.43486490e00, + 6.48447728e00, + 1.57899828e00, + 4.89261490e00, + 1.63632728e00, + 6.59585828e00, + 1.40657901e-01, + 6.51767728e00, + 3.30914005e00, + 7.86399766e-02, + 6.66581642e-04, + ] + ) self.assertTrue(np.allclose(coord[1, ...], coord1_std)) -if __name__ == '__main__': - unittest.main() +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_msd.py b/tests/test_msd.py index 509e5c30a..fd62d13c1 100644 --- a/tests/test_msd.py +++ b/tests/test_msd.py @@ -3,35 +3,29 @@ import unittest from context import dpdata -class TestMSD (unittest.TestCase) : - def setUp(self) : + +class TestMSD(unittest.TestCase): + def setUp(self): self.system = dpdata.System() - self.system.data['atom_types'] = np.array([0,1]) - self.system.data['atom_names'] = ['O', 'H'] + self.system.data["atom_types"] = np.array([0, 1]) + self.system.data["atom_names"] = ["O", "H"] nframes = 10 cell_size = 5 - self.system.data['cells'] = np.tile(cell_size * np.eye(3), - (nframes,1,1)) - self.system.data['coords'] = np.zeros([nframes, 2, 3]) - for ff in range(nframes) : - self.system.data['coords'][ff][0] = 1.0 
* ff * np.array([1,0,0]) - self.system.data['coords'][ff][1] = 2.0 * ff * np.array([1,0,0]) - self.system.data['coords'] = self.system.data['coords'] % cell_size - - def test_msd(self) : + self.system.data["cells"] = np.tile(cell_size * np.eye(3), (nframes, 1, 1)) + self.system.data["coords"] = np.zeros([nframes, 2, 3]) + for ff in range(nframes): + self.system.data["coords"][ff][0] = 1.0 * ff * np.array([1, 0, 0]) + self.system.data["coords"][ff][1] = 2.0 * ff * np.array([1, 0, 0]) + self.system.data["coords"] = self.system.data["coords"] % cell_size + + def test_msd(self): # print(self.system['atom_types'] == 0) msd = dpdata.md.msd.msd(self.system) - msd0 = dpdata.md.msd.msd(self.system, self.system['atom_types'] == 0) - msd1 = dpdata.md.msd.msd(self.system, self.system['atom_types'] == 1) + msd0 = dpdata.md.msd.msd(self.system, self.system["atom_types"] == 0) + msd1 = dpdata.md.msd.msd(self.system, self.system["atom_types"] == 1) # print(msd) ncomp = msd.shape[0] - for ii in range(ncomp) : - self.assertAlmostEqual(msd0[ii], - ii * ii, - msg = 'msd0[%d]' % ii) - self.assertAlmostEqual(msd1[ii], - ii * ii * 4, - msg = 'msd1[%d]' % ii) - self.assertAlmostEqual(msd[ii], - (msd0[ii]+msd1[ii]) * 0.5, - 'msd[%d]' % ii) + for ii in range(ncomp): + self.assertAlmostEqual(msd0[ii], ii * ii, msg="msd0[%d]" % ii) + self.assertAlmostEqual(msd1[ii], ii * ii * 4, msg="msd1[%d]" % ii) + self.assertAlmostEqual(msd[ii], (msd0[ii] + msd1[ii]) * 0.5, "msd[%d]" % ii) diff --git a/tests/test_multisystems.py b/tests/test_multisystems.py index 6e05861b3..689b1a869 100644 --- a/tests/test_multisystems.py +++ b/tests/test_multisystems.py @@ -16,19 +16,27 @@ def setUp(self): self.f_places = 6 self.v_places = 6 - system_1 = dpdata.LabeledSystem('gaussian/methane.gaussianlog', fmt='gaussian/log') - system_2 = dpdata.LabeledSystem('gaussian/methane_reordered.gaussianlog', fmt='gaussian/log') - system_3 = dpdata.LabeledSystem('gaussian/methane_sub.gaussianlog', fmt='gaussian/log') - system_4 
= dpdata.LabeledSystem('gaussian/noncoveraged.gaussianlog', fmt='gaussian/log') + system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + system_2 = dpdata.LabeledSystem( + "gaussian/methane_reordered.gaussianlog", fmt="gaussian/log" + ) + system_3 = dpdata.LabeledSystem( + "gaussian/methane_sub.gaussianlog", fmt="gaussian/log" + ) + system_4 = dpdata.LabeledSystem( + "gaussian/noncoveraged.gaussianlog", fmt="gaussian/log" + ) self.systems = dpdata.MultiSystems(system_1, system_3, system_4) self.systems.append(system_2) - self.system_1 = self.systems['C1H3'] + self.system_1 = self.systems["C1H3"] self.system_2 = system_3 - - self.system_names = ['C1H4', 'C1H3'] - self.system_sizes = {'C1H4':2, 'C1H3':1} - self.atom_names = ['C', 'H'] + + self.system_names = ["C1H4", "C1H3"] + self.system_sizes = {"C1H4": 2, "C1H3": 1} + self.atom_names = ["C", "H"] def test_len(self): self.assertEqual(len(self.systems), 2) @@ -37,7 +45,9 @@ def test_get_nframes(self): self.assertEqual(self.systems.get_nframes(), 3) def test_str(self): - self.assertEqual(str(self.systems), "MultiSystems (2 systems containing 3 frames)") + self.assertEqual( + str(self.systems), "MultiSystems (2 systems containing 3 frames)" + ) class TestMultiSystemsAdd(unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC): @@ -47,97 +57,151 @@ def setUp(self): self.f_places = 6 self.v_places = 6 - system_1 = dpdata.LabeledSystem('gaussian/methane.gaussianlog', fmt='gaussian/log') - system_2 = dpdata.LabeledSystem('gaussian/methane_reordered.gaussianlog', fmt='gaussian/log') - system_3 = dpdata.LabeledSystem('gaussian/methane_sub.gaussianlog', fmt='gaussian/log') - system_4 = dpdata.LabeledSystem('gaussian/noncoveraged.gaussianlog', fmt='gaussian/log') + system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + system_2 = dpdata.LabeledSystem( + "gaussian/methane_reordered.gaussianlog", fmt="gaussian/log" + ) + system_3 = dpdata.LabeledSystem( 
+ "gaussian/methane_sub.gaussianlog", fmt="gaussian/log" + ) + system_4 = dpdata.LabeledSystem( + "gaussian/noncoveraged.gaussianlog", fmt="gaussian/log" + ) self.systems = dpdata.MultiSystems(system_1) self.systems += system_2 self.systems += system_3 self.systems += system_4 for s in self.systems: - if s.formula == 'C1H3': + if s.formula == "C1H3": self.system_1 = s self.system_2 = system_3 - - self.system_names = ['C1H4', 'C1H3'] - self.system_sizes = {'C1H4':2, 'C1H3':1} - self.atom_names = ['C', 'H'] + + self.system_names = ["C1H4", "C1H3"] + self.system_sizes = {"C1H4": 2, "C1H3": 1} + self.atom_names = ["C", "H"] class TestMultiSystemsSorted(unittest.TestCase, MultiSystems): def setUp(self): # CH4 and O2 - system_1 = dpdata.LabeledSystem('gaussian/methane.gaussianlog', fmt='gaussian/log') - system_2 = dpdata.LabeledSystem('gaussian/oxygen.gaussianlog', fmt='gaussian/log') + system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + system_2 = dpdata.LabeledSystem( + "gaussian/oxygen.gaussianlog", fmt="gaussian/log" + ) self.systems = dpdata.MultiSystems(system_1, system_2) - self.system_names = ['C1H4O0', 'C0H0O2'] - self.system_sizes = {'C1H4O0':1, 'C0H0O2':1} - self.atom_names = ['C', 'H', 'O'] - + self.system_names = ["C1H4O0", "C0H0O2"] + self.system_sizes = {"C1H4O0": 1, "C0H0O2": 1} + self.atom_names = ["C", "H", "O"] + + class TestMultiDeepmdDumpRaw(unittest.TestCase, CompLabeledSys, IsNoPBC): - def setUp (self) : + def setUp(self): self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - system_1 = dpdata.LabeledSystem('gaussian/methane.gaussianlog', fmt='gaussian/log') - system_2 = dpdata.LabeledSystem('gaussian/methane_reordered.gaussianlog', fmt='gaussian/log') - system_3 = dpdata.LabeledSystem('gaussian/methane_sub.gaussianlog', fmt='gaussian/log') - system_4 = dpdata.LabeledSystem('gaussian/noncoveraged.gaussianlog', fmt='gaussian/log') + system_1 = dpdata.LabeledSystem( + 
"gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + system_2 = dpdata.LabeledSystem( + "gaussian/methane_reordered.gaussianlog", fmt="gaussian/log" + ) + system_3 = dpdata.LabeledSystem( + "gaussian/methane_sub.gaussianlog", fmt="gaussian/log" + ) + system_4 = dpdata.LabeledSystem( + "gaussian/noncoveraged.gaussianlog", fmt="gaussian/log" + ) systems = dpdata.MultiSystems(system_1, system_2, system_3, system_4) path = "tmp.deepmd.multi" systems.to_deepmd_raw(path) - self.system_1 = dpdata.LabeledSystem(os.path.join(path, 'C1H3'), fmt='deepmd/raw', type_map = ['C', 'H']) + self.system_1 = dpdata.LabeledSystem( + os.path.join(path, "C1H3"), fmt="deepmd/raw", type_map=["C", "H"] + ) self.system_2 = system_3 + class TestMultiDeepmdDumpComp(unittest.TestCase, CompLabeledSys, IsNoPBC): - def setUp (self) : + def setUp(self): self.places = 6 self.e_places = 4 self.f_places = 6 self.v_places = 6 - system_1 = dpdata.LabeledSystem('gaussian/methane.gaussianlog', fmt='gaussian/log') - system_2 = dpdata.LabeledSystem('gaussian/methane_reordered.gaussianlog', fmt='gaussian/log') - system_3 = dpdata.LabeledSystem('gaussian/methane_sub.gaussianlog', fmt='gaussian/log') - system_4 = dpdata.LabeledSystem('gaussian/noncoveraged.gaussianlog', fmt='gaussian/log') + system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + system_2 = dpdata.LabeledSystem( + "gaussian/methane_reordered.gaussianlog", fmt="gaussian/log" + ) + system_3 = dpdata.LabeledSystem( + "gaussian/methane_sub.gaussianlog", fmt="gaussian/log" + ) + system_4 = dpdata.LabeledSystem( + "gaussian/noncoveraged.gaussianlog", fmt="gaussian/log" + ) systems = dpdata.MultiSystems(system_1, system_2, system_3, system_4) path = "tmp.deepmd.npy.multi" systems.to_deepmd_npy(path) - self.system_1 = dpdata.LabeledSystem(os.path.join(path, 'C1H3'), fmt='deepmd/npy', type_map = ['C', 'H']) + self.system_1 = dpdata.LabeledSystem( + os.path.join(path, "C1H3"), fmt="deepmd/npy", type_map=["C", "H"] 
+ ) self.system_2 = system_3 + class TestTypeMap(unittest.TestCase): - def setUp (self) : - self.system_1 = dpdata.LabeledSystem('gaussian/methane.gaussianlog', fmt='gaussian/log') - self.system_2 = dpdata.LabeledSystem('gaussian/methane_reordered.gaussianlog', fmt='gaussian/log') - self.system_3 = dpdata.LabeledSystem('gaussian/methane_sub.gaussianlog', fmt='gaussian/log') - self.system_4 = dpdata.LabeledSystem('gaussian/noncoveraged.gaussianlog', fmt='gaussian/log') + def setUp(self): + self.system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + self.system_2 = dpdata.LabeledSystem( + "gaussian/methane_reordered.gaussianlog", fmt="gaussian/log" + ) + self.system_3 = dpdata.LabeledSystem( + "gaussian/methane_sub.gaussianlog", fmt="gaussian/log" + ) + self.system_4 = dpdata.LabeledSystem( + "gaussian/noncoveraged.gaussianlog", fmt="gaussian/log" + ) def test_type_map(self): - for type_map in permutations(['C', 'H', 'O', 'N'], 4): - systems = dpdata.MultiSystems(self.system_1, self.system_2, self.system_3, self.system_4, type_map=type_map) + for type_map in permutations(["C", "H", "O", "N"], 4): + systems = dpdata.MultiSystems( + self.system_1, + self.system_2, + self.system_3, + self.system_4, + type_map=type_map, + ) self.assertEqual(type_map, systems.atom_names) class TestMultiSystemsTo(unittest.TestCase, MultiSystems): def setUp(self): # CH4 and O2 - system_1 = dpdata.LabeledSystem('gaussian/methane.gaussianlog', fmt='gaussian/log') - system_2 = dpdata.LabeledSystem('gaussian/oxygen.gaussianlog', fmt='gaussian/log') + system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + system_2 = dpdata.LabeledSystem( + "gaussian/oxygen.gaussianlog", fmt="gaussian/log" + ) systems1 = dpdata.MultiSystems(system_1, system_2) systems1.to_deepmd_npy("tmp.multi.to") self.systems = dpdata.MultiSystems().from_deepmd_npy("tmp.multi.to") - self.system_names = ['C1H4O0', 'C0H0O2'] - self.system_sizes = 
{'C1H4O0':1, 'C0H0O2':1} - self.atom_names = ['C', 'H', 'O'] + self.system_names = ["C1H4O0", "C0H0O2"] + self.system_sizes = {"C1H4O0": 1, "C0H0O2": 1} + self.atom_names = ["C", "H", "O"] + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_periodic_table.py b/tests/test_periodic_table.py index 2548a5163..40a29a1c1 100644 --- a/tests/test_periodic_table.py +++ b/tests/test_periodic_table.py @@ -1,27 +1,30 @@ import unittest from context import dpdata -data={"name": "Hydrogen", - "atomic_no": 1, - "X": 2.2, - "atomic_mass": 1.00794, - "radius": 0.25, - "calculated_radius": 0.53 - } +data = { + "name": "Hydrogen", + "atomic_no": 1, + "X": 2.2, + "atomic_mass": 1.00794, + "radius": 0.25, + "calculated_radius": 0.53, +} + class TestPeriodicTable(unittest.TestCase): - def setUp (self) : + def setUp(self): self.H = dpdata.periodic_table.Element("H") def test_H(self): - H=self.H - self.assertEqual(H.name,data['name']) - self.assertEqual(H.Z,data['atomic_no']) - self.assertEqual(H.X,data['X']) - self.assertEqual(H.mass,data['atomic_mass']) - self.assertEqual(H.radius,data['radius']) - self.assertEqual(H.calculated_radius,data['calculated_radius']) - self.assertEqual(H.X,dpdata.periodic_table.Element.from_Z(1).X) + H = self.H + self.assertEqual(H.name, data["name"]) + self.assertEqual(H.Z, data["atomic_no"]) + self.assertEqual(H.X, data["X"]) + self.assertEqual(H.mass, data["atomic_mass"]) + self.assertEqual(H.radius, data["radius"]) + self.assertEqual(H.calculated_radius, data["calculated_radius"]) + self.assertEqual(H.X, dpdata.periodic_table.Element.from_Z(1).X) + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_perturb.py b/tests/test_perturb.py index fe63882f0..c047dfa4d 100644 --- a/tests/test_perturb.py +++ b/tests/test_perturb.py @@ -5,133 +5,170 @@ from comp_sys import CompSys, IsPBC from unittest.mock import Mock -from unittest.mock import patch, MagicMock +from 
unittest.mock import patch, MagicMock + class NormalGenerator(object): def __init__(self): self.randn_generator = self.get_randn_generator() self.rand_generator = self.get_rand_generator() - def randn(self,number): + + def randn(self, number): return next(self.randn_generator) - def rand(self,number): + + def rand(self, number): return next(self.rand_generator) + @staticmethod def get_randn_generator(): - data = np.asarray([ - [ 0.71878148, -2.20667426, 1.49373955], - [-0.42728113, 1.43836059, -1.17553854], - [-1.70793073, -0.39588759, -0.40880927], - [ 0.17078291, -0.34856352, 1.04307936], - [-0.99103413, -0.1886479, 0.13813131], - [ 0.5839343, 1.04612646, -0.62631026], - [ 0.9752889, 1.85932517, -0.47875828], - [-0.23977172, -0.38373444, -0.04375488]]) + data = np.asarray( + [ + [0.71878148, -2.20667426, 1.49373955], + [-0.42728113, 1.43836059, -1.17553854], + [-1.70793073, -0.39588759, -0.40880927], + [0.17078291, -0.34856352, 1.04307936], + [-0.99103413, -0.1886479, 0.13813131], + [0.5839343, 1.04612646, -0.62631026], + [0.9752889, 1.85932517, -0.47875828], + [-0.23977172, -0.38373444, -0.04375488], + ] + ) count = 0 while True: yield data[count] - count +=1 - - @staticmethod + count += 1 + + @staticmethod def get_rand_generator(): - yield np.asarray([0.23182233, 0.87106847, 0.68728511, 0.94180274, 0.92860453, 0.69191187]) + yield np.asarray( + [0.23182233, 0.87106847, 0.68728511, 0.94180274, 0.92860453, 0.69191187] + ) + class UniformGenerator(object): def __init__(self): self.randn_generator = self.get_randn_generator() self.rand_generator = self.get_rand_generator() - def randn(self,number): + + def randn(self, number): return next(self.randn_generator) - def rand(self,number): + + def rand(self, number): return next(self.rand_generator) @staticmethod def get_randn_generator(): - data = [[-0.19313281, 0.80194715, 0.14050915], - [-1.47859926, 0.12921667, -0.17632456], - [-0.60836805, -0.7700423, -0.8386948 ], - [-0.03236753, 0.36690245, 0.5041072 ], - 
[-1.59366933, 0.37069227, 0.89608291], - [ 0.18165617, 0.53875315, -0.42233955], - [ 0.74052496, 1.26627555, -1.12094823], - [-0.89610092, -1.44247021, -1.3502529 ]] - yield np.asarray([0.0001,0.0001,0.0001]) # test for not using small vector + data = [ + [-0.19313281, 0.80194715, 0.14050915], + [-1.47859926, 0.12921667, -0.17632456], + [-0.60836805, -0.7700423, -0.8386948], + [-0.03236753, 0.36690245, 0.5041072], + [-1.59366933, 0.37069227, 0.89608291], + [0.18165617, 0.53875315, -0.42233955], + [0.74052496, 1.26627555, -1.12094823], + [-0.89610092, -1.44247021, -1.3502529], + ] + yield np.asarray([0.0001, 0.0001, 0.0001]) # test for not using small vector count = 0 while True: yield data[count] - count +=1 + count += 1 - @staticmethod + @staticmethod def get_rand_generator(): - data = np.asarray([[0.71263084], [0.61339295], - [0.22948181], [0.36087632], - [0.17582222], [0.97926742], - [0.84706761], [0.44495513]]) - - yield np.asarray([0.34453551, 0.0618966, 0.9327273, 0.43013654, 0.88624993, 0.48827425]) - count =0 + data = np.asarray( + [ + [0.71263084], + [0.61339295], + [0.22948181], + [0.36087632], + [0.17582222], + [0.97926742], + [0.84706761], + [0.44495513], + ] + ) + + yield np.asarray( + [0.34453551, 0.0618966, 0.9327273, 0.43013654, 0.88624993, 0.48827425] + ) + count = 0 while True: yield np.asarray(data[count]) - count+=1 + count += 1 + class ConstGenerator(object): def __init__(self): self.randn_generator = self.get_randn_generator() self.rand_generator = self.get_rand_generator() - def randn(self,number): + + def randn(self, number): return next(self.randn_generator) - def rand(self,number): + + def rand(self, number): return next(self.rand_generator) @staticmethod def get_randn_generator(): - data = np.asarray([[ 0.95410606, -1.62338002, -2.05359934], - [ 0.69213769, -1.26008667, 0.77970721], - [-1.77926476, -0.39227219, 2.31677298], - [ 0.08785233, -0.03966649, -0.45325656], - [-0.53860887, 0.42536802, -0.46167309], - [-0.26865791, -0.19901684, 
-2.51444768], - [-0.31627314, 0.22076982, -0.36032225], - [0.66731887, 1.2505806, 1.46112938]]) - yield np.asarray([0.0001,0.0001,0.0001]) # test for not using small vector + data = np.asarray( + [ + [0.95410606, -1.62338002, -2.05359934], + [0.69213769, -1.26008667, 0.77970721], + [-1.77926476, -0.39227219, 2.31677298], + [0.08785233, -0.03966649, -0.45325656], + [-0.53860887, 0.42536802, -0.46167309], + [-0.26865791, -0.19901684, -2.51444768], + [-0.31627314, 0.22076982, -0.36032225], + [0.66731887, 1.2505806, 1.46112938], + ] + ) + yield np.asarray([0.0001, 0.0001, 0.0001]) # test for not using small vector count = 0 while True: yield data[count] - count +=1 + count += 1 + + @staticmethod + def get_rand_generator(): + yield np.asarray( + [0.01525907, 0.68387374, 0.39768541, 0.55596047, 0.26557088, 0.60883073] + ) - @staticmethod - def get_rand_generator(): - yield np.asarray([0.01525907, 0.68387374, 0.39768541, 0.55596047, 0.26557088, 0.60883073]) # %% class TestPerturbNormal(unittest.TestCase, CompSys, IsPBC): - @patch('numpy.random') - def setUp (self, random_mock): + @patch("numpy.random") + def setUp(self, random_mock): random_mock.rand = NormalGenerator().rand random_mock.randn = NormalGenerator().randn - system_1_origin = dpdata.System('poscars/POSCAR.SiC',fmt='vasp/poscar') - self.system_1 = system_1_origin.perturb(1,0.05,0.6,'normal') - self.system_2 = dpdata.System('poscars/POSCAR.SiC.normal',fmt='vasp/poscar') + system_1_origin = dpdata.System("poscars/POSCAR.SiC", fmt="vasp/poscar") + self.system_1 = system_1_origin.perturb(1, 0.05, 0.6, "normal") + self.system_2 = dpdata.System("poscars/POSCAR.SiC.normal", fmt="vasp/poscar") self.places = 6 + class TestPerturbUniform(unittest.TestCase, CompSys, IsPBC): - @patch('numpy.random') - def setUp (self, random_mock) : + @patch("numpy.random") + def setUp(self, random_mock): random_mock.rand = UniformGenerator().rand random_mock.randn = UniformGenerator().randn - system_1_origin = 
dpdata.System('poscars/POSCAR.SiC',fmt='vasp/poscar') - self.system_1 = system_1_origin.perturb(1,0.05,0.6,'uniform') - self.system_2 = dpdata.System('poscars/POSCAR.SiC.uniform',fmt='vasp/poscar') + system_1_origin = dpdata.System("poscars/POSCAR.SiC", fmt="vasp/poscar") + self.system_1 = system_1_origin.perturb(1, 0.05, 0.6, "uniform") + self.system_2 = dpdata.System("poscars/POSCAR.SiC.uniform", fmt="vasp/poscar") self.places = 6 + class TestPerturbConst(unittest.TestCase, CompSys, IsPBC): - @patch('numpy.random') - def setUp (self, random_mock) : + @patch("numpy.random") + def setUp(self, random_mock): random_mock.rand = ConstGenerator().rand random_mock.randn = ConstGenerator().randn - system_1_origin = dpdata.System('poscars/POSCAR.SiC',fmt='vasp/poscar') - self.system_1 = system_1_origin.perturb(1,0.05,0.6,'const') - self.system_2 = dpdata.System('poscars/POSCAR.SiC.const',fmt='vasp/poscar') + system_1_origin = dpdata.System("poscars/POSCAR.SiC", fmt="vasp/poscar") + self.system_1 = system_1_origin.perturb(1, 0.05, 0.6, "const") + self.system_2 = dpdata.System("poscars/POSCAR.SiC.const", fmt="vasp/poscar") self.places = 6 -if __name__ == '__main__': - unittest.main() \ No newline at end of file + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_pick_atom_idx.py b/tests/test_pick_atom_idx.py index bb6af61f7..37be5d8fe 100644 --- a/tests/test_pick_atom_idx.py +++ b/tests/test_pick_atom_idx.py @@ -2,40 +2,50 @@ import unittest from context import dpdata from comp_sys import CompSys, IsNoPBC + try: - import parmed - exist_module=True + import parmed + + exist_module = True except Exception: - exist_module=False + exist_module = False + class TestPickAtomIdx(unittest.TestCase, CompSys, IsNoPBC): - - def setUp(self): + def setUp(self): self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - self.system_1 = dpdata.LabeledSystem('gaussian/methane_reordered.gaussianlog', fmt='gaussian/log').pick_atom_idx(slice(4)) - self.system_2 
= dpdata.LabeledSystem('gaussian/methane_sub.gaussianlog', fmt='gaussian/log') + self.system_1 = dpdata.LabeledSystem( + "gaussian/methane_reordered.gaussianlog", fmt="gaussian/log" + ).pick_atom_idx(slice(4)) + self.system_2 = dpdata.LabeledSystem( + "gaussian/methane_sub.gaussianlog", fmt="gaussian/log" + ) -@unittest.skipIf(not exist_module,"skip") + +@unittest.skipIf(not exist_module, "skip") class TestPickByAmberMask(unittest.TestCase, CompSys, IsNoPBC): - - def setUp(self): - parmfile="amber/corr/qmmm.parm7" - ep = r'@%EP' + def setUp(self): + parmfile = "amber/corr/qmmm.parm7" + ep = r"@%EP" target = ":1" - cutoff = 6. + cutoff = 6.0 interactwith = "(%s)<:%f&!%s" % (target, cutoff, ep) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - self.system_1 = dpdata.LabeledSystem("amber/corr/dp_corr", fmt="deepmd/npy").pick_by_amber_mask( - parmfile, interactwith, pass_coords=True, nopbc=True)['C6EP0H11HW192O6OW96P1'] - self.system_2 = dpdata.LabeledSystem("amber/corr/dp_amber_mask/C6EP0H11HW192O6OW96P1", fmt="deepmd/npy") - + self.system_1 = dpdata.LabeledSystem( + "amber/corr/dp_corr", fmt="deepmd/npy" + ).pick_by_amber_mask(parmfile, interactwith, pass_coords=True, nopbc=True)[ + "C6EP0H11HW192O6OW96P1" + ] + self.system_2 = dpdata.LabeledSystem( + "amber/corr/dp_amber_mask/C6EP0H11HW192O6OW96P1", fmt="deepmd/npy" + ) + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_predict.py b/tests/test_predict.py index 3ba62ec23..2176bf74e 100644 --- a/tests/test_predict.py +++ b/tests/test_predict.py @@ -3,6 +3,7 @@ from comp_sys import CompLabeledSys, IsPBC from context import dpdata + try: import ase except ModuleNotFoundError: @@ -14,36 +15,36 @@ @dpdata.driver.Driver.register("zero") class ZeroDriver(dpdata.driver.Driver): def label(self, data): - nframes = data['coords'].shape[0] - natoms = data['coords'].shape[1] - data['energies'] = np.zeros((nframes,)) - data['forces'] = np.zeros((nframes, 
natoms, 3)) - data['virials'] = np.zeros((nframes, 3, 3)) + nframes = data["coords"].shape[0] + natoms = data["coords"].shape[1] + data["energies"] = np.zeros((nframes,)) + data["forces"] = np.zeros((nframes, natoms, 3)) + data["virials"] = np.zeros((nframes, 3, 3)) return data @dpdata.driver.Driver.register("one") class OneDriver(dpdata.driver.Driver): def label(self, data): - nframes = data['coords'].shape[0] - natoms = data['coords'].shape[1] - data['energies'] = np.ones((nframes,)) - data['forces'] = np.ones((nframes, natoms, 3)) - data['virials'] = np.ones((nframes, 3, 3)) + nframes = data["coords"].shape[0] + natoms = data["coords"].shape[1] + data["energies"] = np.ones((nframes,)) + data["forces"] = np.ones((nframes, natoms, 3)) + data["virials"] = np.ones((nframes, 3, 3)) return data class TestPredict(unittest.TestCase, CompLabeledSys): - def setUp (self) : - ori_sys = dpdata.LabeledSystem('poscars/deepmd.h2o.md', - fmt = 'deepmd/raw', - type_map = ['O', 'H']) + def setUp(self): + ori_sys = dpdata.LabeledSystem( + "poscars/deepmd.h2o.md", fmt="deepmd/raw", type_map=["O", "H"] + ) self.system_1 = ori_sys.predict(driver="zero") - self.system_2 = dpdata.LabeledSystem('poscars/deepmd.h2o.md', - fmt = 'deepmd/raw', - type_map = ['O', 'H']) - for pp in ('energies', 'forces', 'virials'): - self.system_2.data[pp][:] = 0. 
+ self.system_2 = dpdata.LabeledSystem( + "poscars/deepmd.h2o.md", fmt="deepmd/raw", type_map=["O", "H"] + ) + for pp in ("energies", "forces", "virials"): + self.system_2.data[pp][:] = 0.0 self.places = 6 self.e_places = 6 @@ -53,23 +54,26 @@ def setUp (self) : class TestHybridDriver(unittest.TestCase, CompLabeledSys): """Test HybridDriver.""" - def setUp(self) : - ori_sys = dpdata.LabeledSystem('poscars/deepmd.h2o.md', - fmt = 'deepmd/raw', - type_map = ['O', 'H']) - self.system_1 = ori_sys.predict([ + + def setUp(self): + ori_sys = dpdata.LabeledSystem( + "poscars/deepmd.h2o.md", fmt="deepmd/raw", type_map=["O", "H"] + ) + self.system_1 = ori_sys.predict( + [ {"type": "one"}, {"type": "one"}, {"type": "one"}, {"type": "zero"}, ], - driver="hybrid") + driver="hybrid", + ) # sum is 3 - self.system_2 = dpdata.LabeledSystem('poscars/deepmd.h2o.md', - fmt = 'deepmd/raw', - type_map = ['O', 'H']) - for pp in ('energies', 'forces'): - self.system_2.data[pp][:] = 3. + self.system_2 = dpdata.LabeledSystem( + "poscars/deepmd.h2o.md", fmt="deepmd/raw", type_map=["O", "H"] + ) + for pp in ("energies", "forces"): + self.system_2.data[pp][:] = 3.0 self.places = 6 self.e_places = 6 @@ -77,12 +81,12 @@ def setUp(self) : self.v_places = 6 -@unittest.skipIf(skip_ase,"skip ase related test. install ase to fix") +@unittest.skipIf(skip_ase, "skip ase related test. install ase to fix") class TestASEDriver(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - ori_sys = dpdata.LabeledSystem('poscars/deepmd.h2o.md', - fmt = 'deepmd/raw', - type_map = ['O', 'H']) + def setUp(self): + ori_sys = dpdata.LabeledSystem( + "poscars/deepmd.h2o.md", fmt="deepmd/raw", type_map=["O", "H"] + ) one_driver = OneDriver() self.system_1 = ori_sys.predict(driver=one_driver) self.system_2 = ori_sys.predict(one_driver.ase_calculator, driver="ase") @@ -94,10 +98,10 @@ def setUp (self) : @unittest.skipIf(skip_ase, "skip ase related test. 
install ase to fix") class TestMinimize(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - ori_sys = dpdata.LabeledSystem('poscars/deepmd.h2o.md', - fmt = 'deepmd/raw', - type_map = ['O', 'H']) + def setUp(self): + ori_sys = dpdata.LabeledSystem( + "poscars/deepmd.h2o.md", fmt="deepmd/raw", type_map=["O", "H"] + ) zero_driver = ZeroDriver() self.system_1 = ori_sys.predict(driver=zero_driver) self.system_2 = ori_sys.minimize(driver=zero_driver, minimizer="ase") @@ -109,14 +113,16 @@ def setUp (self) : @unittest.skipIf(skip_ase, "skip ase related test. install ase to fix") class TestMinimizeMultiSystems(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - ori_sys = dpdata.LabeledSystem('poscars/deepmd.h2o.md', - fmt = 'deepmd/raw', - type_map = ['O', 'H']) + def setUp(self): + ori_sys = dpdata.LabeledSystem( + "poscars/deepmd.h2o.md", fmt="deepmd/raw", type_map=["O", "H"] + ) multi_sys = dpdata.MultiSystems(ori_sys) zero_driver = ZeroDriver() self.system_1 = list(multi_sys.predict(driver=zero_driver).systems.values())[0] - self.system_2 = list(multi_sys.minimize(driver=zero_driver, minimizer="ase").systems.values())[0] + self.system_2 = list( + multi_sys.minimize(driver=zero_driver, minimizer="ase").systems.values() + )[0] self.places = 6 self.e_places = 6 self.f_places = 6 diff --git a/tests/test_pwmat_config_dump.py b/tests/test_pwmat_config_dump.py index ec77ac6ea..32c6ee529 100644 --- a/tests/test_pwmat_config_dump.py +++ b/tests/test_pwmat_config_dump.py @@ -2,47 +2,58 @@ import numpy as np import unittest import dpdata -from pwmat.config_ref_oh import Testconfigoh +from pwmat.config_ref_oh import Testconfigoh + def myfilecmp(test, f0, f1): - with open(f0) as fp0 : + with open(f0) as fp0: with open(f1) as fp1: test.assertTrue(fp0.read() == fp1.read()) + class TestatomconfigDump(unittest.TestCase, Testconfigoh): - def setUp(self): tmp_system = dpdata.System() - tmp_system.from_lammps_lmp(os.path.join('pwmat', 'conf.lmp'), type_map = 
['O', 'H']) - tmp_system.to_pwmat_atomconfig('tmp.atom.config') + tmp_system.from_lammps_lmp( + os.path.join("pwmat", "conf.lmp"), type_map=["O", "H"] + ) + tmp_system.to_pwmat_atomconfig("tmp.atom.config") self.system = dpdata.System() - self.system.from_pwmat_atomconfig('tmp.atom.config') + self.system.from_pwmat_atomconfig("tmp.atom.config") + class TestatomconfigDump1(unittest.TestCase, Testconfigoh): - - def setUp(self): + def setUp(self): tmp_system = dpdata.System() - tmp_system.from_pwmat_atomconfig(os.path.join('pwmat', 'atom.config.oh')) + tmp_system.from_pwmat_atomconfig(os.path.join("pwmat", "atom.config.oh")) # tmp_system.from_lammps_lmp(os.path.join('poscars', 'conf.lmp'), type_map = ['O', 'H']) - tmp_system.to_pwmat_atomconfig('tmp.atom.config') + tmp_system.to_pwmat_atomconfig("tmp.atom.config") self.system = dpdata.System() - self.system.from_pwmat_atomconfig('tmp.atom.config') + self.system.from_pwmat_atomconfig("tmp.atom.config") + -class TestatomconfigSkipZeroAtomNumb(unittest.TestCase) : +class TestatomconfigSkipZeroAtomNumb(unittest.TestCase): def tearDown(self): - if os.path.isfile('atom.config.tmp.1'): - os.remove('atom.config.tmp.1') - if os.path.isfile('atom.config.tmp.2'): - os.remove('atom.config.tmp.2') + if os.path.isfile("atom.config.tmp.1"): + os.remove("atom.config.tmp.1") + if os.path.isfile("atom.config.tmp.2"): + os.remove("atom.config.tmp.2") def test_dump_pwmat_type_map(self): - system0 = dpdata.System(os.path.join('pwmat', 'atom.config.oh'), fmt = 'pwmat/atom.config', type_map = ['H', 'O']) - system0.to_pwmat_atomconfig('atom.config.tmp.1') - system1 = dpdata.System(os.path.join('pwmat', 'atom.config.oh'), fmt = 'pwmat/atom.config', type_map = ['C', 'H', 'A', 'O', 'B']) - system1.to_pwmat_atomconfig('atom.config.tmp.2') - myfilecmp(self, 'atom.config.tmp.1', 'atom.config.tmp.2') + system0 = dpdata.System( + os.path.join("pwmat", "atom.config.oh"), + fmt="pwmat/atom.config", + type_map=["H", "O"], + ) + 
system0.to_pwmat_atomconfig("atom.config.tmp.1") + system1 = dpdata.System( + os.path.join("pwmat", "atom.config.oh"), + fmt="pwmat/atom.config", + type_map=["C", "H", "A", "O", "B"], + ) + system1.to_pwmat_atomconfig("atom.config.tmp.2") + myfilecmp(self, "atom.config.tmp.1", "atom.config.tmp.2") -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_pwmat_config_to_system.py b/tests/test_pwmat_config_to_system.py index 65772a704..3ff43b66e 100644 --- a/tests/test_pwmat_config_to_system.py +++ b/tests/test_pwmat_config_to_system.py @@ -4,18 +4,24 @@ import dpdata from pwmat.config_ref_ch4 import Testconfigch4 + class Testconfig(unittest.TestCase, Testconfigch4): - - def setUp(self): + def setUp(self): self.system = dpdata.System() - self.system.from_pwmat_atomconfig(os.path.join('pwmat', 'atom.config')) + self.system.from_pwmat_atomconfig(os.path.join("pwmat", "atom.config")) + + class TestpwmatconfigTypeMap(unittest.TestCase): def setUp(self): - sys0 = dpdata.System('pwmat/atom.config', fmt = 'atom.config') - sys0.data['atom_names'] = ['A', 'H', 'B', 'C', 'D'] - sys0.data['atom_numbs'] = [ 0, 1, 0, 1, 0] - sys0.data['atom_types'] = np.array([ 0, 0, 0, 1], dtype = int) - sys1 = dpdata.System('pwmat/atom.config', fmt = 'pwmat/atom.config', type_map = ['A', 'H', 'B', 'C', 'D']) + sys0 = dpdata.System("pwmat/atom.config", fmt="atom.config") + sys0.data["atom_names"] = ["A", "H", "B", "C", "D"] + sys0.data["atom_numbs"] = [0, 1, 0, 1, 0] + sys0.data["atom_types"] = np.array([0, 0, 0, 1], dtype=int) + sys1 = dpdata.System( + "pwmat/atom.config", + fmt="pwmat/atom.config", + type_map=["A", "H", "B", "C", "D"], + ) self.system_1 = sys0 self.system_2 = sys1 self.places = 6 @@ -24,5 +30,5 @@ def setUp(self): self.v_places = 6 -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_pwmat_mlmd.py b/tests/test_pwmat_mlmd.py index f3b89c0e4..f35ef42b2 100644 --- a/tests/test_pwmat_mlmd.py +++ 
b/tests/test_pwmat_mlmd.py @@ -5,51 +5,58 @@ class TestSingleStep(unittest.TestCase): - def setUp(self): - self.LabeledSystem1 = dpdata.LabeledSystem(os.path.join('pwmat', 'OUT.MLMD'),\ - fmt='movement' ) + self.LabeledSystem1 = dpdata.LabeledSystem( + os.path.join("pwmat", "OUT.MLMD"), fmt="movement" + ) - def test_mlmd(self) : + def test_mlmd(self): - self.assertEqual(self.LabeledSystem1['energies'], -0.2197270691E+03) + self.assertEqual(self.LabeledSystem1["energies"], -0.2197270691e03) self.assertEqual(self.LabeledSystem1.get_nframes(), 1) self.assertEqual(self.LabeledSystem1.get_natoms(), 5) - self.assertEqual(self.LabeledSystem1.data['atom_names'], ['H', 'C']) - self.assertEqual(self.LabeledSystem1.data['atom_numbs'], [4, 1]) - def test_cell(self) : - fp = open('pwmat/mlmd_cell') + self.assertEqual(self.LabeledSystem1.data["atom_names"], ["H", "C"]) + self.assertEqual(self.LabeledSystem1.data["atom_numbs"], [4, 1]) + + def test_cell(self): + fp = open("pwmat/mlmd_cell") cell = [] - for ii in fp : + for ii in fp: cell.append([float(jj) for jj in ii.split()]) cell = np.array(cell) - for ii in range(cell.shape[0]) : - for jj in range(cell.shape[1]) : - self.assertEqual(self.LabeledSystem1.data['cells'][0][ii][jj], cell[ii][jj]) + for ii in range(cell.shape[0]): + for jj in range(cell.shape[1]): + self.assertEqual( + self.LabeledSystem1.data["cells"][0][ii][jj], cell[ii][jj] + ) fp.close() - - def test_coord(self) : - fp = open('pwmat/mlmd_coord') + + def test_coord(self): + fp = open("pwmat/mlmd_coord") coord = [] - for ii in fp : + for ii in fp: coord.append([float(jj) for jj in ii.split()]) coord = np.array(coord) - for ii in range(coord.shape[0]) : - for jj in range(coord.shape[1]) : - self.assertEqual(self.LabeledSystem1.data['coords'][0][ii][jj], coord[ii][jj]*10.0) + for ii in range(coord.shape[0]): + for jj in range(coord.shape[1]): + self.assertEqual( + self.LabeledSystem1.data["coords"][0][ii][jj], coord[ii][jj] * 10.0 + ) fp.close() - def 
test_force(self) : - fp = open('pwmat/mlmd_force') + + def test_force(self): + fp = open("pwmat/mlmd_force") force = [] - for ii in fp : + for ii in fp: force.append([float(jj) for jj in ii.split()]) force = np.array(force) - for ii in range(force.shape[0]) : - for jj in range(force.shape[1]) : - self.assertEqual(self.LabeledSystem1.data['forces'][0][ii][jj], force[ii][jj]) + for ii in range(force.shape[0]): + for jj in range(force.shape[1]): + self.assertEqual( + self.LabeledSystem1.data["forces"][0][ii][jj], force[ii][jj] + ) fp.close() - -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_pwmat_movement.py b/tests/test_pwmat_movement.py index 6cf2163cc..e188c7462 100644 --- a/tests/test_pwmat_movement.py +++ b/tests/test_pwmat_movement.py @@ -3,61 +3,64 @@ import unittest import dpdata + class TestpwmatSinglePointEnergy: - def test_atom_names(self) : - self.assertEqual(self.system.data['atom_names'], ['H','C']) - def test_atom_numbs(self) : - self.assertEqual(self.system.data['atom_numbs'], [4,1]) - def test_atom_types(self) : - ref_type = [0,0,0,0,1] - ref_type = np.array(ref_type) - for ii in range(ref_type.shape[0]) : - self.assertEqual(self.system.data['atom_types'][ii], ref_type[ii]) - def test_cell(self) : - fp = open('pwmat/ref_cell') + def test_atom_names(self): + self.assertEqual(self.system.data["atom_names"], ["H", "C"]) + + def test_atom_numbs(self): + self.assertEqual(self.system.data["atom_numbs"], [4, 1]) + + def test_atom_types(self): + ref_type = [0, 0, 0, 0, 1] + ref_type = np.array(ref_type) + for ii in range(ref_type.shape[0]): + self.assertEqual(self.system.data["atom_types"][ii], ref_type[ii]) + + def test_cell(self): + fp = open("pwmat/ref_cell") cell = [] - for ii in fp : + for ii in fp: cell.append([float(jj) for jj in ii.split()]) cell = np.array(cell) - for ii in range(cell.shape[0]) : - for jj in range(cell.shape[1]) : - self.assertEqual(self.system.data['cells'][0][ii][jj], cell[ii][jj]) + 
for ii in range(cell.shape[0]): + for jj in range(cell.shape[1]): + self.assertEqual(self.system.data["cells"][0][ii][jj], cell[ii][jj]) fp.close() - - def test_coord(self) : - fp = open('pwmat/ref_coord') + def test_coord(self): + fp = open("pwmat/ref_coord") coord = [] - for ii in fp : + for ii in fp: coord.append([float(jj) for jj in ii.split()]) coord = np.array(coord) - for ii in range(coord.shape[0]) : - for jj in range(coord.shape[1]) : - self.assertEqual(self.system.data['coords'][0][ii][jj], coord[ii][jj]*10.0) + for ii in range(coord.shape[0]): + for jj in range(coord.shape[1]): + self.assertEqual( + self.system.data["coords"][0][ii][jj], coord[ii][jj] * 10.0 + ) fp.close() - def test_force(self) : - fp = open('pwmat/ref_force') + def test_force(self): + fp = open("pwmat/ref_force") force = [] - for ii in fp : + for ii in fp: force.append([float(jj) for jj in ii.split()]) force = np.array(force) - for ii in range(force.shape[0]) : - for jj in range(force.shape[1]) : - self.assertEqual(self.system.data['forces'][0][ii][jj], force[ii][jj]) + for ii in range(force.shape[0]): + for jj in range(force.shape[1]): + self.assertEqual(self.system.data["forces"][0][ii][jj], force[ii][jj]) fp.close() - def test_energy(self) : - ref_energy = -0.2196929065E+03 - self.assertEqual(self.system.data['energies'][0], ref_energy) - + def test_energy(self): + ref_energy = -0.2196929065e03 + self.assertEqual(self.system.data["energies"][0], ref_energy) class TestpwmatLabeledOutput(unittest.TestCase, TestpwmatSinglePointEnergy): - def setUp(self): - self.system = dpdata.LabeledSystem('pwmat/MOVEMENT', fmt = 'pwmat/MOVEMENT') + self.system = dpdata.LabeledSystem("pwmat/MOVEMENT", fmt="pwmat/MOVEMENT") -if __name__ == '__main__': - unittest.main() +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_pymatgen_molecule.py b/tests/test_pymatgen_molecule.py index d80acc922..8c3e72b66 100644 --- a/tests/test_pymatgen_molecule.py +++ b/tests/test_pymatgen_molecule.py @@ 
-2,37 +2,37 @@ import numpy as np import unittest from context import dpdata + try: import pymatgen except ModuleNotFoundError: - skip_pymatgen=True + skip_pymatgen = True else: - skip_pymatgen=False + skip_pymatgen = False + -@unittest.skipIf(skip_pymatgen,"skip pymatgen related test. install pymatgen to fix") +@unittest.skipIf(skip_pymatgen, "skip pymatgen related test. install pymatgen to fix") class TestPOSCARCart(unittest.TestCase): - - def setUp(self): + def setUp(self): self.system = dpdata.System() - self.system.from_pymatgen_molecule(os.path.join('pymatgen_data', 'FA-001.xyz')) + self.system.from_pymatgen_molecule(os.path.join("pymatgen_data", "FA-001.xyz")) self.assertEqual(list(self.system["atom_types"]), [0, 1, 2, 1, 1, 2, 1, 1]) def test_poscar_to_molecule(self): tmp_system = dpdata.System() - tmp_system.from_vasp_poscar(os.path.join('pymatgen_data', 'mol2.vasp')) - natoms = len(tmp_system['coords'][0]) - tmpcoord = tmp_system['coords'][0] - cog = np.average(tmpcoord, axis = 0) + tmp_system.from_vasp_poscar(os.path.join("pymatgen_data", "mol2.vasp")) + natoms = len(tmp_system["coords"][0]) + tmpcoord = tmp_system["coords"][0] + cog = np.average(tmpcoord, axis=0) dist = tmpcoord - np.tile(cog, [natoms, 1]) - max_dist_0 = np.max(np.linalg.norm(dist, axis = 1)) + max_dist_0 = np.max(np.linalg.norm(dist, axis=1)) mols = tmp_system.to("pymatgen/molecule") - cog = np.average(mols[-1].cart_coords, axis = 0) + cog = np.average(mols[-1].cart_coords, axis=0) dist = mols[-1].cart_coords - np.tile(cog, [natoms, 1]) - max_dist_1 = np.max(np.linalg.norm(dist, axis = 1)) + max_dist_1 = np.max(np.linalg.norm(dist, axis=1)) self.assertAlmostEqual(max_dist_0, max_dist_1) - -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_qe_cp_traj.py b/tests/test_qe_cp_traj.py index cad93fa6c..2cb982cee 100644 --- a/tests/test_qe_cp_traj.py +++ b/tests/test_qe_cp_traj.py @@ -5,58 +5,59 @@ bohr2ang = dpdata.unit.LengthConversion("bohr", 
"angstrom").value() -class TestCPTRAJProps : - def test_atom_names(self) : - self.assertEqual(self.system.data['atom_names'], ['O','H']) - def test_atom_numbs(self) : - self.assertEqual(self.system.data['atom_numbs'], [64,127]) +class TestCPTRAJProps: + def test_atom_names(self): + self.assertEqual(self.system.data["atom_names"], ["O", "H"]) - def test_atom_types(self) : - for ii in range(0,64) : - self.assertEqual(self.system.data['atom_types'][ii], 0) - for ii in range(64,191) : - self.assertEqual(self.system.data['atom_types'][ii], 1) + def test_atom_numbs(self): + self.assertEqual(self.system.data["atom_numbs"], [64, 127]) - def test_cell(self) : + def test_atom_types(self): + for ii in range(0, 64): + self.assertEqual(self.system.data["atom_types"][ii], 0) + for ii in range(64, 191): + self.assertEqual(self.system.data["atom_types"][ii], 1) + + def test_cell(self): ref = bohr2ang * 23.5170 * np.eye(3) self.assertEqual(self.system.get_nframes(), 2) - for ff in range(self.system.get_nframes()) : - for ii in range(3) : - for jj in range(3) : - self.assertEqual(self.system['cells'][ff][ii][jj], ref[ii][jj]) - - def test_coord(self) : - with open('qe.traj/oh-md.pos') as fp : - lines = fp.read().rstrip('\n').split('\n') + for ff in range(self.system.get_nframes()): + for ii in range(3): + for jj in range(3): + self.assertEqual(self.system["cells"][ff][ii][jj], ref[ii][jj]) + + def test_coord(self): + with open("qe.traj/oh-md.pos") as fp: + lines = fp.read().rstrip("\n").split("\n") lines = lines[-191:] coords = [] - for ii in lines : + for ii in lines: coords.append([float(jj) for jj in ii.split()]) coords = bohr2ang * np.array(coords) - celll = bohr2ang * 23.5170 - for ii in range(coords.shape[0]) : - for jj in range(coords[ii].size) : - if coords[ii][jj] < 0 : + celll = bohr2ang * 23.5170 + for ii in range(coords.shape[0]): + for jj in range(coords[ii].size): + if coords[ii][jj] < 0: coords[ii][jj] += celll - elif coords[ii][jj] >= celll : + elif coords[ii][jj] >= 
celll: coords[ii][jj] -= celll - self.assertAlmostEqual(self.system['coords'][-1][ii][jj], coords[ii][jj]) - - -class TestCPTRAJTraj(unittest.TestCase, TestCPTRAJProps): + self.assertAlmostEqual( + self.system["coords"][-1][ii][jj], coords[ii][jj] + ) - def setUp(self): - self.system = dpdata.System('qe.traj/oh-md', fmt = 'qe/cp/traj') +class TestCPTRAJTraj(unittest.TestCase, TestCPTRAJProps): + def setUp(self): + self.system = dpdata.System("qe.traj/oh-md", fmt="qe/cp/traj") -class TestCPTRAJLabeledTraj(unittest.TestCase, TestCPTRAJProps): - def setUp(self): - self.system = dpdata.LabeledSystem('qe.traj/oh-md', fmt = 'qe/cp/traj') +class TestCPTRAJLabeledTraj(unittest.TestCase, TestCPTRAJProps): + def setUp(self): + self.system = dpdata.LabeledSystem("qe.traj/oh-md", fmt="qe/cp/traj") -class TestConverCellDim(unittest.TestCase): +class TestConverCellDim(unittest.TestCase): def test_case_null(self): cell = dpdata.qe.traj.convert_celldm(8, [1, 1, 1]) ref = np.eye(3) @@ -65,6 +66,5 @@ def test_case_null(self): self.assertAlmostEqual(cell[ii][jj], ref[ii][jj]) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_qe_cp_traj_skipload.py b/tests/test_qe_cp_traj_skipload.py index b0d92e9d0..e8cc84bf1 100644 --- a/tests/test_qe_cp_traj_skipload.py +++ b/tests/test_qe_cp_traj_skipload.py @@ -8,53 +8,69 @@ bohr2ang = dpdata.unit.LengthConversion("bohr", "angstrom").value() + class TestPWSCFTrajSkip(unittest.TestCase, CompSys, IsPBC): - def setUp(self): - self.system_1 = dpdata.System(os.path.join('qe.traj', 'traj6'), - fmt = 'qe/cp/traj', - begin = 1, - step = 2) - self.system_2 = dpdata.System(os.path.join('qe.traj', 'traj6'), - fmt = 'qe/cp/traj', - begin = 0, - step = 1) \ - .sub_system(np.arange(1,6,2)) + def setUp(self): + self.system_1 = dpdata.System( + os.path.join("qe.traj", "traj6"), fmt="qe/cp/traj", begin=1, step=2 + ) + self.system_2 = dpdata.System( + os.path.join("qe.traj", "traj6"), fmt="qe/cp/traj", begin=0, 
step=1 + ).sub_system(np.arange(1, 6, 2)) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestPWSCFLabeledTrajSkip(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp(self): - self.system_1 = dpdata.LabeledSystem(os.path.join('qe.traj', 'traj6'), - fmt = 'qe/cp/traj', - begin = 1, - step = 2) - self.system_2 = dpdata.LabeledSystem(os.path.join('qe.traj', 'traj6'), - fmt = 'qe/cp/traj', - begin = 0, - step = 1) \ - .sub_system(np.arange(1,6,2)) + def setUp(self): + self.system_1 = dpdata.LabeledSystem( + os.path.join("qe.traj", "traj6"), fmt="qe/cp/traj", begin=1, step=2 + ) + self.system_2 = dpdata.LabeledSystem( + os.path.join("qe.traj", "traj6"), fmt="qe/cp/traj", begin=0, step=1 + ).sub_system(np.arange(1, 6, 2)) self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 def test_cell(self): - ref_cell = [5.359985500701728967e+00, 0, 0, - 3.585941820098031974e-01, 5.317218997480877896e+00, 0, - 7.606780476053129902e-01, 7.811107228901693622e-01, 5.715864930517207121e+00 ] - ref_cell = bohr2ang * np.array(ref_cell).reshape(3,3) - - for ii in range(3) : - for jj in range(3) : - self.assertEqual(self.system_1.data['cells'][0][ii][jj], ref_cell[ii][jj]) - - ref_cell = [5.308510801020571712e+00, 0, 0, - 3.076052782312116429e-01, 5.279388982187173340e+00, 0, - 4.321921336152507731e-01, 8.121110815096156399e-01, 5.301664983741235737e+00] - ref_cell = bohr2ang * np.array(ref_cell).reshape(3,3) - - for ii in range(3) : - for jj in range(3) : - self.assertEqual(self.system_1.data['cells'][-1][ii][jj], ref_cell[ii][jj]) + ref_cell = [ + 5.359985500701728967e00, + 0, + 0, + 3.585941820098031974e-01, + 5.317218997480877896e00, + 0, + 7.606780476053129902e-01, + 7.811107228901693622e-01, + 5.715864930517207121e00, + ] + ref_cell = bohr2ang * np.array(ref_cell).reshape(3, 3) + + for ii in range(3): + for jj in range(3): + self.assertEqual( + self.system_1.data["cells"][0][ii][jj], ref_cell[ii][jj] + ) + + ref_cell = [ + 
5.308510801020571712e00, + 0, + 0, + 3.076052782312116429e-01, + 5.279388982187173340e00, + 0, + 4.321921336152507731e-01, + 8.121110815096156399e-01, + 5.301664983741235737e00, + ] + ref_cell = bohr2ang * np.array(ref_cell).reshape(3, 3) + + for ii in range(3): + for jj in range(3): + self.assertEqual( + self.system_1.data["cells"][-1][ii][jj], ref_cell[ii][jj] + ) diff --git a/tests/test_qe_pw_scf.py b/tests/test_qe_pw_scf.py index 391e763eb..4cb78cbdb 100644 --- a/tests/test_qe_pw_scf.py +++ b/tests/test_qe_pw_scf.py @@ -3,128 +3,144 @@ import unittest from context import dpdata -class TestPWSCFSinglePointEnergy: - - def test_atom_names(self) : - self.assertEqual(self.system_ch4.data['atom_names'], ['H','C']) - self.assertEqual(self.system_h2o.data['atom_names'], ['O','H']) - - def test_atom_numbs(self) : - self.assertEqual(self.system_ch4.data['atom_numbs'], [4,1]) - self.assertEqual(self.system_h2o.data['atom_numbs'], [64,128]) - def test_atom_types(self) : - ref_type = [0,0,0,0,1] - ref_type = np.array(ref_type) - for ii in range(ref_type.shape[0]) : - self.assertEqual(self.system_ch4.data['atom_types'][ii], ref_type[ii]) - - ref_type = [0]*64 + [1]*128 - ref_type = np.array(ref_type) - for ii in range(ref_type.shape[0]) : - self.assertEqual(self.system_h2o.data['atom_types'][ii], ref_type[ii]) - - def test_cell(self) : +class TestPWSCFSinglePointEnergy: + def test_atom_names(self): + self.assertEqual(self.system_ch4.data["atom_names"], ["H", "C"]) + self.assertEqual(self.system_h2o.data["atom_names"], ["O", "H"]) + + def test_atom_numbs(self): + self.assertEqual(self.system_ch4.data["atom_numbs"], [4, 1]) + self.assertEqual(self.system_h2o.data["atom_numbs"], [64, 128]) + + def test_atom_types(self): + ref_type = [0, 0, 0, 0, 1] + ref_type = np.array(ref_type) + for ii in range(ref_type.shape[0]): + self.assertEqual(self.system_ch4.data["atom_types"][ii], ref_type[ii]) + + ref_type = [0] * 64 + [1] * 128 + ref_type = np.array(ref_type) + for ii in 
range(ref_type.shape[0]): + self.assertEqual(self.system_h2o.data["atom_types"][ii], ref_type[ii]) + + def test_cell(self): cell = 10 * np.eye(3) - for ii in range(cell.shape[0]) : - for jj in range(cell.shape[1]) : - self.assertAlmostEqual(self.system_ch4.data['cells'][0][ii][jj], cell[ii][jj]) + for ii in range(cell.shape[0]): + for jj in range(cell.shape[1]): + self.assertAlmostEqual( + self.system_ch4.data["cells"][0][ii][jj], cell[ii][jj] + ) - fp = open('qe.scf/h2o_cell') + fp = open("qe.scf/h2o_cell") cell = [] - for ii in fp : + for ii in fp: cell.append([float(jj) for jj in ii.split()]) cell = np.array(cell) - for ii in range(cell.shape[0]) : - for jj in range(cell.shape[1]) : - self.assertAlmostEqual(self.system_h2o.data['cells'][0][ii][jj], cell[ii][jj]) + for ii in range(cell.shape[0]): + for jj in range(cell.shape[1]): + self.assertAlmostEqual( + self.system_h2o.data["cells"][0][ii][jj], cell[ii][jj] + ) fp.close() - - def test_coord(self) : - fp = open('qe.scf/ch4_coord') + def test_coord(self): + fp = open("qe.scf/ch4_coord") coord = [] - for ii in fp : + for ii in fp: coord.append([float(jj) for jj in ii.split()]) coord = np.array(coord) - for ii in range(coord.shape[0]) : - for jj in range(coord.shape[1]) : - self.assertAlmostEqual(self.system_ch4.data['coords'][0][ii][jj], coord[ii][jj]) + for ii in range(coord.shape[0]): + for jj in range(coord.shape[1]): + self.assertAlmostEqual( + self.system_ch4.data["coords"][0][ii][jj], coord[ii][jj] + ) fp.close() - fp = open('qe.scf/h2o_coord') + fp = open("qe.scf/h2o_coord") coord = [] - for ii in fp : + for ii in fp: coord.append([float(jj) for jj in ii.split()]) coord = np.array(coord) - for ii in range(coord.shape[0]) : - for jj in range(coord.shape[1]) : - self.assertAlmostEqual(self.system_h2o.data['coords'][0][ii][jj], coord[ii][jj]) + for ii in range(coord.shape[0]): + for jj in range(coord.shape[1]): + self.assertAlmostEqual( + self.system_h2o.data["coords"][0][ii][jj], coord[ii][jj] + ) 
fp.close() - def test_force(self) : - fp = open('qe.scf/ch4_force') + def test_force(self): + fp = open("qe.scf/ch4_force") force = [] - for ii in fp : + for ii in fp: force.append([float(jj) for jj in ii.split()]) force = np.array(force) - for ii in range(force.shape[0]) : - for jj in range(force.shape[1]) : - self.assertAlmostEqual(self.system_ch4.data['forces'][0][ii][jj], force[ii][jj]) + for ii in range(force.shape[0]): + for jj in range(force.shape[1]): + self.assertAlmostEqual( + self.system_ch4.data["forces"][0][ii][jj], force[ii][jj] + ) fp.close() - fp = open('qe.scf/h2o_force') + fp = open("qe.scf/h2o_force") force = [] - for ii in fp : + for ii in fp: force.append([float(jj) for jj in ii.split()]) force = np.array(force) - for ii in range(force.shape[0]) : - for jj in range(force.shape[1]) : - self.assertAlmostEqual(self.system_h2o.data['forces'][0][ii][jj], force[ii][jj]) + for ii in range(force.shape[0]): + for jj in range(force.shape[1]): + self.assertAlmostEqual( + self.system_h2o.data["forces"][0][ii][jj], force[ii][jj] + ) fp.close() - def test_virial(self) : - fp = open('qe.scf/ch4_virial') + def test_virial(self): + fp = open("qe.scf/ch4_virial") virial = [] - for ii in fp : + for ii in fp: virial.append([float(jj) for jj in ii.split()]) virial = np.array(virial) - for ii in range(virial.shape[0]) : - for jj in range(virial.shape[1]) : - self.assertAlmostEqual(self.system_ch4.data['virials'][0][ii][jj], virial[ii][jj], places = 3) + for ii in range(virial.shape[0]): + for jj in range(virial.shape[1]): + self.assertAlmostEqual( + self.system_ch4.data["virials"][0][ii][jj], virial[ii][jj], places=3 + ) fp.close() - fp = open('qe.scf/h2o_virial') + fp = open("qe.scf/h2o_virial") virial = [] - for ii in fp : + for ii in fp: virial.append([float(jj) for jj in ii.split()]) virial = np.array(virial) - for ii in range(virial.shape[0]) : - for jj in range(virial.shape[1]) : - self.assertAlmostEqual(self.system_h2o.data['virials'][0][ii][jj], 
virial[ii][jj], places = 2) + for ii in range(virial.shape[0]): + for jj in range(virial.shape[1]): + self.assertAlmostEqual( + self.system_h2o.data["virials"][0][ii][jj], virial[ii][jj], places=2 + ) fp.close() - def test_energy(self) : + def test_energy(self): ref_energy = -219.74425946528794 - self.assertAlmostEqual(self.system_ch4.data['energies'][0], ref_energy) + self.assertAlmostEqual(self.system_ch4.data["energies"][0], ref_energy) ref_energy = -30007.651851226798 - self.assertAlmostEqual(self.system_h2o.data['energies'][0], ref_energy) - + self.assertAlmostEqual(self.system_h2o.data["energies"][0], ref_energy) class TestPWSCFLabeledOutput(unittest.TestCase, TestPWSCFSinglePointEnergy): - def setUp(self): - self.system_ch4 = dpdata.LabeledSystem('qe.scf/01.out',fmt='qe/pw/scf') - self.system_h2o = dpdata.LabeledSystem('qe.scf/02.out',fmt='qe/pw/scf') + self.system_ch4 = dpdata.LabeledSystem("qe.scf/01.out", fmt="qe/pw/scf") + self.system_h2o = dpdata.LabeledSystem("qe.scf/02.out", fmt="qe/pw/scf") -class TestPWSCFLabeledOutputListInput(unittest.TestCase, TestPWSCFSinglePointEnergy): +class TestPWSCFLabeledOutputListInput(unittest.TestCase, TestPWSCFSinglePointEnergy): def setUp(self): - self.system_ch4 = dpdata.LabeledSystem(['qe.scf/01.in', 'qe.scf/01.out'], fmt='qe/pw/scf') - self.system_h2o = dpdata.LabeledSystem(['qe.scf/02.in', 'qe.scf/02.out'], fmt='qe/pw/scf') + self.system_ch4 = dpdata.LabeledSystem( + ["qe.scf/01.in", "qe.scf/01.out"], fmt="qe/pw/scf" + ) + self.system_h2o = dpdata.LabeledSystem( + ["qe.scf/02.in", "qe.scf/02.out"], fmt="qe/pw/scf" + ) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_qe_pw_scf_crystal_atomic_positions.py b/tests/test_qe_pw_scf_crystal_atomic_positions.py index 22addec48..19dc7b59c 100644 --- a/tests/test_qe_pw_scf_crystal_atomic_positions.py +++ b/tests/test_qe_pw_scf_crystal_atomic_positions.py @@ -3,24 +3,32 @@ import unittest from context import dpdata + class 
TestPWSCFCrystalAtomicPosition: + def test_coord(self): + ref_coord = np.array( + [[0, 0, 0], [0, 2.02, 2.02], [2.02, 0, 2.02], [2.02, 2.02, 0]] + ) + for ii in range(ref_coord.shape[0]): + for jj in range(ref_coord.shape[1]): + self.assertAlmostEqual( + self.system_al.data["coords"][0][ii][jj], ref_coord[ii][jj] + ) - def test_coord(self) : - ref_coord = np.array([[0,0,0], [0, 2.02, 2.02], [2.02, 0, 2.02], [2.02, 2.02, 0]]) - for ii in range(ref_coord.shape[0]) : - for jj in range(ref_coord.shape[1]) : - self.assertAlmostEqual(self.system_al.data['coords'][0][ii][jj], ref_coord[ii][jj]) class TestPWSCFLabeledOutput(unittest.TestCase, TestPWSCFCrystalAtomicPosition): - def setUp(self): - self.system_al = dpdata.LabeledSystem('qe.scf/Al.out',fmt='qe/pw/scf') + self.system_al = dpdata.LabeledSystem("qe.scf/Al.out", fmt="qe/pw/scf") -class TestPWSCFLabeledOutputListInput(unittest.TestCase, TestPWSCFCrystalAtomicPosition): +class TestPWSCFLabeledOutputListInput( + unittest.TestCase, TestPWSCFCrystalAtomicPosition +): def setUp(self): - self.system_al = dpdata.LabeledSystem(['qe.scf/Al.in', 'qe.scf/Al.out'], fmt='qe/pw/scf') + self.system_al = dpdata.LabeledSystem( + ["qe.scf/Al.in", "qe.scf/Al.out"], fmt="qe/pw/scf" + ) -if __name__ == '__main__': - unittest.main() +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_qe_pw_scf_energy_bug.py b/tests/test_qe_pw_scf_energy_bug.py index 975aca487..85d793552 100644 --- a/tests/test_qe_pw_scf_energy_bug.py +++ b/tests/test_qe_pw_scf_energy_bug.py @@ -3,22 +3,24 @@ import unittest from context import dpdata -class TestPWSCFSinglePointEnergy: - def test_energy(self) : +class TestPWSCFSinglePointEnergy: + def test_energy(self): ref_energy = -296.08379065679094669 - self.assertAlmostEqual(self.system_al.data['energies'][0], ref_energy) + self.assertAlmostEqual(self.system_al.data["energies"][0], ref_energy) -class TestPWSCFLabeledOutput(unittest.TestCase, TestPWSCFSinglePointEnergy): +class 
TestPWSCFLabeledOutput(unittest.TestCase, TestPWSCFSinglePointEnergy): def setUp(self): - self.system_al = dpdata.LabeledSystem('qe.scf/Al.out',fmt='qe/pw/scf') + self.system_al = dpdata.LabeledSystem("qe.scf/Al.out", fmt="qe/pw/scf") -class TestPWSCFLabeledOutputListInput(unittest.TestCase, TestPWSCFSinglePointEnergy): +class TestPWSCFLabeledOutputListInput(unittest.TestCase, TestPWSCFSinglePointEnergy): def setUp(self): - self.system_al = dpdata.LabeledSystem(['qe.scf/Al.in', 'qe.scf/Al.out'], fmt='qe/pw/scf') + self.system_al = dpdata.LabeledSystem( + ["qe.scf/Al.in", "qe.scf/Al.out"], fmt="qe/pw/scf" + ) -if __name__ == '__main__': - unittest.main() +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_quip_gap_xyz.py b/tests/test_quip_gap_xyz.py index 8a023bc4d..272856067 100644 --- a/tests/test_quip_gap_xyz.py +++ b/tests/test_quip_gap_xyz.py @@ -4,88 +4,116 @@ from context import dpdata from comp_sys import CompLabeledSys, IsPBC + class TestQuipGapxyz1(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.multi_systems = dpdata.MultiSystems.from_file('xyz/xyz_unittest.xyz','quip/gap/xyz') - self.system_1 = self.multi_systems.systems['B1C9'] - self.system_2 = dpdata.LabeledSystem('xyz/B1C9', fmt='deepmd') + def setUp(self): + self.multi_systems = dpdata.MultiSystems.from_file( + "xyz/xyz_unittest.xyz", "quip/gap/xyz" + ) + self.system_1 = self.multi_systems.systems["B1C9"] + self.system_2 = dpdata.LabeledSystem("xyz/B1C9", fmt="deepmd") self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestQuipGapxyz2(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.system_temp0 = dpdata.MultiSystems.from_file(file_name='xyz/xyz_unittest.xyz', fmt='quip/gap/xyz') - self.system_1 = self.system_temp0.systems['B5C7'] # .sort_atom_types() - self.system_temp1 = dpdata.LabeledSystem('xyz/B1C9', fmt='deepmd') - self.system_temp2 = dpdata.LabeledSystem('xyz/B5C7', fmt='deepmd') + def setUp(self): + 
self.system_temp0 = dpdata.MultiSystems.from_file( + file_name="xyz/xyz_unittest.xyz", fmt="quip/gap/xyz" + ) + self.system_1 = self.system_temp0.systems["B5C7"] # .sort_atom_types() + self.system_temp1 = dpdata.LabeledSystem("xyz/B1C9", fmt="deepmd") + self.system_temp2 = dpdata.LabeledSystem("xyz/B5C7", fmt="deepmd") self.system_temp3 = dpdata.MultiSystems(self.system_temp2, self.system_temp1) - self.system_2 = self.system_temp3.systems['B5C7'] + self.system_2 = self.system_temp3.systems["B5C7"] self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestQuipGapxyzsort1(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.multi_systems_1 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.sort.xyz','quip/gap/xyz') - self.system_1 = self.multi_systems_1.systems['B5C7'] + def setUp(self): + self.multi_systems_1 = dpdata.MultiSystems.from_file( + "xyz/xyz_unittest.sort.xyz", "quip/gap/xyz" + ) + self.system_1 = self.multi_systems_1.systems["B5C7"] self.system_1.sort_atom_types() - self.multi_systems_2 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.xyz','quip/gap/xyz') - self.system_2 = self.multi_systems_2.systems['B5C7'] + self.multi_systems_2 = dpdata.MultiSystems.from_file( + "xyz/xyz_unittest.xyz", "quip/gap/xyz" + ) + self.system_2 = self.multi_systems_2.systems["B5C7"] self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestQuipGapxyzsort2(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.multi_systems_1 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.sort.xyz','quip/gap/xyz') - self.system_1 = self.multi_systems_1.systems['B1C9'] + def setUp(self): + self.multi_systems_1 = dpdata.MultiSystems.from_file( + "xyz/xyz_unittest.sort.xyz", "quip/gap/xyz" + ) + self.system_1 = self.multi_systems_1.systems["B1C9"] self.system_1.sort_atom_types() - self.multi_systems_2 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.xyz','quip/gap/xyz') - self.system_2 = 
self.multi_systems_2.systems['B1C9'] + self.multi_systems_2 = dpdata.MultiSystems.from_file( + "xyz/xyz_unittest.xyz", "quip/gap/xyz" + ) + self.system_2 = self.multi_systems_2.systems["B1C9"] self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestQuipGapxyzfield(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.multi_systems_1 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.field.xyz','quip/gap/xyz') - self.system_1 = self.multi_systems_1.systems['B1C9'] + def setUp(self): + self.multi_systems_1 = dpdata.MultiSystems.from_file( + "xyz/xyz_unittest.field.xyz", "quip/gap/xyz" + ) + self.system_1 = self.multi_systems_1.systems["B1C9"] self.system_1.sort_atom_types() - self.multi_systems_2 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.xyz','quip/gap/xyz') - self.system_2 = self.multi_systems_2.systems['B1C9'] + self.multi_systems_2 = dpdata.MultiSystems.from_file( + "xyz/xyz_unittest.xyz", "quip/gap/xyz" + ) + self.system_2 = self.multi_systems_2.systems["B1C9"] self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestQuipGapxyzfield2(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.multi_systems_1 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.field.xyz','quip/gap/xyz') - self.system_1 = self.multi_systems_1.systems['B5C7'] + def setUp(self): + self.multi_systems_1 = dpdata.MultiSystems.from_file( + "xyz/xyz_unittest.field.xyz", "quip/gap/xyz" + ) + self.system_1 = self.multi_systems_1.systems["B5C7"] self.system_1.sort_atom_types() - self.multi_systems_2 = dpdata.MultiSystems.from_file('xyz/xyz_unittest.xyz','quip/gap/xyz') - self.system_2 = self.multi_systems_2.systems['B5C7'] + self.multi_systems_2 = dpdata.MultiSystems.from_file( + "xyz/xyz_unittest.xyz", "quip/gap/xyz" + ) + self.system_2 = self.multi_systems_2.systems["B5C7"] self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestQuipGapxyzNoVirials(unittest.TestCase, 
CompLabeledSys, IsPBC): def setUp(self): - self.multi_systems_1 = dpdata.MultiSystems.from_file('xyz/xyz_B5C7_novirials.xyz', fmt='quip/gap/xyz') - self.system_1 = self.multi_systems_1.systems['B5C7'] + self.multi_systems_1 = dpdata.MultiSystems.from_file( + "xyz/xyz_B5C7_novirials.xyz", fmt="quip/gap/xyz" + ) + self.system_1 = self.multi_systems_1.systems["B5C7"] self.system_1.sort_atom_types() - self.system_2 = dpdata.LabeledSystem('xyz/B5C7_novirials', fmt='deepmd/raw') + self.system_2 = dpdata.LabeledSystem("xyz/B5C7_novirials", fmt="deepmd/raw") self.places = 6 self.e_places = 6 self.f_places = 6 - -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_remove_atom_names.py b/tests/test_remove_atom_names.py index 043511fbd..4d4e23ec8 100644 --- a/tests/test_remove_atom_names.py +++ b/tests/test_remove_atom_names.py @@ -3,14 +3,20 @@ from comp_sys import CompLabeledSys from comp_sys import IsNoPBC + class TestRemove(unittest.TestCase, CompLabeledSys, IsNoPBC): def setUp(self): - self.system_1 = dpdata.LabeledSystem("amber/corr/dp_amber_mask/C6EP0H11HW192O6OW96P1", fmt="deepmd/npy").remove_atom_names('EP') - self.system_2 = dpdata.LabeledSystem("amber/corr/dataset/C6H11HW192O6OW96P1", fmt="deepmd/npy") + self.system_1 = dpdata.LabeledSystem( + "amber/corr/dp_amber_mask/C6EP0H11HW192O6OW96P1", fmt="deepmd/npy" + ).remove_atom_names("EP") + self.system_2 = dpdata.LabeledSystem( + "amber/corr/dataset/C6H11HW192O6OW96P1", fmt="deepmd/npy" + ) self.places = 5 self.e_places = 4 self.f_places = 6 self.v_places = 6 -if __name__ == '__main__': - unittest.main() \ No newline at end of file + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_remove_pbc.py b/tests/test_remove_pbc.py index ea2fc2202..558587b06 100644 --- a/tests/test_remove_pbc.py +++ b/tests/test_remove_pbc.py @@ -3,23 +3,29 @@ import unittest from context import dpdata -class TestRemovePBC(unittest.TestCase): +class 
TestRemovePBC(unittest.TestCase): def test_remove(self): - coords = np.array([[[-1, -1, 2], [-1,-1,-3], [-1,-1, 7]], - [[ 3, -1, 3], [-1,-1, 3], [ 7,-1, 3]]], dtype = float) - cogs = np.average(coords, axis = 1) - data = {'atom_names' : ['A', 'B'], - 'atom_numbs' : [1, 2], - 'atom_types' : np.array([1, 0, 1], dtype = int), - 'orig': np.array([0, 0, 0]), - 'coords': coords, - 'cells': np.random.random([2, 3, 3]), + coords = np.array( + [ + [[-1, -1, 2], [-1, -1, -3], [-1, -1, 7]], + [[3, -1, 3], [-1, -1, 3], [7, -1, 3]], + ], + dtype=float, + ) + cogs = np.average(coords, axis=1) + data = { + "atom_names": ["A", "B"], + "atom_numbs": [1, 2], + "atom_types": np.array([1, 0, 1], dtype=int), + "orig": np.array([0, 0, 0]), + "coords": coords, + "cells": np.random.random([2, 3, 3]), } - sys = dpdata.System(data = data) + sys = dpdata.System(data=data) proct = 9.0 - - mol_size = np.array([5, 4], dtype = float) + + mol_size = np.array([5, 4], dtype=float) cell_size = (mol_size + proct) * 2.0 sys.remove_pbc(proct) @@ -28,10 +34,16 @@ def test_remove(self): ref = cell_size[ff] * np.eye(3) for ii in range(3): for jj in range(3): - self.assertAlmostEqual(sys['cells'][ff][ii][jj], ref[ii][jj], msg = '%d %d %d' %(ff, ii, jj)) + self.assertAlmostEqual( + sys["cells"][ff][ii][jj], + ref[ii][jj], + msg="%d %d %d" % (ff, ii, jj), + ) dists = [] for ii in range(sys.get_natoms()): for jj in range(3): - dists.append(np.abs(sys['coords'][ff][ii][jj])) - dists.append(np.abs(sys['cells'][ff][jj][jj] - sys['coords'][ff][ii][jj])) + dists.append(np.abs(sys["coords"][ff][ii][jj])) + dists.append( + np.abs(sys["cells"][ff][jj][jj] - sys["coords"][ff][ii][jj]) + ) self.assertAlmostEqual(np.min(dists), proct) diff --git a/tests/test_replace.py b/tests/test_replace.py index a8dd917b5..f0f4430f5 100644 --- a/tests/test_replace.py +++ b/tests/test_replace.py @@ -5,30 +5,35 @@ from comp_sys import CompSys, IsPBC from unittest.mock import Mock -from unittest.mock import patch, MagicMock +from 
unittest.mock import patch, MagicMock class ConstGenerator(object): def __init__(self): self.choice_generator = self.get_choice_generator() + def choice(self, a, size=None, replace=True, p=None): return next(self.choice_generator) @staticmethod def get_choice_generator(): - yield np.asarray([20, 6, 7, 22, 29, 2, 23, 10]) + yield np.asarray([20, 6, 7, 22, 29, 2, 23, 10]) + class TestReplace(unittest.TestCase, CompSys, IsPBC): - @patch('numpy.random') + @patch("numpy.random") def setUp(self, random_mock): random_mock.choice = ConstGenerator().choice - self.system_1 = dpdata.System('poscars/POSCAR.P42nmc',fmt='vasp/poscar') - self.system_1.replace('Hf', 'Zr', 8) + self.system_1 = dpdata.System("poscars/POSCAR.P42nmc", fmt="vasp/poscar") + self.system_1.replace("Hf", "Zr", 8) # print(self.system_1.data) - self.system_2 = dpdata.System('poscars/POSCAR.P42nmc.replace',fmt='vasp/poscar') + self.system_2 = dpdata.System( + "poscars/POSCAR.P42nmc.replace", fmt="vasp/poscar" + ) # print(self.system_2.data) self.places = 6 + # class TestReplicate123_not_change_origin(unittest.TestCase, CompSys, IsPBC): # def setUp (self) : # self.system_1 = dpdata.System('poscars/POSCAR.SiC',fmt='vasp/poscar') @@ -36,5 +41,5 @@ def setUp(self, random_mock): # self.system_2 = dpdata.System('poscars/POSCAR.SiC',fmt='vasp/poscar') # self.places = 6 -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_replicate.py b/tests/test_replicate.py index 16ef8636b..1a7590b6b 100644 --- a/tests/test_replicate.py +++ b/tests/test_replicate.py @@ -4,19 +4,36 @@ from context import dpdata from comp_sys import CompSys, IsPBC + class TestReplicate123(unittest.TestCase, CompSys, IsPBC): - def setUp (self) : - system_1_origin = dpdata.System('poscars/POSCAR.SiC',fmt='vasp/poscar') - self.system_1 = system_1_origin.replicate((1,2,3,)) - self.system_2 = dpdata.System('poscars/POSCAR.SiC.replicate123',fmt='vasp/poscar') + def setUp(self): + system_1_origin = 
dpdata.System("poscars/POSCAR.SiC", fmt="vasp/poscar") + self.system_1 = system_1_origin.replicate( + ( + 1, + 2, + 3, + ) + ) + self.system_2 = dpdata.System( + "poscars/POSCAR.SiC.replicate123", fmt="vasp/poscar" + ) self.places = 6 + class TestReplicate123_not_change_origin(unittest.TestCase, CompSys, IsPBC): - def setUp (self) : - self.system_1 = dpdata.System('poscars/POSCAR.SiC',fmt='vasp/poscar') - self.system_1.replicate((1,2,3,)) - self.system_2 = dpdata.System('poscars/POSCAR.SiC',fmt='vasp/poscar') + def setUp(self): + self.system_1 = dpdata.System("poscars/POSCAR.SiC", fmt="vasp/poscar") + self.system_1.replicate( + ( + 1, + 2, + 3, + ) + ) + self.system_2 = dpdata.System("poscars/POSCAR.SiC", fmt="vasp/poscar") self.places = 6 -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_shuffle.py b/tests/test_shuffle.py index aa2d7a7e3..8acbb42ae 100644 --- a/tests/test_shuffle.py +++ b/tests/test_shuffle.py @@ -2,10 +2,12 @@ from context import dpdata from comp_sys import CompLabeledSys, IsPBC + class TestDeepmdLoadRaw(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - original_system = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') + def setUp(self): + original_system = dpdata.LabeledSystem( + "poscars/OUTCAR.h2o.md", fmt="vasp/outcar" + ) original_system += original_system original_system += original_system original_system += original_system diff --git a/tests/test_siesta_aiMD_output.py b/tests/test_siesta_aiMD_output.py index d66b19624..8ea4be359 100644 --- a/tests/test_siesta_aiMD_output.py +++ b/tests/test_siesta_aiMD_output.py @@ -6,90 +6,155 @@ class TestSIESTASinglePointEnergy: def test_atom_names(self): - self.assertEqual(self.system.data['atom_names'], ['Si']) + self.assertEqual(self.system.data["atom_names"], ["Si"]) def test_atom_numbs(self): - self.assertEqual(self.system.data['atom_numbs'], [64]) + self.assertEqual(self.system.data["atom_numbs"], [64]) def 
test_atom_types(self): - ref_type = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ref_type = [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] ref_type = np.array(ref_type) for ii in range(ref_type.shape[0]): # print(self.system.data['atom_types'][0][ii]) - self.assertAlmostEqual(self.system.data['atom_types'][ii], ref_type[ii]) + self.assertAlmostEqual(self.system.data["atom_types"][ii], ref_type[ii]) def test_cell(self): - fp = open('siesta/aimd/cell') + fp = open("siesta/aimd/cell") ref_cell = [] for ii in fp: for jj in ii.split(): ref_cell.append(float(jj)) fp.close() - cells = self.system.data['cells'].flatten() + cells = self.system.data["cells"].flatten() idx = 0 for ii in range(len(cells)): self.assertAlmostEqual(cells[ii], float(ref_cell[ii])) def test_coord(self): - fp = open('siesta/aimd/coord') + fp = open("siesta/aimd/coord") ref_coord = [] for ii in fp: for jj in ii.split(): ref_coord.append(float(jj)) fp.close() - coords = self.system.data['coords'].flatten() + coords = self.system.data["coords"].flatten() for ii in range(len(coords)): self.assertAlmostEqual(coords[ii], float(ref_coord[ii])) def test_force(self): eV = 1 angstrom = 1 - fp = open('siesta/aimd/force') + fp = open("siesta/aimd/force") ref_force = [] for ii in fp: for jj in ii.split(): ref_force.append(float(jj)) fp.close() - forces = self.system.data['forces'].flatten() + forces = self.system.data["forces"].flatten() for ii in range(len(forces)): self.assertAlmostEqual(forces[ii], float(ref_force[ii])) def test_viriale(self): toViri = 1 vol = 1308.4268 - fp = 
open('siesta/aimd/virial') + fp = open("siesta/aimd/virial") ref_virial = [] for ii in fp: for jj in ii.split(): ref_virial.append(float(jj)) fp.close() - virials = self.system.data['virials'].flatten() + virials = self.system.data["virials"].flatten() for ii in range(len(virials)): self.assertAlmostEqual(virials[ii], float(ref_virial[ii]) * toViri * vol) def test_energy(self): eV = 1 - fp = open('siesta/aimd/energy') + fp = open("siesta/aimd/energy") ref_energy = [] for ii in fp: for jj in ii.split(): ref_energy.append(float(jj)) fp.close() - energy = self.system.data['energies'] + energy = self.system.data["energies"] for ii in range(len(energy)): self.assertAlmostEqual(energy[ii], ref_energy[ii]) class TestAimdSIESTALabeledOutput(unittest.TestCase, TestSIESTASinglePointEnergy): - def setUp(self): - self.system = dpdata.LabeledSystem('siesta/aimd/output', fmt='siesta/aiMD_output') + self.system = dpdata.LabeledSystem( + "siesta/aimd/output", fmt="siesta/aiMD_output" + ) # self.system.data = dpdata.siesta.output.obtain_frame('siesta/siesta_output') -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_siesta_output.py b/tests/test_siesta_output.py index 27e613513..2539173d6 100644 --- a/tests/test_siesta_output.py +++ b/tests/test_siesta_output.py @@ -3,67 +3,70 @@ import unittest from context import dpdata + class TestSIESTASinglePointEnergy: - def test_atom_names(self) : - self.assertEqual(self.system.data['atom_names'], ['H','C']) - def test_atom_numbs(self) : - self.assertEqual(self.system.data['atom_numbs'], [4, 1]) - def test_atom_types(self) : - ref_type = [0,0,0,0,1] + def test_atom_names(self): + self.assertEqual(self.system.data["atom_names"], ["H", "C"]) + + def test_atom_numbs(self): + self.assertEqual(self.system.data["atom_numbs"], [4, 1]) + + def test_atom_types(self): + ref_type = [0, 0, 0, 0, 1] ref_type = np.array(ref_type) - for ii in range(ref_type.shape[0]) : + for ii in range(ref_type.shape[0]): # 
print(self.system.data['atom_types'][0][ii]) - self.assertAlmostEqual(self.system.data['atom_types'][ii], ref_type[ii]) + self.assertAlmostEqual(self.system.data["atom_types"][ii], ref_type[ii]) - def test_cell(self) : - fp = open('siesta/scf/ref_cell') + def test_cell(self): + fp = open("siesta/scf/ref_cell") cell = [] - for ii in fp : + for ii in fp: for jj in ii.split(): cell.append(float(jj)) cell = np.array(cell) # print(cell) fp.close() - res = self.system.data['cells'][0].flatten() + res = self.system.data["cells"][0].flatten() for ii in range(len(cell)): self.assertAlmostEqual(res[ii], cell[ii]) - def test_coord(self) : - fp = open('siesta/scf/ref_coord') + def test_coord(self): + fp = open("siesta/scf/ref_coord") coord = [] for ii in fp: for jj in ii.split(): coord.append(float(jj)) coord = np.array(coord) fp.close() - res = self.system.data['coords'][0].flatten() - for ii in range(len(coord)) : + res = self.system.data["coords"][0].flatten() + for ii in range(len(coord)): self.assertAlmostEqual(res[ii], float(coord[ii])) - def test_force(self) : + def test_force(self): eV = 1 angstrom = 1 - fp = open('siesta/scf/ref_force') + fp = open("siesta/scf/ref_force") force = [] for ii in fp: for jj in ii.split(): force.append(float(jj)) force = np.array(force) fp.close() - res = self.system.data['forces'][0].flatten() + res = self.system.data["forces"][0].flatten() for ii in range(len(force)): self.assertAlmostEqual(res[ii], float(force[ii])) - def test_viriale(self) : + def test_viriale(self): toViri = 1 - fp = open('siesta/scf/ref_cell') + fp = open("siesta/scf/ref_cell") cell = [] for ii in fp: for jj in ii.split(): cell.append(float(jj)) cell = np.array(cell) - cells = cell.reshape(3,3) + cells = cell.reshape(3, 3) fp.close() toVol = [] @@ -71,28 +74,29 @@ def test_viriale(self) : ### calucate vol toVol.append(np.linalg.det(cells)) - fp = open('siesta/scf/ref_virial') + fp = open("siesta/scf/ref_virial") virial = [] for ii in fp: for jj in ii.split(): 
virial.append(float(jj) * toViri * toVol[0]) virial = np.array(virial) fp.close() - res = self.system.data['virials'][0].flatten() + res = self.system.data["virials"][0].flatten() for ii in range(len(virial)): self.assertAlmostEqual(res[ii], float(virial[ii])) - def test_energy(self) : + def test_energy(self): eV = 1 ref_energy = -219.1640 - self.assertAlmostEqual(self.system.data['energies'][0], ref_energy*eV) + self.assertAlmostEqual(self.system.data["energies"][0], ref_energy * eV) class TestSIESTALabeledOutput(unittest.TestCase, TestSIESTASinglePointEnergy): - def setUp(self): - self.system = dpdata.LabeledSystem('siesta/scf/siesta_output', fmt = 'siesta/output') + self.system = dpdata.LabeledSystem( + "siesta/scf/siesta_output", fmt="siesta/output" + ) -if __name__ == '__main__': - unittest.main() +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_sqm_driver.py b/tests/test_sqm_driver.py index 3755fac97..b06ab26ce 100644 --- a/tests/test_sqm_driver.py +++ b/tests/test_sqm_driver.py @@ -9,23 +9,26 @@ @unittest.skipIf(shutil.which("sqm") is None, "sqm is not installed") class TestSQMdriver(unittest.TestCase, CompSys, IsNoPBC): """Test sqm with a hydrogen ion.""" + @classmethod def setUpClass(cls): - cls.system_1 = dpdata.System(data={ - "atom_names": ["H"], - "atom_numbs": [1], - "atom_types": np.zeros((1,), dtype=int), - "coords": np.zeros((1, 1, 3), dtype=np.float32), - "cells": np.zeros((1, 3, 3), dtype=np.float32), - "orig": np.zeros(3, dtype=np.float32), - "nopbc": True, - }) + cls.system_1 = dpdata.System( + data={ + "atom_names": ["H"], + "atom_numbs": [1], + "atom_types": np.zeros((1,), dtype=int), + "coords": np.zeros((1, 1, 3), dtype=np.float32), + "cells": np.zeros((1, 3, 3), dtype=np.float32), + "orig": np.zeros(3, dtype=np.float32), + "nopbc": True, + } + ) cls.system_2 = cls.system_1.predict(theory="DFTB3", charge=1, driver="sqm") cls.places = 6 - + def test_energy(self): - 
self.assertAlmostEqual(self.system_2['energies'].ravel()[0], 6.549447) - + self.assertAlmostEqual(self.system_2["energies"].ravel()[0], 6.549447) + def test_forces(self): - forces = self.system_2['forces'] + forces = self.system_2["forces"] np.testing.assert_allclose(forces, np.zeros_like(forces)) diff --git a/tests/test_stat.py b/tests/test_stat.py index 1fcc5af9a..62d045f43 100644 --- a/tests/test_stat.py +++ b/tests/test_stat.py @@ -5,7 +5,9 @@ class TestStat(unittest.TestCase): def test_errors(self): - system1 = dpdata.LabeledSystem("gaussian/methane.gaussianlog", fmt="gaussian/log") + system1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) system2 = dpdata.LabeledSystem("amber/sqm_opt.out", fmt="sqm/out") e = dpdata.stat.Errors(system1, system2) @@ -15,8 +17,12 @@ def test_errors(self): self.assertAlmostEqual(e.f_rmse, 0.005714011247538185, 6) def test_multi_errors(self): - system1 = dpdata.MultiSystems(dpdata.LabeledSystem("gaussian/methane.gaussianlog", fmt="gaussian/log")) - system2 = dpdata.MultiSystems(dpdata.LabeledSystem("amber/sqm_opt.out", fmt="sqm/out")) + system1 = dpdata.MultiSystems( + dpdata.LabeledSystem("gaussian/methane.gaussianlog", fmt="gaussian/log") + ) + system2 = dpdata.MultiSystems( + dpdata.LabeledSystem("amber/sqm_opt.out", fmt="sqm/out") + ) e = dpdata.stat.MultiErrors(system1, system2) self.assertAlmostEqual(e.e_mae, 1014.7946598792427, 6) diff --git a/tests/test_system_append.py b/tests/test_system_append.py index a166398ba..698832476 100644 --- a/tests/test_system_append.py +++ b/tests/test_system_append.py @@ -9,15 +9,17 @@ class TestFailedAppend(unittest.TestCase): def test_failed_append(self): - sys1 = dpdata.System('poscars/POSCAR.h2o.md', fmt='vasp/poscar') - sys2 = dpdata.System('poscars/POSCAR.h4o3', fmt='vasp/poscar') + sys1 = dpdata.System("poscars/POSCAR.h2o.md", fmt="vasp/poscar") + sys2 = dpdata.System("poscars/POSCAR.h4o3", fmt="vasp/poscar") with self.assertRaises(Exception) as c: 
sys1.append(sys2) - self.assertTrue("systems with inconsistent formula could not be append" in str(c.exception)) + self.assertTrue( + "systems with inconsistent formula could not be append" in str(c.exception) + ) class TestVaspXmlAppend(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : + def setUp(self): self.places = 6 # rotated vasp computation, subject to numerical error self.e_places = 6 @@ -26,26 +28,33 @@ def setUp (self) : begin = 2 end = 10 step = 3 - self.system_1 = dpdata.LabeledSystem('poscars/vasprun.h2o.md.10.xml') - self.system_2 = dpdata.LabeledSystem('poscars/vasprun.h2o.md.10.xml') + self.system_1 = dpdata.LabeledSystem("poscars/vasprun.h2o.md.10.xml") + self.system_2 = dpdata.LabeledSystem("poscars/vasprun.h2o.md.10.xml") self.system_1.append(self.system_2) - + self.system_1 = self.system_1.sub_system([0, 12, 4, 16, 8]) - self.system_2 = dpdata.LabeledSystem('poscars/vasprun.h2o.md.10.xml').sub_system(np.arange(0,10,2)) + self.system_2 = dpdata.LabeledSystem( + "poscars/vasprun.h2o.md.10.xml" + ).sub_system(np.arange(0, 10, 2)) class TestDifferentOrderAppend(unittest.TestCase, CompLabeledSys, IsNoPBC): - def setUp (self) : + def setUp(self): self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 - self.system_1 = dpdata.LabeledSystem('gaussian/methane.gaussianlog', fmt='gaussian/log') - system_2 = dpdata.LabeledSystem('gaussian/methane_reordered.gaussianlog', fmt='gaussian/log') + self.system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + system_2 = dpdata.LabeledSystem( + "gaussian/methane_reordered.gaussianlog", fmt="gaussian/log" + ) self.system_1.append(system_2) - + self.system_2 = self.system_1.sub_system([0, 0]) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_system_apply_pbc.py b/tests/test_system_apply_pbc.py index 123141951..96b06989f 100644 --- a/tests/test_system_apply_pbc.py +++ b/tests/test_system_apply_pbc.py @@ 
-3,28 +3,31 @@ import unittest from context import dpdata -class TestPBC(unittest.TestCase) : - def test_pbc(self) : + +class TestPBC(unittest.TestCase): + def test_pbc(self): nframes = 10 natoms = 20 data = {} - data['coords'] = np.random.random([nframes, natoms, 3]) + [5, 5, 5] - data['cells'] = np.tile(10 * np.eye(3), [nframes, 1, 1]) - data['cells'] += np.random.random([nframes, 3, 3]) + data["coords"] = np.random.random([nframes, natoms, 3]) + [5, 5, 5] + data["cells"] = np.tile(10 * np.eye(3), [nframes, 1, 1]) + data["cells"] += np.random.random([nframes, 3, 3]) shift = 20 * (np.random.random([nframes, natoms, 3]) - 0.5) shift = shift.astype(int) - bk_coord = np.copy(data['coords']) - data['coords'] += np.matmul(shift, data['cells']) + bk_coord = np.copy(data["coords"]) + data["coords"] += np.matmul(shift, data["cells"]) sys = dpdata.System() sys.data = data sys.apply_pbc() - for ii in range(nframes) : - for jj in range(natoms) : - for dd in range(3) : - self.assertAlmostEqual(sys['coords'][ii][jj][dd], - bk_coord[ii][jj][dd], - msg = 'coord[%d][%d][%d] failed' % (ii,jj,dd)) - -if __name__ == '__main__': - unittest.main() + for ii in range(nframes): + for jj in range(natoms): + for dd in range(3): + self.assertAlmostEqual( + sys["coords"][ii][jj][dd], + bk_coord[ii][jj][dd], + msg="coord[%d][%d][%d] failed" % (ii, jj, dd), + ) + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_system_set_type.py b/tests/test_system_set_type.py index b8004a35c..27e0da97f 100644 --- a/tests/test_system_set_type.py +++ b/tests/test_system_set_type.py @@ -7,35 +7,31 @@ class TestSetAtomTypes(unittest.TestCase): - def setUp (self) : - self.system_1 = dpdata.LabeledSystem('poscars/vasprun.h2o.md.10.xml') + def setUp(self): + self.system_1 = dpdata.LabeledSystem("poscars/vasprun.h2o.md.10.xml") self.type_1 = self.system_1.get_atom_types() - self.system_types = np.array([0,0,1,1,1,1]) - self.type_2 = self.system_1.map_atom_types(["H","C","O"]) - self.type_3 = 
self.system_1.map_atom_types({"H":2,"C":1,"O":3}) + self.system_types = np.array([0, 0, 1, 1, 1, 1]) + self.type_2 = self.system_1.map_atom_types(["H", "C", "O"]) + self.type_3 = self.system_1.map_atom_types({"H": 2, "C": 1, "O": 3}) - def test_types_func_1(self): - atom_types=np.array([2,2,0,0,0,0]) - atom_types_system_2=self.type_2 - atom_types_system_1=self.type_1 - for d0 in range(3) : - self.assertEqual(atom_types[d0], - atom_types_system_2[d0]) - for d0 in range(3) : - self.assertEqual(self.system_types[d0], - atom_types_system_1[d0]) + atom_types = np.array([2, 2, 0, 0, 0, 0]) + atom_types_system_2 = self.type_2 + atom_types_system_1 = self.type_1 + for d0 in range(3): + self.assertEqual(atom_types[d0], atom_types_system_2[d0]) + for d0 in range(3): + self.assertEqual(self.system_types[d0], atom_types_system_1[d0]) def test_types_func_2(self): - atom_types=np.array([3,3,2,2,2,2]) - atom_types_system_3=self.type_3 - atom_types_system_1=self.type_1 - for d0 in range(3) : - self.assertEqual(atom_types[d0], - atom_types_system_3[d0]) - for d0 in range(3) : - self.assertEqual(self.system_types[d0], - atom_types_system_1[d0]) + atom_types = np.array([3, 3, 2, 2, 2, 2]) + atom_types_system_3 = self.type_3 + atom_types_system_1 = self.type_1 + for d0 in range(3): + self.assertEqual(atom_types[d0], atom_types_system_3[d0]) + for d0 in range(3): + self.assertEqual(self.system_types[d0], atom_types_system_1[d0]) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_to_ase.py b/tests/test_to_ase.py index d1c42b8b4..187d8907b 100644 --- a/tests/test_to_ase.py +++ b/tests/test_to_ase.py @@ -3,24 +3,26 @@ import unittest from context import dpdata from comp_sys import CompSys, IsPBC + try: from ase import Atoms from ase.io import write except ModuleNotFoundError: - exist_module=False + exist_module = False else: - exist_module=True + exist_module = True -@unittest.skipIf(not exist_module,"skip test_ase") +@unittest.skipIf(not 
exist_module, "skip test_ase") class TestASE(unittest.TestCase, CompSys, IsPBC): - - def setUp(self): + def setUp(self): system_1 = dpdata.System() - system_1.from_lammps_lmp(os.path.join('poscars', 'conf.lmp'), type_map = ['O', 'H']) - write('tmp.POSCAR',system_1.to_ase_structure()[0],vasp5=True) - self.system_1=system_1 - self.system_2=dpdata.System('tmp.POSCAR') + system_1.from_lammps_lmp( + os.path.join("poscars", "conf.lmp"), type_map=["O", "H"] + ) + write("tmp.POSCAR", system_1.to_ase_structure()[0], vasp5=True) + self.system_1 = system_1 + self.system_2 = dpdata.System("tmp.POSCAR") self.places = 6 self.e_places = 6 self.f_places = 6 @@ -30,9 +32,12 @@ def setUp(self): @unittest.skipIf(not exist_module, "skip test_ase") class TestFromASE(unittest.TestCase, CompSys, IsPBC): """Test ASEStructureFormat.from_system""" - def setUp(self): + + def setUp(self): system_1 = dpdata.System() - system_1.from_lammps_lmp(os.path.join('poscars', 'conf.lmp'), type_map = ['O', 'H']) + system_1.from_lammps_lmp( + os.path.join("poscars", "conf.lmp"), type_map=["O", "H"] + ) atoms = system_1.to_ase_structure()[0] self.system_1 = system_1 self.system_2 = dpdata.System(atoms, fmt="ase/structure") @@ -44,6 +49,5 @@ def setUp(self): self.v_places = 6 -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_to_list.py b/tests/test_to_list.py index e4c83cff3..4aa90885d 100644 --- a/tests/test_to_list.py +++ b/tests/test_to_list.py @@ -4,10 +4,10 @@ from context import dpdata from comp_sys import CompLabeledSys, IsPBC + class TestToList(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - system = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md', - fmt = 'vasp/outcar') + def setUp(self): + system = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") self.system_1 = system.sub_system([2]) self.system_2 = system.to_list()[2] self.places = 6 @@ -16,5 +16,5 @@ def setUp (self) : self.v_places = 4 -if __name__ == '__main__': 
+if __name__ == "__main__": unittest.main() diff --git a/tests/test_to_pymatgen.py b/tests/test_to_pymatgen.py index 0077a0495..abe251502 100644 --- a/tests/test_to_pymatgen.py +++ b/tests/test_to_pymatgen.py @@ -3,26 +3,30 @@ import unittest from context import dpdata from comp_sys import CompSys, IsPBC + try: - from pymatgen import Structure - exist_module=True + from pymatgen import Structure + + exist_module = True except Exception: - exist_module=False + exist_module = False -@unittest.skipIf(not exist_module,"skip pymatgen") + +@unittest.skipIf(not exist_module, "skip pymatgen") class TestPymatgen(unittest.TestCase, CompSys, IsPBC): - - def setUp(self): + def setUp(self): system_1 = dpdata.System() - system_1.from_lammps_lmp(os.path.join('poscars', 'conf.lmp'), type_map = ['O', 'H']) - system_1.to_pymatgen_structure()[0].to('poscar','tmp.POSCAR') - self.system_1=system_1 - self.system_2=dpdata.System('tmp.POSCAR') + system_1.from_lammps_lmp( + os.path.join("poscars", "conf.lmp"), type_map=["O", "H"] + ) + system_1.to_pymatgen_structure()[0].to("poscar", "tmp.POSCAR") + self.system_1 = system_1 + self.system_2 = dpdata.System("tmp.POSCAR") self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 6 -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_to_pymatgen_entry.py b/tests/test_to_pymatgen_entry.py index e0a952be3..1cad9c174 100644 --- a/tests/test_to_pymatgen_entry.py +++ b/tests/test_to_pymatgen_entry.py @@ -3,27 +3,32 @@ import unittest from context import dpdata from comp_sys import CompSys, IsPBC -from monty.serialization import loadfn +from monty.serialization import loadfn + try: - from pymatgen.entries.computed_entries import ComputedStructureEntry - exist_module=True + from pymatgen.entries.computed_entries import ComputedStructureEntry + + exist_module = True except Exception: - exist_module=False + exist_module = False -@unittest.skipIf(not exist_module,"skip pymatgen") + 
+@unittest.skipIf(not exist_module, "skip pymatgen") class TestPymatgen(unittest.TestCase): - - def test(self): - ls1= dpdata.LabeledSystem(os.path.join('poscars', 'OUTCAR.ch4.1step'),fmt='OUTCAR') - entry1=ls1.to_pymatgen_ComputedStructureEntry() - self.assertEqual(entry1,[]) - ls2= dpdata.LabeledSystem(os.path.join('poscars', 'OUTCAR.h2o.md.10'),fmt='OUTCAR') - entry2=ls2.to_pymatgen_ComputedStructureEntry() - self.assertEqual(len(entry2),10) - last_entry=loadfn("computed_structure_entry.json") - self.assertEqual(last_entry.as_dict(),entry2[-1].as_dict()) - + def test(self): + ls1 = dpdata.LabeledSystem( + os.path.join("poscars", "OUTCAR.ch4.1step"), fmt="OUTCAR" + ) + entry1 = ls1.to_pymatgen_ComputedStructureEntry() + self.assertEqual(entry1, []) + ls2 = dpdata.LabeledSystem( + os.path.join("poscars", "OUTCAR.h2o.md.10"), fmt="OUTCAR" + ) + entry2 = ls2.to_pymatgen_ComputedStructureEntry() + self.assertEqual(len(entry2), 10) + last_entry = loadfn("computed_structure_entry.json") + self.assertEqual(last_entry.as_dict(), entry2[-1].as_dict()) + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_type_map.py b/tests/test_type_map.py index 1e1e7b3ce..4d98aa7a8 100644 --- a/tests/test_type_map.py +++ b/tests/test_type_map.py @@ -3,37 +3,37 @@ from itertools import permutations -class TestTypeMap(): +class TestTypeMap: def test_check_type_map(self): # read atom names system = dpdata.LabeledSystem(self.fn, fmt=self.fmt) - atom_names = system['atom_names'] + atom_names = system["atom_names"] for type_map in permutations(atom_names, len(atom_names)): type_map = list(type_map) system.check_type_map(type_map=type_map) - self.assertEqual(type_map, system['atom_names']) + self.assertEqual(type_map, system["atom_names"]) def test_type_map_is_superset(self): system = dpdata.LabeledSystem(self.fn, fmt=self.fmt) - atom_names = system['atom_names'] + ["X"] + atom_names = system["atom_names"] + ["X"] for type_map in 
permutations(atom_names, len(atom_names)): type_map = list(type_map) system = dpdata.LabeledSystem(self.fn, fmt=self.fmt) system.check_type_map(type_map=type_map) - self.assertEqual(type_map, system['atom_names']) + self.assertEqual(type_map, system["atom_names"]) class TestTypeMap1(TestTypeMap, unittest.TestCase): def setUp(self): - self.fn = 'gaussian/methane.gaussianlog' - self.fmt = 'gaussian/log' + self.fn = "gaussian/methane.gaussianlog" + self.fmt = "gaussian/log" class TestTypeMap2(TestTypeMap, unittest.TestCase): def setUp(self): - self.fn = 'cp2k/cp2k_normal_output/cp2k_output' - self.fmt = 'cp2k/output' + self.fn = "cp2k/cp2k_normal_output/cp2k_output" + self.fmt = "cp2k/output" -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_vasp_outcar.py b/tests/test_vasp_outcar.py index 23c40d5c3..2fcaa0a1b 100644 --- a/tests/test_vasp_outcar.py +++ b/tests/test_vasp_outcar.py @@ -5,24 +5,30 @@ from comp_sys import CompLabeledSys, IsPBC from dpdata.utils import uniq_atom_names + class TestVaspOUTCAR(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : + def setUp(self): self.system_1 = dpdata.LabeledSystem() - self.system_1.from_vasp_xml('poscars/vasprun.h2o.md.xml') + self.system_1.from_vasp_xml("poscars/vasprun.h2o.md.xml") self.system_2 = dpdata.LabeledSystem() - self.system_2.from_vasp_outcar('poscars/OUTCAR.h2o.md') + self.system_2.from_vasp_outcar("poscars/OUTCAR.h2o.md") self.places = 6 self.e_places = 6 self.f_places = 6 self.v_places = 4 + class TestVaspOUTCARTypeMap(unittest.TestCase, CompLabeledSys, IsPBC): def setUp(self): - sys0 = dpdata.LabeledSystem('poscars/OUTCAR.ch4.unconverged', fmt = 'vasp/outcar') - sys0.data['atom_names'] = ['A', 'C', 'B', 'H', 'D'] - sys0.data['atom_numbs'] = [ 0, 1, 0, 4, 0] - sys0.data['atom_types'] = np.array([ 3, 3, 3, 3, 1], dtype = int) - sys1 = dpdata.LabeledSystem('poscars/OUTCAR.ch4.unconverged', fmt = 'vasp/outcar', type_map = ['A', 'C', 'B', 'H', 'D']) + sys0 
= dpdata.LabeledSystem("poscars/OUTCAR.ch4.unconverged", fmt="vasp/outcar") + sys0.data["atom_names"] = ["A", "C", "B", "H", "D"] + sys0.data["atom_numbs"] = [0, 1, 0, 4, 0] + sys0.data["atom_types"] = np.array([3, 3, 3, 3, 1], dtype=int) + sys1 = dpdata.LabeledSystem( + "poscars/OUTCAR.ch4.unconverged", + fmt="vasp/outcar", + type_map=["A", "C", "B", "H", "D"], + ) self.system_1 = sys0 self.system_2 = sys1 self.places = 6 @@ -30,13 +36,18 @@ def setUp(self): self.f_places = 6 self.v_places = 6 + class TestVaspOUTCARSkip(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : + def setUp(self): begin = 1 step = 3 end = 10 - self.system_1 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md.10', fmt = 'vasp/outcar', begin = begin, step = step) - self.system_2 = dpdata.LabeledSystem('poscars/OUTCAR.h2o.md.10', fmt = 'vasp/outcar').sub_system(np.arange(begin, end, step)) + self.system_1 = dpdata.LabeledSystem( + "poscars/OUTCAR.h2o.md.10", fmt="vasp/outcar", begin=begin, step=step + ) + self.system_2 = dpdata.LabeledSystem( + "poscars/OUTCAR.h2o.md.10", fmt="vasp/outcar" + ).sub_system(np.arange(begin, end, step)) self.places = 6 self.e_places = 6 self.f_places = 6 @@ -44,10 +55,10 @@ def setUp (self) : class TestVaspOUTCARVdw(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : - self.system_1 = dpdata.LabeledSystem('poscars/OUTCAR.Ge.vdw', fmt = 'vasp/outcar') + def setUp(self): + self.system_1 = dpdata.LabeledSystem("poscars/OUTCAR.Ge.vdw", fmt="vasp/outcar") self.system_2 = dpdata.LabeledSystem() - self.system_2.from_vasp_xml('poscars/vasprun.Ge.vdw.xml') + self.system_2.from_vasp_xml("poscars/vasprun.Ge.vdw.xml") self.places = 5 self.e_places = 6 self.f_places = 6 @@ -56,46 +67,50 @@ def setUp (self) : class TestDuplicatedAtomNames(unittest.TestCase): def test(self): - system = dpdata.LabeledSystem('poscars/6362_OUTCAR', fmt = 'vasp/outcar') + system = dpdata.LabeledSystem("poscars/6362_OUTCAR", fmt="vasp/outcar") expected_types = [0, 1, 0, 1, 0, 0, 
1, 0, 0, 1, 0, 0, 1, 1] - self.assertEqual(list(system['atom_types']), expected_types) - self.assertEqual(system['atom_names'], ['B', 'O']) - self.assertEqual(system['atom_numbs'], [8, 6]) + self.assertEqual(list(system["atom_types"]), expected_types) + self.assertEqual(system["atom_names"], ["B", "O"]) + self.assertEqual(system["atom_numbs"], [8, 6]) def test_type_map(self): - system = dpdata.LabeledSystem('poscars/6362_OUTCAR', fmt = 'vasp/outcar', type_map = ['O', 'B']) + system = dpdata.LabeledSystem( + "poscars/6362_OUTCAR", fmt="vasp/outcar", type_map=["O", "B"] + ) expected_types = [1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0] - self.assertEqual(list(system['atom_types']), expected_types) - self.assertEqual(system['atom_names'], ['O', 'B']) - self.assertEqual(system['atom_numbs'], [6, 8]) + self.assertEqual(list(system["atom_types"]), expected_types) + self.assertEqual(system["atom_names"], ["O", "B"]) + self.assertEqual(system["atom_numbs"], [6, 8]) class TestUniqAtomNames(unittest.TestCase): def test(self): data = {} - data['atom_names'] = ['O', 'H', 'O', 'H'] - data['atom_types'] = np.array([0, 1, 2, 3, 3, 2, 1], dtype=int) - + data["atom_names"] = ["O", "H", "O", "H"] + data["atom_types"] = np.array([0, 1, 2, 3, 3, 2, 1], dtype=int) + data = uniq_atom_names(data) - self.assertEqual(list(data['atom_types']), - [0, 1, 0, 1, 1, 0, 1]) - self.assertEqual(list(data['atom_names']), - ['O', 'H']) - self.assertEqual(list(data['atom_numbs']), - [3, 4]) + self.assertEqual(list(data["atom_types"]), [0, 1, 0, 1, 1, 0, 1]) + self.assertEqual(list(data["atom_names"]), ["O", "H"]) + self.assertEqual(list(data["atom_numbs"]), [3, 4]) + class TestVaspOUTCARML(unittest.TestCase): def test(self): - system1 = dpdata.LabeledSystem('poscars/OUTCAR.ch4.ml', fmt = 'vasp/outcar',ml=True) - system2 = dpdata.LabeledSystem('poscars/OUTCAR.ch4.ml', fmt = 'vasp/outcar',ml=False) + system1 = dpdata.LabeledSystem( + "poscars/OUTCAR.ch4.ml", fmt="vasp/outcar", ml=True + ) + system2 = 
dpdata.LabeledSystem( + "poscars/OUTCAR.ch4.ml", fmt="vasp/outcar", ml=False + ) expected_types = [0, 0, 0, 0, 1] - self.assertEqual(list(system1['atom_types']), expected_types) - self.assertEqual(system1['atom_names'], ['H', 'C']) - self.assertEqual(len(system1['energies']), 10) - self.assertEqual(list(system2['atom_types']), expected_types) - self.assertEqual(system2['atom_names'], ['H', 'C']) - self.assertEqual(len(system2['energies']), 4) + self.assertEqual(list(system1["atom_types"]), expected_types) + self.assertEqual(system1["atom_names"], ["H", "C"]) + self.assertEqual(len(system1["energies"]), 10) + self.assertEqual(list(system2["atom_types"]), expected_types) + self.assertEqual(system2["atom_names"], ["H", "C"]) + self.assertEqual(len(system2["energies"]), 4) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_vasp_poscar_dump.py b/tests/test_vasp_poscar_dump.py index 0e42e49dd..f83e208ca 100644 --- a/tests/test_vasp_poscar_dump.py +++ b/tests/test_vasp_poscar_dump.py @@ -2,48 +2,59 @@ import numpy as np import unittest from context import dpdata -from poscars.poscar_ref_oh import TestPOSCARoh +from poscars.poscar_ref_oh import TestPOSCARoh + def myfilecmp(test, f0, f1): - with open(f0) as fp0 : + with open(f0) as fp0: with open(f1) as fp1: test.assertTrue(fp0.read() == fp1.read()) + class TestPOSCARDump(unittest.TestCase, TestPOSCARoh): - - def setUp(self): + def setUp(self): tmp_system = dpdata.System() # tmp_system.from_vasp_poscar(os.path.join('poscars', 'POSCAR.oh.d')) - tmp_system.from_lammps_lmp(os.path.join('poscars', 'conf.lmp'), type_map = ['O', 'H']) - tmp_system.to_vasp_poscar('tmp.POSCAR') + tmp_system.from_lammps_lmp( + os.path.join("poscars", "conf.lmp"), type_map=["O", "H"] + ) + tmp_system.to_vasp_poscar("tmp.POSCAR") self.system = dpdata.System() - self.system.from_vasp_poscar('tmp.POSCAR') + self.system.from_vasp_poscar("tmp.POSCAR") + class TestPOSCARDump1(unittest.TestCase, TestPOSCARoh): - 
- def setUp(self): + def setUp(self): tmp_system = dpdata.System() - tmp_system.from_vasp_poscar(os.path.join('poscars', 'POSCAR.oh.d')) + tmp_system.from_vasp_poscar(os.path.join("poscars", "POSCAR.oh.d")) # tmp_system.from_lammps_lmp(os.path.join('poscars', 'conf.lmp'), type_map = ['O', 'H']) - tmp_system.to_vasp_poscar('tmp.POSCAR') + tmp_system.to_vasp_poscar("tmp.POSCAR") self.system = dpdata.System() - self.system.from_vasp_poscar('tmp.POSCAR') + self.system.from_vasp_poscar("tmp.POSCAR") + -class TestPOSCARSkipZeroAtomNumb(unittest.TestCase) : +class TestPOSCARSkipZeroAtomNumb(unittest.TestCase): def tearDown(self): - if os.path.isfile('POSCAR.tmp.1'): - os.remove('POSCAR.tmp.1') - if os.path.isfile('POSCAR.tmp.2'): - os.remove('POSCAR.tmp.2') + if os.path.isfile("POSCAR.tmp.1"): + os.remove("POSCAR.tmp.1") + if os.path.isfile("POSCAR.tmp.2"): + os.remove("POSCAR.tmp.2") def test_dump_vasp_type_map(self): - system0 = dpdata.System(os.path.join('poscars', 'POSCAR.oh.d'), fmt = 'vasp/poscar', type_map = ['H', 'O']) - system0.to_vasp_poscar('POSCAR.tmp.1') - system1 = dpdata.System(os.path.join('poscars', 'POSCAR.oh.d'), fmt = 'vasp/poscar', type_map = ['C', 'H', 'A', 'O', 'B']) - system1.to_vasp_poscar('POSCAR.tmp.2') - myfilecmp(self, 'POSCAR.tmp.1', 'POSCAR.tmp.2') + system0 = dpdata.System( + os.path.join("poscars", "POSCAR.oh.d"), + fmt="vasp/poscar", + type_map=["H", "O"], + ) + system0.to_vasp_poscar("POSCAR.tmp.1") + system1 = dpdata.System( + os.path.join("poscars", "POSCAR.oh.d"), + fmt="vasp/poscar", + type_map=["C", "H", "A", "O", "B"], + ) + system1.to_vasp_poscar("POSCAR.tmp.2") + myfilecmp(self, "POSCAR.tmp.1", "POSCAR.tmp.2") -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_vasp_poscar_to_system.py b/tests/test_vasp_poscar_to_system.py index 760da63a6..7eea37577 100644 --- a/tests/test_vasp_poscar_to_system.py +++ b/tests/test_vasp_poscar_to_system.py @@ -5,38 +5,48 @@ from comp_sys import 
CompSys, IsPBC from poscars.poscar_ref_oh import TestPOSCARoh + class TestPOSCARCart(unittest.TestCase, TestPOSCARoh): - - def setUp(self): + def setUp(self): self.system = dpdata.System() - self.system.from_vasp_poscar(os.path.join('poscars', 'POSCAR.oh.c')) + self.system.from_vasp_poscar(os.path.join("poscars", "POSCAR.oh.c")) + class TestPOSCARDirect(unittest.TestCase, TestPOSCARoh): - - def setUp(self): + def setUp(self): self.system = dpdata.System() - self.system.from_vasp_poscar(os.path.join('poscars', 'POSCAR.oh.d')) + self.system.from_vasp_poscar(os.path.join("poscars", "POSCAR.oh.d")) + + +class TestPOSCARDirectDuplicated(unittest.TestCase): + def test(self): + ss = dpdata.System( + os.path.join("poscars", "POSCAR.oh.d.dup"), fmt="vasp/poscar" + ) + self.assertEqual(ss["atom_names"], ["O", "H"]) + self.assertEqual(ss["atom_numbs"], [2, 1]) + self.assertEqual(list(ss["atom_types"]), [0, 1, 0]) -class TestPOSCARDirectDuplicated(unittest.TestCase): - def test(self): - ss = dpdata.System(os.path.join('poscars', 'POSCAR.oh.d.dup'), fmt='vasp/poscar') - self.assertEqual(ss['atom_names'], ['O', 'H']) - self.assertEqual(ss['atom_numbs'], [2, 1]) - self.assertEqual(list(ss['atom_types']), [0, 1, 0]) + def test_type_map(self): + ss = dpdata.System( + os.path.join("poscars", "POSCAR.oh.d.dup"), + fmt="vasp/poscar", + type_map=["H", "O"], + ) + self.assertEqual(ss["atom_names"], ["H", "O"]) + self.assertEqual(ss["atom_numbs"], [1, 2]) + self.assertEqual(list(ss["atom_types"]), [1, 0, 1]) - def test_type_map(self): - ss = dpdata.System(os.path.join('poscars', 'POSCAR.oh.d.dup'), fmt='vasp/poscar', type_map=['H', 'O']) - self.assertEqual(ss['atom_names'], ['H', 'O']) - self.assertEqual(ss['atom_numbs'], [1, 2]) - self.assertEqual(list(ss['atom_types']), [1, 0, 1]) class TestVaspPOSCARTypeMap(unittest.TestCase, CompSys, IsPBC): def setUp(self): - sys0 = dpdata.System('poscars/POSCAR.oh.d', fmt = 'vasp/poscar') - sys0.data['atom_names'] = ['A', 'H', 'B', 'O', 'D'] - 
sys0.data['atom_numbs'] = [ 0, 1, 0, 1, 0] - sys0.data['atom_types'] = np.array([ 3, 1], dtype = int) - sys1 = dpdata.System('poscars/POSCAR.oh.d', fmt = 'vasp/poscar', type_map = ['A', 'H', 'B', 'O', 'D']) + sys0 = dpdata.System("poscars/POSCAR.oh.d", fmt="vasp/poscar") + sys0.data["atom_names"] = ["A", "H", "B", "O", "D"] + sys0.data["atom_numbs"] = [0, 1, 0, 1, 0] + sys0.data["atom_types"] = np.array([3, 1], dtype=int) + sys1 = dpdata.System( + "poscars/POSCAR.oh.d", fmt="vasp/poscar", type_map=["A", "H", "B", "O", "D"] + ) self.system_1 = sys0 self.system_2 = sys1 self.places = 6 @@ -45,5 +55,5 @@ def setUp(self): self.v_places = 6 -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_vasp_unconverged_outcar.py b/tests/test_vasp_unconverged_outcar.py index 90bb05e89..579edf09f 100644 --- a/tests/test_vasp_unconverged_outcar.py +++ b/tests/test_vasp_unconverged_outcar.py @@ -5,23 +5,24 @@ class TestSingleStep(unittest.TestCase): - def setUp(self): - self.LabeledSystem1 = dpdata.LabeledSystem(os.path.join('poscars', 'OUTCAR.ch4.unconverged'),\ - fmt='outcar' ) + self.LabeledSystem1 = dpdata.LabeledSystem( + os.path.join("poscars", "OUTCAR.ch4.unconverged"), fmt="outcar" + ) - self.LabeledSystem2 = dpdata.LabeledSystem(os.path.join('poscars', 'OUTCAR.ch4.1step'),\ - fmt='outcar' ) + self.LabeledSystem2 = dpdata.LabeledSystem( + os.path.join("poscars", "OUTCAR.ch4.1step"), fmt="outcar" + ) - def test_unconverged(self) : + def test_unconverged(self): - self.assertEqual(self.LabeledSystem1['energies'], -23.94708651) + self.assertEqual(self.LabeledSystem1["energies"], -23.94708651) self.assertEqual(self.LabeledSystem1.get_nframes(), 1) self.assertEqual(self.LabeledSystem1.get_natoms(), 5) - def test_single_step(self) : - self.assertEqual(self.LabeledSystem2.get_nframes(), 0) + def test_single_step(self): + self.assertEqual(self.LabeledSystem2.get_nframes(), 0) -if __name__ == '__main__': - unittest.main() \ No newline at end 
of file +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_vasp_xml.py b/tests/test_vasp_xml.py index ed7ac6e80..a3cd90d03 100644 --- a/tests/test_vasp_xml.py +++ b/tests/test_vasp_xml.py @@ -6,32 +6,33 @@ from comp_sys import CompLabeledSys from comp_sys import IsPBC + class TestVaspXml(unittest.TestCase, CompSys, IsPBC): - def setUp (self) : + def setUp(self): self.places = 6 xml_sys = dpdata.LabeledSystem() - xml_sys.from_vasp_xml('poscars/vasprun.h2o.md.xml') + xml_sys.from_vasp_xml("poscars/vasprun.h2o.md.xml") # init_sys = dpdata.System() # init_sys.from_vasp_poscar('poscars/POSCAR.h2o.md') finl_sys = dpdata.System() - finl_sys.from_vasp_poscar('poscars/CONTCAR.h2o.md') + finl_sys.from_vasp_poscar("poscars/CONTCAR.h2o.md") self.system_1 = finl_sys self.system_2 = xml_sys.sub_system([-1]) class TestVaspXmlRotSys(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : + def setUp(self): self.places = 4 # rotated vasp computation, subject to numerical error self.e_places = 3 self.f_places = 2 self.v_places = 1 - self.system_1 = dpdata.LabeledSystem('poscars/vasprun.h2o.md.tribox.xml') - self.system_2 = dpdata.LabeledSystem('poscars/vasprun.h2o.md.tribox.lower.xml') + self.system_1 = dpdata.LabeledSystem("poscars/vasprun.h2o.md.tribox.xml") + self.system_2 = dpdata.LabeledSystem("poscars/vasprun.h2o.md.tribox.lower.xml") class TestVaspXmlSkip(unittest.TestCase, CompLabeledSys, IsPBC): - def setUp (self) : + def setUp(self): self.places = 6 # rotated vasp computation, subject to numerical error self.e_places = 6 @@ -40,9 +41,13 @@ def setUp (self) : begin = 2 end = 10 step = 3 - self.system_1 = dpdata.LabeledSystem('poscars/vasprun.h2o.md.10.xml', begin = begin, step = step) - self.system_2 = dpdata.LabeledSystem('poscars/vasprun.h2o.md.10.xml').sub_system(np.arange(2,10,3)) + self.system_1 = dpdata.LabeledSystem( + "poscars/vasprun.h2o.md.10.xml", begin=begin, step=step + ) + self.system_2 = dpdata.LabeledSystem( + 
"poscars/vasprun.h2o.md.10.xml" + ).sub_system(np.arange(2, 10, 3)) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_water_ions.py b/tests/test_water_ions.py index f2fab203b..98b3838b5 100644 --- a/tests/test_water_ions.py +++ b/tests/test_water_ions.py @@ -2,26 +2,32 @@ import numpy as np import unittest from context import dpdata + try: import ase - import ase.neighborlist - exist_ase=True + import ase.neighborlist + + exist_ase = True except Exception: - exist_ase=False + exist_ase = False + class TestIons(unittest.TestCase): - - def setUp(self): + def setUp(self): self.system = dpdata.System() - self.system.from_lammps_lmp(os.path.join('poscars', 'conf.waterion.lmp'), - type_map = ['O', 'H']) - self.bonds = dpdata.md.water.compute_bonds(self.system.data['cells'][0], - self.system.data['coords'][0], - self.system.data['atom_types']) - - def test_ions_count(self) : - no, noh, noh2, noh3, nh \ - = dpdata.md.water.find_ions(self.system.data['atom_types'], self.bonds) + self.system.from_lammps_lmp( + os.path.join("poscars", "conf.waterion.lmp"), type_map=["O", "H"] + ) + self.bonds = dpdata.md.water.compute_bonds( + self.system.data["cells"][0], + self.system.data["coords"][0], + self.system.data["atom_types"], + ) + + def test_ions_count(self): + no, noh, noh2, noh3, nh = dpdata.md.water.find_ions( + self.system.data["atom_types"], self.bonds + ) self.assertEqual(len(no), 0) self.assertEqual(len(noh), 1) self.assertEqual(len(noh2), 125) @@ -35,14 +41,19 @@ def test_ions_count(self) : class TestAseComputeBond(unittest.TestCase): def setUp(self): self.system = dpdata.System() - self.system.from_lammps_lmp(os.path.join('poscars', 'conf.waterion.lmp'), - type_map = ['O', 'H']) - self.bonds = dpdata.md.water.compute_bonds_naive(self.system.data['cells'][0], - self.system.data['coords'][0], - self.system.data['atom_types']) - self.bonds_ase = dpdata.md.water.compute_bonds_ase(self.system.data['cells'][0], - 
self.system.data['coords'][0], - self.system.data['atom_types']) + self.system.from_lammps_lmp( + os.path.join("poscars", "conf.waterion.lmp"), type_map=["O", "H"] + ) + self.bonds = dpdata.md.water.compute_bonds_naive( + self.system.data["cells"][0], + self.system.data["coords"][0], + self.system.data["atom_types"], + ) + self.bonds_ase = dpdata.md.water.compute_bonds_ase( + self.system.data["cells"][0], + self.system.data["coords"][0], + self.system.data["atom_types"], + ) def test_bond_identity(self): self.assertTrue(len(self.bonds), len(self.bonds_ase)) @@ -50,7 +61,5 @@ def test_bond_identity(self): self.assertTrue(set(self.bonds[ii]) == set(self.bonds_ase[ii])) - -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() - diff --git a/tests/test_xyz.py b/tests/test_xyz.py index 7d9649f94..8f0779b31 100644 --- a/tests/test_xyz.py +++ b/tests/test_xyz.py @@ -4,17 +4,20 @@ from context import dpdata from comp_sys import CompSys, IsNoPBC + class TestToXYZ(unittest.TestCase): def test_to_xyz(self): - with tempfile.NamedTemporaryFile('r') as f_xyz: - dpdata.System(data={ - "atom_names": ["C", "O"], - "atom_numbs": [1, 1], - "atom_types": np.array([0, 1]), - "coords": np.arange(6).reshape((1,2,3)), - "cells": np.zeros((1,3,3)), - "orig": np.zeros(3), - }).to("xyz", f_xyz.name) + with tempfile.NamedTemporaryFile("r") as f_xyz: + dpdata.System( + data={ + "atom_names": ["C", "O"], + "atom_numbs": [1, 1], + "atom_types": np.array([0, 1]), + "coords": np.arange(6).reshape((1, 2, 3)), + "cells": np.zeros((1, 3, 3)), + "orig": np.zeros(3), + } + ).to("xyz", f_xyz.name) xyz0 = f_xyz.read().strip() xyz1 = "2\n\nC 0.000000 1.000000 2.000000\nO 3.000000 4.000000 5.000000" self.assertEqual(xyz0, xyz1) @@ -24,15 +27,17 @@ class TestFromXYZ(unittest.TestCase, CompSys, IsNoPBC): def setUp(self): self.places = 6 # considering to_xyz has been tested.. 
- self.system_1 = dpdata.System(data={ + self.system_1 = dpdata.System( + data={ "atom_names": ["C", "O"], "atom_numbs": [1, 1], "atom_types": np.array([0, 1]), - "coords": np.arange(6).reshape((1,2,3)), - "cells": np.zeros((1,3,3)), + "coords": np.arange(6).reshape((1, 2, 3)), + "cells": np.zeros((1, 3, 3)), "orig": np.zeros(3), "nopbc": True, - }) - with tempfile.NamedTemporaryFile('r') as f_xyz: + } + ) + with tempfile.NamedTemporaryFile("r") as f_xyz: self.system_1.to("xyz", f_xyz.name) self.system_2 = dpdata.System(f_xyz.name, fmt="xyz")