diff --git a/.git_archival.txt b/.git_archival.txt index 8fb235d70..7c5100942 100644 --- a/.git_archival.txt +++ b/.git_archival.txt @@ -1,4 +1,3 @@ node: $Format:%H$ node-date: $Format:%cI$ describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$ -ref-names: $Format:%D$ diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 650ff59ef..1dd682776 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -1,11 +1,11 @@ -name: Python package +name: Benchmark on: - push - pull_request jobs: - build: + benchmark: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -15,9 +15,9 @@ jobs: python-version: 3.12 - run: curl -LsSf https://astral.sh/uv/install.sh | sh - name: Install dependencies - run: uv pip install --system .[amber,ase,pymatgen,benchmark] rdkit openbabel-wheel + run: uv pip install --system .[test,amber,ase,pymatgen,benchmark] rdkit openbabel-wheel - name: Run benchmarks - uses: CodSpeedHQ/action@v2 + uses: CodSpeedHQ/action@v3 with: token: ${{ secrets.CODSPEED_TOKEN }} run: pytest benchmark/ --codspeed diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4a17fcde1..a6d23270c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,7 +20,7 @@ jobs: python-version: ${{ matrix.python-version }} - run: curl -LsSf https://astral.sh/uv/install.sh | sh - name: Install dependencies - run: uv pip install --system .[amber,ase,pymatgen] coverage ./tests/plugin rdkit openbabel-wheel + run: uv pip install --system .[test,amber,ase,pymatgen] coverage ./tests/plugin rdkit openbabel-wheel - name: Test run: cd tests && coverage run --source=../dpdata -m unittest && cd .. && coverage combine tests/.coverage && coverage report - name: Run codecov diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 23bea3ebd..3f43b6975 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: # Python - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.4.7 + rev: v0.6.3 hooks: - id: ruff args: ["--fix"] @@ -36,7 +36,7 @@ repos: args: ["--write"] # Python inside docs - repo: https://github.com/asottile/blacken-docs - rev: 1.16.0 + rev: 1.18.0 hooks: - id: blacken-docs ci: diff --git a/README.md b/README.md index ab489a75e..6f9246b1c 100644 --- a/README.md +++ b/README.md @@ -4,320 +4,47 @@ [![pip install](https://img.shields.io/pypi/dm/dpdata?label=pip%20install&logo=pypi)](https://pypi.org/project/dpdata) [![Documentation Status](https://readthedocs.org/projects/dpdata/badge/)](https://dpdata.readthedocs.io/) -**dpdata** is a python package for manipulating data formats of software in computational science, including DeePMD-kit, VASP, LAMMPS, GROMACS, Gaussian. -dpdata only works with python 3.7 or above. - +**dpdata** is a Python package for manipulating atomistic data of software in computational science. ## Installation -One can download the source code of dpdata by -```bash -git clone https://github.com/deepmodeling/dpdata.git dpdata -``` -then use `pip` to install the module from source -```bash -cd dpdata -pip install . -``` - -`dpdata` can also by install via pip without source -```bash -pip install dpdata -``` - -## Quick start - -This section gives some examples on how dpdata works. Firstly one needs to import the module in a python 3.x compatible code. -```python -import dpdata -``` -The typicall workflow of `dpdata` is - -1. Load data from vasp or lammps or deepmd-kit data files. -2. Manipulate data -3. Dump data to in a desired format - - -### Load data -```python -d_poscar = dpdata.System("POSCAR", fmt="vasp/poscar") -``` -or let dpdata infer the format (`vasp/poscar`) of the file from the file name extension -```python -d_poscar = dpdata.System("my.POSCAR") -``` -The number of atoms, atom types, coordinates are loaded from the `POSCAR` and stored to a data `System` called `d_poscar`. -A data `System` (a concept used by [deepmd-kit](https://github.com/deepmodeling/deepmd-kit)) contains frames that has the same number of atoms of the same type. The order of the atoms should be consistent among the frames in one `System`. -It is noted that `POSCAR` only contains one frame. -If the multiple frames stored in, for example, a `OUTCAR` is wanted, -```python -d_outcar = dpdata.LabeledSystem("OUTCAR") -``` -The labels provided in the `OUTCAR`, i.e. energies, forces and virials (if any), are loaded by `LabeledSystem`. It is noted that the forces of atoms are always assumed to exist. `LabeledSystem` is a derived class of `System`. +DP-GEN only supports Python 3.7 and above. You can [setup a conda/pip environment](https://docs.deepmodeling.com/faq/conda.html), and then use one of the following methods to install DP-GEN: -The `System` or `LabeledSystem` can be constructed from the following file formats with the `format key` in the table passed to argument `fmt`: +- Install via pip: `pip install dpdata` +- Install via conda: `conda install -c conda-forge dpdata` +- Install from source code: `git clone https://github.com/deepmodeling/dpdata && pip install ./dpdata` -| Software| format | multi frames | labeled | class | format key | -| ------- | :--- | :---: | :---: | :--- | :--- | -| vasp | poscar | False | False | System | 'vasp/poscar' | -| vasp | outcar | True | True | LabeledSystem | 'vasp/outcar' | -| vasp | xml | True | True | LabeledSystem | 'vasp/xml' | -| lammps | lmp | False | False | System | 'lammps/lmp' | -| lammps | dump | True | False | System | 'lammps/dump' | -| deepmd | raw | True | False | System | 'deepmd/raw' | -| deepmd | npy | True | False | System | 'deepmd/npy' | -| deepmd | raw | True | True | LabeledSystem | 'deepmd/raw' | -| deepmd | npy | True | True | LabeledSystem | 'deepmd/npy' | -| deepmd | npy | True | True | MultiSystems | 'deepmd/npy/mixed' | -| deepmd | npy | True | False | MultiSystems | 'deepmd/npy/mixed' | -| gaussian| log | False | True | LabeledSystem | 'gaussian/log'| -| gaussian| log | True | True | LabeledSystem | 'gaussian/md' | -| siesta | output | False | True | LabeledSystem | 'siesta/output'| -| siesta | aimd_output | True | True | LabeledSystem | 'siesta/aimd_output' | -| cp2k(deprecated in future) | output | False | True | LabeledSystem | 'cp2k/output' | -| cp2k(deprecated in future) | aimd_output | True | True | LabeledSystem | 'cp2k/aimd_output' | -| cp2k([plug-in](https://github.com/robinzyb/cp2kdata#plug-in-for-dpdata)) | stdout | False | True | LabeledSystem | 'cp2kdata/e_f' | -| cp2k([plug-in](https://github.com/robinzyb/cp2kdata#plug-in-for-dpdata)) | stdout | True | True | LabeledSystem | 'cp2kdata/md' | -| QE | log | False | True | LabeledSystem | 'qe/pw/scf' | -| QE | log | True | False | System | 'qe/cp/traj' | -| QE | log | True | True | LabeledSystem | 'qe/cp/traj' | -| Fhi-aims| output | True | True | LabeledSystem | 'fhi_aims/md' | -| Fhi-aims| output | False | True | LabeledSystem | 'fhi_aims/scf' | -|quip/gap|xyz|True|True|MultiSystems|'quip/gap/xyz'| -| PWmat | atom.config | False | False | System | 'pwmat/atom.config' | -| PWmat | movement | True | True | LabeledSystem | 'pwmat/movement' | -| PWmat | OUT.MLMD | True | True | LabeledSystem | 'pwmat/out.mlmd' | -| Amber | multi | True | True | LabeledSystem | 'amber/md' | -| Amber/sqm | sqm.out | False | False | System | 'sqm/out' | -| Gromacs | gro | True | False | System | 'gromacs/gro' | -| ABACUS | STRU | False | False | System | 'abacus/stru' | -| ABACUS | STRU | False | True | LabeledSystem | 'abacus/scf' | -| ABACUS | cif | True | True | LabeledSystem | 'abacus/md' | -| ABACUS | STRU | True | True | LabeledSystem | 'abacus/relax' | -| ase | structure | True | True | MultiSystems | 'ase/structure' | -| DFTB+ | dftbplus | False | True | LabeledSystem | 'dftbplus' | -| n2p2 | n2p2 | True | True | LabeledSystem | 'n2p2' | - - -The Class `dpdata.MultiSystems` can read data from a dir which may contains many files of different systems, or from single xyz file which contains different systems. - -Use `dpdata.MultiSystems.from_dir` to read from a directory, `dpdata.MultiSystems` will walk in the directory -Recursively and find all file with specific file_name. Supports all the file formats that `dpdata.LabeledSystem` supports. - -Use `dpdata.MultiSystems.from_file` to read from single file. Single-file support is available for the `quip/gap/xyz` and `ase/structure` formats. - -For example, for `quip/gap xyz` files, single .xyz file may contain many different configurations with different atom numbers and atom type. - -The following commands relating to `Class dpdata.MultiSystems` may be useful. -```python -# load data - -xyz_multi_systems = dpdata.MultiSystems.from_file( - file_name="tests/xyz/xyz_unittest.xyz", fmt="quip/gap/xyz" -) -vasp_multi_systems = dpdata.MultiSystems.from_dir( - dir_name="./mgal_outcar", file_name="OUTCAR", fmt="vasp/outcar" -) - -# use wildcard -vasp_multi_systems = dpdata.MultiSystems.from_dir( - dir_name="./mgal_outcar", file_name="*OUTCAR", fmt="vasp/outcar" -) - -# print the multi_system infomation -print(xyz_multi_systems) -print(xyz_multi_systems.systems) # return a dictionaries - -# print the system infomation -print(xyz_multi_systems.systems["B1C9"].data) - -# dump a system's data to ./my_work_dir/B1C9_raw folder -xyz_multi_systems.systems["B1C9"].to_deepmd_raw("./my_work_dir/B1C9_raw") - -# dump all systems -xyz_multi_systems.to_deepmd_raw("./my_deepmd_data/") -``` - -You may also use the following code to parse muti-system: -```python -from dpdata import LabeledSystem, MultiSystems -from glob import glob - -""" -process multi systems -""" -fs = glob("./*/OUTCAR") # remeber to change here !!! -ms = MultiSystems() -for f in fs: - try: - ls = LabeledSystem(f) - except: - print(f) - if len(ls) > 0: - ms.append(ls) - -ms.to_deepmd_raw("deepmd") -ms.to_deepmd_npy("deepmd") -``` +To test if the installation is successful, you may execute -### Access data -These properties stored in `System` and `LabeledSystem` can be accessed by operator `[]` with the key of the property supplied, for example -```python -coords = d_outcar["coords"] -``` -Available properties are (nframe: number of frames in the system, natoms: total number of atoms in the system) - -| key | type | dimension | are labels | description -| --- | --- | --- | --- | --- -| 'atom_names' | list of str | ntypes | False | The name of each atom type -| 'atom_numbs' | list of int | ntypes | False | The number of atoms of each atom type -| 'atom_types' | np.ndarray | natoms | False | Array assigning type to each atom -| 'cells' | np.ndarray | nframes x 3 x 3 | False | The cell tensor of each frame -| 'coords' | np.ndarray | nframes x natoms x 3 | False | The atom coordinates -| 'energies' | np.ndarray | nframes | True | The frame energies -| 'forces' | np.ndarray | nframes x natoms x 3 | True | The atom forces -| 'virials' | np.ndarray | nframes x 3 x 3 | True | The virial tensor of each frame - - -### Dump data -The data stored in `System` or `LabeledSystem` can be dumped in 'lammps/lmp' or 'vasp/poscar' format, for example: -```python -d_outcar.to("lammps/lmp", "conf.lmp", frame_idx=0) -``` -The first frames of `d_outcar` will be dumped to 'conf.lmp' -```python -d_outcar.to("vasp/poscar", "POSCAR", frame_idx=-1) -``` -The last frames of `d_outcar` will be dumped to 'POSCAR'. - -The data stored in `LabeledSystem` can be dumped to deepmd-kit raw format, for example -```python -d_outcar.to("deepmd/raw", "dpmd_raw") -``` -Or a simpler command: -```python -dpdata.LabeledSystem("OUTCAR").to("deepmd/raw", "dpmd_raw") -``` -Frame selection can be implemented by -```python -dpdata.LabeledSystem("OUTCAR").sub_system([0, -1]).to("deepmd/raw", "dpmd_raw") -``` -by which only the first and last frames are dumped to `dpmd_raw`. - - -### replicate -dpdata will create a super cell of the current atom configuration. -```python -dpdata.System("./POSCAR").replicate( - ( - 1, - 2, - 3, - ) -) -``` -tuple(1,2,3) means don't copy atom configuration in x direction, make 2 copys in y direction, make 3 copys in z direction. - - -### perturb -By the following example, each frame of the original system (`dpdata.System('./POSCAR')`) is perturbed to generate three new frames. For each frame, the cell is perturbed by 5% and the atom positions are perturbed by 0.6 Angstrom. `atom_pert_style` indicates that the perturbation to the atom positions is subject to normal distribution. Other available options to `atom_pert_style` are`uniform` (uniform in a ball), and `const` (uniform on a sphere). -```python -perturbed_system = dpdata.System("./POSCAR").perturb( - pert_num=3, - cell_pert_fraction=0.05, - atom_pert_distance=0.6, - atom_pert_style="normal", -) -print(perturbed_system.data) -``` - -### replace -By the following example, Random 8 Hf atoms in the system will be replaced by Zr atoms with the atom postion unchanged. -```python -s = dpdata.System("tests/poscars/POSCAR.P42nmc", fmt="vasp/poscar") -s.replace("Hf", "Zr", 8) -s.to_vasp_poscar("POSCAR.P42nmc.replace") -``` - -## BondOrderSystem -A new class `BondOrderSystem` which inherits from class `System` is introduced in dpdata. This new class contains information of chemical bonds and formal charges (stored in `BondOrderSystem.data['bonds']`, `BondOrderSystem.data['formal_charges']`). Now BondOrderSystem can only read from .mol/.sdf formats, because of its dependency on rdkit (which means rdkit must be installed if you want to use this function). Other formats, such as pdb, must be converted to .mol/.sdf format (maybe with software like open babel). -```python -import dpdata - -system_1 = dpdata.BondOrderSystem( - "tests/bond_order/CH3OH.mol", fmt="mol" -) # read from .mol file -system_2 = dpdata.BondOrderSystem( - "tests/bond_order/methane.sdf", fmt="sdf" -) # read from .sdf file -``` -In sdf file, all molecules must be of the same topology (i.e. conformers of the same molecular configuration). -`BondOrderSystem` also supports initialize from a `rdkit.Chem.rdchem.Mol` object directly. -```python -from rdkit import Chem -from rdkit.Chem import AllChem -import dpdata - -mol = Chem.MolFromSmiles("CC") -mol = Chem.AddHs(mol) -AllChem.EmbedMultipleConfs(mol, 10) -system = dpdata.BondOrderSystem(rdkit_mol=mol) -``` - -### Bond Order Assignment -The `BondOrderSystem` implements a more robust sanitize procedure for rdkit Mol, as defined in `dpdata.rdkit.santizie.Sanitizer`. This class defines 3 level of sanitization process by: low, medium and high. (default is medium). -+ low: use `rdkit.Chem.SanitizeMol()` function to sanitize molecule. -+ medium: before using rdkit, the programm will first assign formal charge of each atom to avoid inappropriate valence exceptions. However, this mode requires the rightness of the bond order information in the given molecule. -+ high: the program will try to fix inappropriate bond orders in aromatic hetreocycles, phosphate, sulfate, carboxyl, nitro, nitrine, guanidine groups. If this procedure fails to sanitize the given molecule, the program will then try to call `obabel` to pre-process the mol and repeat the sanitization procedure. **That is to say, if you wan't to use this level of sanitization, please ensure `obabel` is installed in the environment.** -According to our test, our sanitization procedure can successfully read 4852 small molecules in the PDBBind-refined-set. It is necessary to point out that the in the molecule file (mol/sdf), the number of explicit hydrogens has to be correct. Thus, we recommend to use - `obabel xxx -O xxx -h` to pre-process the file. The reason why we do not implement this hydrogen-adding procedure in dpdata is that we can not ensure its correctness. - -```python -import dpdata - -for sdf_file in glob.glob("bond_order/refined-set-ligands/obabel/*sdf"): - syst = dpdata.BondOrderSystem(sdf_file, sanitize_level="high", verbose=False) -``` -### Formal Charge Assignment -BondOrderSystem implement a method to assign formal charge for each atom based on the 8-electron rule (see below). Note that it only supports common elements in bio-system: B,C,N,O,P,S,As -```python -import dpdata - -syst = dpdata.BondOrderSystem("tests/bond_order/CH3NH3+.mol", fmt="mol") -print(syst.get_formal_charges()) # return the formal charge on each atom -print(syst.get_charge()) # return the total charge of the system +```bash +dpdata --version ``` -If a valence of 3 is detected on carbon, the formal charge will be assigned to -1. Because for most cases (in alkynyl anion, isonitrile, cyclopentadienyl anion), the formal charge on 3-valence carbon is -1, and this is also consisent with the 8-electron rule. +## Supported packages -## Mixed Type Format -The format `deepmd/npy/mixed` is the mixed type numpy format for DeePMD-kit, and can be loaded or dumped through class `dpdata.MultiSystems`. +`dpdata` is aimmed to support different kinds of atomistic packages: -Under this format, systems with the same number of atoms but different formula can be put together -for a larger system, especially when the frame numbers in systems are sparse. +- Atomistic machine learning packages, such as [DeePMD-kit](https://github.com/deepmodeling/deepmd-kit); +- Molecular dynamics packages, such as [LAMMPS](https://github.com/lammps/lammps) and [GROMACS](https://gitlab.com/gromacs/gromacs); +- Quantum chemistry packages, such as [VASP](https://www.vasp.at/), [Gaussian](https://gaussian.com), and [ABACUS](https://github.com/deepmodeling/abacus-develop); +- Atomistic visualization packages, such as [3Dmol.js](https://3dmol.csb.pitt.edu/). +- Other atomistic tools, such as [ASE](https://gitlab.com/ase/ase). +- Common formats such as `xyz`. -This also helps to mixture the type information together for model training with type embedding network. +All supported formats are listed [here](https://docs.deepmodeling.com/projects/dpdata/en/master/formats.html). -Here are examples using `deepmd/npy/mixed` format: +## Quick start -- Dump a MultiSystems into a mixed type numpy directory: -```python -import dpdata +The quickest way to convert a simple file from one format to another one is to use the [command line](https://docs.deepmodeling.com/projects/dpdata/en/master/cli.html). -dpdata.MultiSystems(*systems).to_deepmd_npy_mixed("mixed_dir") +```sh +dpdata OUTCAR -i vasp/outcar -o deepmd/npy -O deepmd_data ``` -- Load a mixed type data into a MultiSystems: -```python -import dpdata - -dpdata.MultiSystems().load_systems_from_file("mixed_dir", fmt="deepmd/npy/mixed") -``` +For advanced usage with Python APIs, [read dpdata documentation](https://docs.deepmodeling.com/projects/dpdata/). ## Plugins -One can follow [a simple example](plugin_example/) to add their own format by creating and installing plugins. It's critical to add the [Format](dpdata/format.py) class to `entry_points['dpdata.plugins']` in [`pyproject.toml`](plugin_example/pyproject.toml): -```toml -[project.entry-points.'dpdata.plugins'] -random = "dpdata_random:RandomFormat" -``` +- [cp2kdata](https://github.com/robinzyb/cp2kdata) adds the latest CP2K support for dpdata. + +For how to create your own plugin packages, [read dpdata documentation](https://docs.deepmodeling.com/projects/dpdata/). diff --git a/docs/conf.py b/docs/conf.py index e3c0b3d4c..a21d19d56 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -49,7 +49,7 @@ "sphinx.ext.viewcode", "sphinx.ext.intersphinx", "numpydoc", - "m2r2", + "myst_parser", "sphinxarg.ext", "jupyterlite_sphinx", ] diff --git a/docs/index.rst b/docs/index.rst index 07e381de7..3d98bd566 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,22 +6,23 @@ Welcome to dpdata's documentation! ================================== +dpdata is a Python package for manipulating atomistic data of software in computational science. + .. toctree:: :maxdepth: 2 :caption: Contents: - Overview + installation + systems/index try_dpdata cli formats drivers minimizers + plugin api/api credits -.. mdinclude:: ../README.md - - Indices and tables ================== diff --git a/docs/installation.md b/docs/installation.md new file mode 100644 index 000000000..064f91331 --- /dev/null +++ b/docs/installation.md @@ -0,0 +1,13 @@ +# Installation + +DP-GEN only supports Python 3.7 and above. You can [setup a conda/pip environment](https://docs.deepmodeling.com/faq/conda.html), and then use one of the following methods to install DP-GEN: + +- Install via pip: `pip install dpdata` +- Install via conda: `conda install -c conda-forge dpdata` +- Install from source code: `git clone https://github.com/deepmodeling/dpdata && pip install ./dpdata` + +To test if the installation is successful, you may execute + +```bash +dpdata --version +``` diff --git a/docs/plugin.md b/docs/plugin.md new file mode 100644 index 000000000..44e8eabf7 --- /dev/null +++ b/docs/plugin.md @@ -0,0 +1,9 @@ +# Plugins + +One can follow a simple example under `plugin_example/` directory to add their own format by creating and installing plugins. +It's critical to add the :class:`Format` class to `entry_points['dpdata.plugins']` in `pyproject.toml`: + +```toml +[project.entry-points.'dpdata.plugins'] +random = "dpdata_random:RandomFormat" +``` diff --git a/docs/systems/bond_order_system.md b/docs/systems/bond_order_system.md new file mode 100644 index 000000000..443120a76 --- /dev/null +++ b/docs/systems/bond_order_system.md @@ -0,0 +1,51 @@ + +## BondOrderSystem +A new class :class:`BondOrderSystem` which inherits from class :class:`System` is introduced in dpdata. This new class contains information of chemical bonds and formal charges (stored in `BondOrderSystem.data['bonds']`, `BondOrderSystem.data['formal_charges']`). Now BondOrderSystem can only read from .mol/.sdf formats, because of its dependency on rdkit (which means rdkit must be installed if you want to use this function). Other formats, such as pdb, must be converted to .mol/.sdf format (maybe with software like open babel). +```python +import dpdata + +system_1 = dpdata.BondOrderSystem( + "tests/bond_order/CH3OH.mol", fmt="mol" +) # read from .mol file +system_2 = dpdata.BondOrderSystem( + "tests/bond_order/methane.sdf", fmt="sdf" +) # read from .sdf file +``` +In sdf file, all molecules must be of the same topology (i.e. conformers of the same molecular configuration). +`BondOrderSystem` also supports initialize from a :class:`rdkit.Chem.rdchem.Mol` object directly. +```python +from rdkit import Chem +from rdkit.Chem import AllChem +import dpdata + +mol = Chem.MolFromSmiles("CC") +mol = Chem.AddHs(mol) +AllChem.EmbedMultipleConfs(mol, 10) +system = dpdata.BondOrderSystem(rdkit_mol=mol) +``` + +### Bond Order Assignment +The :class:`BondOrderSystem` implements a more robust sanitize procedure for rdkit Mol, as defined in :class:`dpdata.rdkit.santizie.Sanitizer`. This class defines 3 level of sanitization process by: low, medium and high. (default is medium). ++ low: use `rdkit.Chem.SanitizeMol()` function to sanitize molecule. ++ medium: before using rdkit, the programm will first assign formal charge of each atom to avoid inappropriate valence exceptions. However, this mode requires the rightness of the bond order information in the given molecule. ++ high: the program will try to fix inappropriate bond orders in aromatic hetreocycles, phosphate, sulfate, carboxyl, nitro, nitrine, guanidine groups. If this procedure fails to sanitize the given molecule, the program will then try to call `obabel` to pre-process the mol and repeat the sanitization procedure. **That is to say, if you wan't to use this level of sanitization, please ensure `obabel` is installed in the environment.** +According to our test, our sanitization procedure can successfully read 4852 small molecules in the PDBBind-refined-set. It is necessary to point out that the in the molecule file (mol/sdf), the number of explicit hydrogens has to be correct. Thus, we recommend to use + `obabel xxx -O xxx -h` to pre-process the file. The reason why we do not implement this hydrogen-adding procedure in dpdata is that we can not ensure its correctness. + +```python +import dpdata + +for sdf_file in glob.glob("bond_order/refined-set-ligands/obabel/*sdf"): + syst = dpdata.BondOrderSystem(sdf_file, sanitize_level="high", verbose=False) +``` +### Formal Charge Assignment +BondOrderSystem implement a method to assign formal charge for each atom based on the 8-electron rule (see below). Note that it only supports common elements in bio-system: B,C,N,O,P,S,As +```python +import dpdata + +syst = dpdata.BondOrderSystem("tests/bond_order/CH3NH3+.mol", fmt="mol") +print(syst.get_formal_charges()) # return the formal charge on each atom +print(syst.get_charge()) # return the total charge of the system +``` + +If a valence of 3 is detected on carbon, the formal charge will be assigned to -1. Because for most cases (in alkynyl anion, isonitrile, cyclopentadienyl anion), the formal charge on 3-valence carbon is -1, and this is also consisent with the 8-electron rule. diff --git a/docs/systems/index.rst b/docs/systems/index.rst new file mode 100644 index 000000000..dfcbe698e --- /dev/null +++ b/docs/systems/index.rst @@ -0,0 +1,11 @@ +Systems +======= + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + system + multi + bond_order_system + mixed diff --git a/docs/systems/mixed.md b/docs/systems/mixed.md new file mode 100644 index 000000000..69bf80ea8 --- /dev/null +++ b/docs/systems/mixed.md @@ -0,0 +1,24 @@ +# Mixed Type Format + +The format `deepmd/npy/mixed` is the mixed type numpy format for DeePMD-kit, and can be loaded or dumped through class :class:`dpdata.MultiSystems`. + +Under this format, systems with the same number of atoms but different formula can be put together +for a larger system, especially when the frame numbers in systems are sparse. + +This also helps to mixture the type information together for model training with type embedding network. + +Here are examples using `deepmd/npy/mixed` format: + +- Dump a MultiSystems into a mixed type numpy directory: +```python +import dpdata + +dpdata.MultiSystems(*systems).to_deepmd_npy_mixed("mixed_dir") +``` + +- Load a mixed type data into a MultiSystems: +```python +import dpdata + +dpdata.MultiSystems().load_systems_from_file("mixed_dir", fmt="deepmd/npy/mixed") +``` diff --git a/docs/systems/multi.md b/docs/systems/multi.md new file mode 100644 index 000000000..8e47acf70 --- /dev/null +++ b/docs/systems/multi.md @@ -0,0 +1,62 @@ +# `MultiSystems` + +The Class :class:`dpdata.MultiSystems` can read data from a dir which may contains many files of different systems, or from single xyz file which contains different systems. + +Use :meth:`dpdata.MultiSystems.from_dir` to read from a directory, :class:`dpdata.MultiSystems` will walk in the directory +Recursively and find all file with specific file_name. Supports all the file formats that :class:`dpdata.LabeledSystem` supports. + +Use :meth:`dpdata.MultiSystems.from_file` to read from single file. Single-file support is available for the `quip/gap/xyz` and `ase/structure` formats. + +For example, for `quip/gap xyz` files, single .xyz file may contain many different configurations with different atom numbers and atom type. + +The following commands relating to :class:`dpdata.MultiSystems` may be useful. +```python +# load data + +xyz_multi_systems = dpdata.MultiSystems.from_file( + file_name="tests/xyz/xyz_unittest.xyz", fmt="quip/gap/xyz" +) +vasp_multi_systems = dpdata.MultiSystems.from_dir( + dir_name="./mgal_outcar", file_name="OUTCAR", fmt="vasp/outcar" +) + +# use wildcard +vasp_multi_systems = dpdata.MultiSystems.from_dir( + dir_name="./mgal_outcar", file_name="*OUTCAR", fmt="vasp/outcar" +) + +# print the multi_system infomation +print(xyz_multi_systems) +print(xyz_multi_systems.systems) # return a dictionaries + +# print the system infomation +print(xyz_multi_systems.systems["B1C9"].data) + +# dump a system's data to ./my_work_dir/B1C9_raw folder +xyz_multi_systems.systems["B1C9"].to_deepmd_raw("./my_work_dir/B1C9_raw") + +# dump all systems +xyz_multi_systems.to_deepmd_raw("./my_deepmd_data/") +``` + +You may also use the following code to parse muti-system: +```python +from dpdata import LabeledSystem, MultiSystems +from glob import glob + +""" +process multi systems +""" +fs = glob("./*/OUTCAR") # remeber to change here !!! +ms = MultiSystems() +for f in fs: + try: + ls = LabeledSystem(f) + except: + print(f) + if len(ls) > 0: + ms.append(ls) + +ms.to_deepmd_raw("deepmd") +ms.to_deepmd_npy("deepmd") +``` diff --git a/docs/systems/system.md b/docs/systems/system.md new file mode 100644 index 000000000..0401a3110 --- /dev/null +++ b/docs/systems/system.md @@ -0,0 +1,112 @@ +# `System` and `LabeledSystem` + +This section gives some examples on how dpdata works. Firstly one needs to import the module in a python 3.x compatible code. +```python +import dpdata +``` +The typicall workflow of `dpdata` is + +1. Load data from vasp or lammps or deepmd-kit data files. +2. Manipulate data +3. Dump data to in a desired format + + +### Load data +```python +d_poscar = dpdata.System("POSCAR", fmt="vasp/poscar") +``` +or let dpdata infer the format (`vasp/poscar`) of the file from the file name extension +```python +d_poscar = dpdata.System("my.POSCAR") +``` +The number of atoms, atom types, coordinates are loaded from the `POSCAR` and stored to a data :class:`System` called `d_poscar`. +A data :class:`System` (a concept used by [deepmd-kit](https://github.com/deepmodeling/deepmd-kit)) contains frames that has the same number of atoms of the same type. The order of the atoms should be consistent among the frames in one :class:`System`. +It is noted that `POSCAR` only contains one frame. +If the multiple frames stored in, for example, a `OUTCAR` is wanted, +```python +d_outcar = dpdata.LabeledSystem("OUTCAR") +``` +The labels provided in the `OUTCAR`, i.e. energies, forces and virials (if any), are loaded by :class:`LabeledSystem`. It is noted that the forces of atoms are always assumed to exist. :class:`LabeledSystem` is a derived class of :class:`System`. + +The :class:`System` or :class:`LabeledSystem` can be constructed from the following file formats with the `format key` in the table passed to argument `fmt`: + + + +### Access data +These properties stored in :class:`System` and :class:`LabeledSystem` can be accessed by operator `[]` with the key of the property supplied, for example +```python +coords = d_outcar["coords"] +``` +Available properties are (nframe: number of frames in the system, natoms: total number of atoms in the system) + +| key | type | dimension | are labels | description +| --- | --- | --- | --- | --- +| 'atom_names' | list of str | ntypes | False | The name of each atom type +| 'atom_numbs' | list of int | ntypes | False | The number of atoms of each atom type +| 'atom_types' | np.ndarray | natoms | False | Array assigning type to each atom +| 'cells' | np.ndarray | nframes x 3 x 3 | False | The cell tensor of each frame +| 'coords' | np.ndarray | nframes x natoms x 3 | False | The atom coordinates +| 'energies' | np.ndarray | nframes | True | The frame energies +| 'forces' | np.ndarray | nframes x natoms x 3 | True | The atom forces +| 'virials' | np.ndarray | nframes x 3 x 3 | True | The virial tensor of each frame + + +### Dump data +The data stored in :class:`System` or :class:`LabeledSystem` can be dumped in 'lammps/lmp' or 'vasp/poscar' format, for example: +```python +d_outcar.to("lammps/lmp", "conf.lmp", frame_idx=0) +``` +The first frames of `d_outcar` will be dumped to 'conf.lmp' +```python +d_outcar.to("vasp/poscar", "POSCAR", frame_idx=-1) +``` +The last frames of `d_outcar` will be dumped to 'POSCAR'. + +The data stored in `LabeledSystem` can be dumped to deepmd-kit raw format, for example +```python +d_outcar.to("deepmd/raw", "dpmd_raw") +``` +Or a simpler command: +```python +dpdata.LabeledSystem("OUTCAR").to("deepmd/raw", "dpmd_raw") +``` +Frame selection can be implemented by +```python +dpdata.LabeledSystem("OUTCAR").sub_system([0, -1]).to("deepmd/raw", "dpmd_raw") +``` +by which only the first and last frames are dumped to `dpmd_raw`. + + +### replicate +dpdata will create a super cell of the current atom configuration. +```python +dpdata.System("./POSCAR").replicate( + ( + 1, + 2, + 3, + ) +) +``` +tuple(1,2,3) means don't copy atom configuration in x direction, make 2 copys in y direction, make 3 copys in z direction. + + +### perturb +By the following example, each frame of the original system (`dpdata.System('./POSCAR')`) is perturbed to generate three new frames. For each frame, the cell is perturbed by 5% and the atom positions are perturbed by 0.6 Angstrom. `atom_pert_style` indicates that the perturbation to the atom positions is subject to normal distribution. Other available options to `atom_pert_style` are`uniform` (uniform in a ball), and `const` (uniform on a sphere). +```python +perturbed_system = dpdata.System("./POSCAR").perturb( + pert_num=3, + cell_pert_fraction=0.05, + atom_pert_distance=0.6, + atom_pert_style="normal", +) +print(perturbed_system.data) +``` + +### replace +By the following example, Random 8 Hf atoms in the system will be replaced by Zr atoms with the atom postion unchanged. +```python +s = dpdata.System("tests/poscars/POSCAR.P42nmc", fmt="vasp/poscar") +s.replace("Hf", "Zr", 8) +s.to_vasp_poscar("POSCAR.P42nmc.replace") +``` diff --git a/dpdata/abacus/md.py b/dpdata/abacus/md.py index fa1841777..9e0a416a0 100644 --- a/dpdata/abacus/md.py +++ b/dpdata/abacus/md.py @@ -5,6 +5,8 @@ import numpy as np +from dpdata.utils import open_file + from .scf import ( bohr2ang, get_cell, @@ -156,12 +158,12 @@ def get_frame(fname): path_in = os.path.join(fname, "INPUT") else: raise RuntimeError("invalid input") - with open(path_in) as fp: + with open_file(path_in) as fp: inlines = fp.read().split("\n") geometry_path_in = get_geometry_in(fname, inlines) # base dir of STRU path_out = get_path_out(fname, inlines) - with open(geometry_path_in) as fp: + with open_file(geometry_path_in) as fp: geometry_inlines = fp.read().split("\n") celldm, cell = get_cell(geometry_inlines) atom_names, natoms, types, coords = get_coords( @@ -172,11 +174,11 @@ def get_frame(fname): # ndump = int(os.popen("ls -l %s | grep 'md_pos_' | wc -l" %path_out).readlines()[0]) # number of dumped geometry files # coords = get_coords_from_cif(ndump, dump_freq, atom_names, natoms, types, path_out, cell) - with open(os.path.join(path_out, "MD_dump")) as fp: + with open_file(os.path.join(path_out, "MD_dump")) as fp: dumplines = fp.read().split("\n") coords, cells, force, stress = get_coords_from_dump(dumplines, natoms) ndump = np.shape(coords)[0] - with open(os.path.join(path_out, "running_md.log")) as fp: + with open_file(os.path.join(path_out, "running_md.log")) as fp: outlines = fp.read().split("\n") energy = get_energy(outlines, ndump, dump_freq) diff --git a/dpdata/abacus/relax.py b/dpdata/abacus/relax.py index 976243b82..7a4abf357 100644 --- a/dpdata/abacus/relax.py +++ b/dpdata/abacus/relax.py @@ -4,6 +4,8 @@ import numpy as np +from dpdata.utils import open_file + from .scf import ( bohr2ang, collect_force, @@ -174,10 +176,10 @@ def get_frame(fname): path_in = os.path.join(fname, "INPUT") else: raise RuntimeError("invalid input") - with open(path_in) as fp: + with open_file(path_in) as fp: inlines = fp.read().split("\n") geometry_path_in = get_geometry_in(fname, inlines) # base dir of STRU - with open(geometry_path_in) as fp: + with open_file(geometry_path_in) as fp: geometry_inlines = fp.read().split("\n") celldm, cell = get_cell(geometry_inlines) atom_names, natoms, types, coord_tmp = get_coords( @@ -186,7 +188,7 @@ def get_frame(fname): logf = get_log_file(fname, inlines) assert os.path.isfile(logf), f"Error: can not find {logf}" - with open(logf) as f1: + with open_file(logf) as f1: lines = f1.readlines() atomnumber = 0 diff --git a/dpdata/abacus/scf.py b/dpdata/abacus/scf.py index 193e4d4b5..93e3d6e15 100644 --- a/dpdata/abacus/scf.py +++ b/dpdata/abacus/scf.py @@ -6,6 +6,8 @@ import numpy as np +from dpdata.utils import open_file + from ..unit import EnergyConversion, LengthConversion, PressureConversion bohr2ang = LengthConversion("bohr", "angstrom").value() @@ -190,7 +192,7 @@ def collect_force(outlines): def get_force(outlines, natoms): force = collect_force(outlines) if len(force) == 0: - return [[]] + return None else: return np.array(force[-1]) # only return the last force @@ -253,7 +255,7 @@ def get_frame(fname): if not CheckFile(path_in): return data - with open(path_in) as fp: + with open_file(path_in) as fp: inlines = fp.read().split("\n") geometry_path_in = get_geometry_in(fname, inlines) @@ -261,9 +263,9 @@ def get_frame(fname): if not (CheckFile(geometry_path_in) and CheckFile(path_out)): return data - with open(geometry_path_in) as fp: + with open_file(geometry_path_in) as fp: geometry_inlines = fp.read().split("\n") - with open(path_out) as fp: + with open_file(path_out) as fp: outlines = fp.read().split("\n") celldm, cell = get_cell(geometry_inlines) @@ -285,7 +287,7 @@ def get_frame(fname): data["cells"] = cell[np.newaxis, :, :] data["coords"] = coords[np.newaxis, :, :] data["energies"] = np.array(energy)[np.newaxis] - data["forces"] = force[np.newaxis, :, :] + data["forces"] = np.empty((0,)) if force is None else force[np.newaxis, :, :] if stress is not None: data["virials"] = stress[np.newaxis, :, :] data["orig"] = np.zeros(3) @@ -338,7 +340,7 @@ def get_nele_from_stru(geometry_inlines): def get_frame_from_stru(fname): assert isinstance(fname, str) - with open(fname) as fp: + with open_file(fname) as fp: geometry_inlines = fp.read().split("\n") nele = get_nele_from_stru(geometry_inlines) inlines = ["ntype %d" % nele] diff --git a/dpdata/amber/md.py b/dpdata/amber/md.py index f3217fbd9..cb4f2d25e 100644 --- a/dpdata/amber/md.py +++ b/dpdata/amber/md.py @@ -7,6 +7,7 @@ from dpdata.amber.mask import pick_by_amber_mask from dpdata.unit import EnergyConversion +from dpdata.utils import open_file from ..periodic_table import ELEMENTS @@ -51,7 +52,7 @@ def read_amber_traj( flag_atom_numb = False amber_types = [] atomic_number = [] - with open(parm7_file) as f: + with open_file(parm7_file) as f: for line in f: if line.startswith("%FLAG"): flag_atom_type = line.startswith("%FLAG AMBER_ATOM_TYPE") @@ -101,14 +102,14 @@ def read_amber_traj( # load energy from mden_file or mdout_file energies = [] if mden_file is not None and os.path.isfile(mden_file): - with open(mden_file) as f: + with open_file(mden_file) as f: for line in f: if line.startswith("L6"): s = line.split() if s[2] != "E_pot": energies.append(float(s[2])) elif mdout_file is not None and os.path.isfile(mdout_file): - with open(mdout_file) as f: + with open_file(mdout_file) as f: for line in f: if "EPtot" in line: s = line.split() diff --git a/dpdata/amber/sqm.py b/dpdata/amber/sqm.py index 1be3802a2..93e41f9aa 100644 --- a/dpdata/amber/sqm.py +++ b/dpdata/amber/sqm.py @@ -1,9 +1,15 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np from dpdata.periodic_table import ELEMENTS from dpdata.unit import EnergyConversion +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType kcal2ev = EnergyConversion("kcal_mol", "eV").value() @@ -14,7 +20,7 @@ READ_FORCES = 7 -def parse_sqm_out(fname): +def parse_sqm_out(fname: FileType): """Read atom symbols, charges and coordinates from ambertools sqm.out file.""" atom_symbols = [] coords = [] @@ -22,7 +28,7 @@ def parse_sqm_out(fname): forces = [] energies = [] - with open(fname) as f: + with open_file(fname) as f: flag = START for line in f: if line.startswith(" Total SCF energy"): @@ -81,7 +87,7 @@ def parse_sqm_out(fname): return data -def make_sqm_in(data, fname=None, frame_idx=0, **kwargs): +def make_sqm_in(data, fname: FileType | None = None, frame_idx=0, **kwargs): symbols = [data["atom_names"][ii] for ii in data["atom_types"]] atomic_numbers = [ELEMENTS.index(ss) + 1 for ss in symbols] charge = kwargs.get("charge", 0) @@ -109,6 +115,6 @@ def make_sqm_in(data, fname=None, frame_idx=0, **kwargs): f"{data['coords'][frame_idx][ii, 2]:.6f}", ) if fname is not None: - with open(fname, "w") as fp: + with open_file(fname, "w") as fp: fp.write(ret) return ret diff --git a/dpdata/deepmd/comp.py b/dpdata/deepmd/comp.py index ab0044477..3d656b709 100644 --- a/dpdata/deepmd/comp.py +++ b/dpdata/deepmd/comp.py @@ -8,6 +8,7 @@ import numpy as np import dpdata +from dpdata.utils import open_file from .raw import load_type @@ -67,40 +68,42 @@ def to_system_data(folder, type_map=None, labels=True): data["virials"] = np.concatenate(all_virs, axis=0) # allow custom dtypes if labels: - for dtype in dpdata.system.LabeledSystem.DTYPES: - if dtype.name in ( - "atom_numbs", - "atom_names", - "atom_types", - "orig", - "cells", - "coords", - "real_atom_types", - "real_atom_names", - "nopbc", - "energies", - "forces", - "virials", - ): - # skip as these data contains specific rules - continue - if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES): - warnings.warn( - f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/npy format." - ) - continue - natoms = data["coords"].shape[1] - shape = [ - natoms if xx == dpdata.system.Axis.NATOMS else xx - for xx in dtype.shape[1:] - ] - all_data = [] - for ii in sets: - tmp = _cond_load_data(os.path.join(ii, dtype.name + ".npy")) - if tmp is not None: - all_data.append(np.reshape(tmp, [tmp.shape[0], *shape])) - if len(all_data) > 0: - data[dtype.name] = np.concatenate(all_data, axis=0) + dtypes = dpdata.system.LabeledSystem.DTYPES + else: + dtypes = dpdata.system.System.DTYPES + + for dtype in dtypes: + if dtype.name in ( + "atom_numbs", + "atom_names", + "atom_types", + "orig", + "cells", + "coords", + "real_atom_names", + "nopbc", + "energies", + "forces", + "virials", + ): + # skip as these data contains specific rules + continue + if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES): + warnings.warn( + f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/npy format." + ) + continue + natoms = data["coords"].shape[1] + shape = [ + natoms if xx == dpdata.system.Axis.NATOMS else xx for xx in dtype.shape[1:] + ] + all_data = [] + for ii in sets: + tmp = _cond_load_data(os.path.join(ii, dtype.name + ".npy")) + if tmp is not None: + all_data.append(np.reshape(tmp, [tmp.shape[0], *shape])) + if len(all_data) > 0: + data[dtype.name] = np.concatenate(all_data, axis=0) return data @@ -170,10 +173,15 @@ def dump(folder, data, set_size=5000, comp_prec=np.float32, remove_sets=True): except OSError: pass if data.get("nopbc", False): - with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: + with open_file(os.path.join(folder, "nopbc"), "w") as fw_nopbc: pass # allow custom dtypes - for dtype in dpdata.system.LabeledSystem.DTYPES: + labels = "energies" in data + if labels: + dtypes = dpdata.system.LabeledSystem.DTYPES + else: + dtypes = dpdata.system.System.DTYPES + for dtype in dtypes: if dtype.name in ( "atom_numbs", "atom_names", @@ -181,7 +189,6 @@ def dump(folder, data, set_size=5000, comp_prec=np.float32, remove_sets=True): "orig", "cells", "coords", - "real_atom_types", "real_atom_names", "nopbc", "energies", @@ -197,7 +204,9 @@ def dump(folder, data, set_size=5000, comp_prec=np.float32, remove_sets=True): f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted to deepmd/npy format." ) continue - ddata = np.reshape(data[dtype.name], [nframes, -1]).astype(comp_prec) + ddata = np.reshape(data[dtype.name], [nframes, -1]) + if np.issubdtype(ddata.dtype, np.floating): + ddata = ddata.astype(comp_prec) for ii in range(nsets): set_stt = ii * set_size set_end = (ii + 1) * set_size diff --git a/dpdata/deepmd/hdf5.py b/dpdata/deepmd/hdf5.py index 34ae9dbef..6b5878932 100644 --- a/dpdata/deepmd/hdf5.py +++ b/dpdata/deepmd/hdf5.py @@ -102,7 +102,11 @@ def to_system_data( }, } # allow custom dtypes - for dtype in dpdata.system.LabeledSystem.DTYPES: + if labels: + dtypes = dpdata.system.LabeledSystem.DTYPES + else: + dtypes = dpdata.system.System.DTYPES + for dtype in dtypes: if dtype.name in ( "atom_numbs", "atom_names", @@ -210,8 +214,13 @@ def dump( "virials": {"fn": "virial", "shape": (nframes, 9), "dump": True}, } + labels = "energies" in data + if labels: + dtypes = dpdata.system.LabeledSystem.DTYPES + else: + dtypes = dpdata.system.System.DTYPES # allow custom dtypes - for dtype in dpdata.system.LabeledSystem.DTYPES: + for dtype in dtypes: if dtype.name in ( "atom_numbs", "atom_names", @@ -243,9 +252,10 @@ def dump( for dt, prop in data_types.items(): if dt in data: if prop["dump"]: - reshaped_data[dt] = np.reshape(data[dt], prop["shape"]).astype( - comp_prec - ) + ddata = np.reshape(data[dt], prop["shape"]) + if np.issubdtype(ddata.dtype, np.floating): + ddata = ddata.astype(comp_prec) + reshaped_data[dt] = ddata # dump frame properties: cell, coord, energy, force and virial nsets = nframes // set_size diff --git a/dpdata/deepmd/mixed.py b/dpdata/deepmd/mixed.py index b25107dbc..2aff3a28a 100644 --- a/dpdata/deepmd/mixed.py +++ b/dpdata/deepmd/mixed.py @@ -1,53 +1,16 @@ from __future__ import annotations -import glob -import os -import shutil +import copy import numpy as np - -def load_type(folder): - data = {} - data["atom_names"] = [] - # if find type_map.raw, use it - assert os.path.isfile( - os.path.join(folder, "type_map.raw") - ), "Mixed type system must have type_map.raw!" - with open(os.path.join(folder, "type_map.raw")) as fp: - data["atom_names"] = fp.read().split() - - return data - - -def formula(atom_names, atom_numbs): - """Return the formula of this system, like C3H5O2.""" - return "".join([f"{symbol}{numb}" for symbol, numb in zip(atom_names, atom_numbs)]) - - -def _cond_load_data(fname): - tmp = None - if os.path.isfile(fname): - tmp = np.load(fname) - return tmp - - -def _load_set(folder, nopbc: bool): - coords = np.load(os.path.join(folder, "coord.npy")) - if nopbc: - cells = np.zeros((coords.shape[0], 3, 3)) - else: - cells = np.load(os.path.join(folder, "box.npy")) - eners = _cond_load_data(os.path.join(folder, "energy.npy")) - forces = _cond_load_data(os.path.join(folder, "force.npy")) - virs = _cond_load_data(os.path.join(folder, "virial.npy")) - real_atom_types = np.load(os.path.join(folder, "real_atom_types.npy")) - return cells, coords, eners, forces, virs, real_atom_types +from .comp import dump as comp_dump +from .comp import to_system_data as comp_to_system_data def to_system_data(folder, type_map=None, labels=True): + data = comp_to_system_data(folder, type_map, labels) # data is empty - data = load_type(folder) old_type_map = data["atom_names"].copy() if type_map is not None: assert isinstance(type_map, list) @@ -59,50 +22,16 @@ def to_system_data(folder, type_map=None, labels=True): data["atom_names"] = type_map.copy() else: index_map = None - data["orig"] = np.zeros([3]) - if os.path.isfile(os.path.join(folder, "nopbc")): - data["nopbc"] = True - sets = sorted(glob.glob(os.path.join(folder, "set.*"))) - all_cells = [] - all_coords = [] - all_eners = [] - all_forces = [] - all_virs = [] - all_real_atom_types = [] - for ii in sets: - cells, coords, eners, forces, virs, real_atom_types = _load_set( - ii, data.get("nopbc", False) - ) - nframes = np.reshape(cells, [-1, 3, 3]).shape[0] - all_cells.append(np.reshape(cells, [nframes, 3, 3])) - all_coords.append(np.reshape(coords, [nframes, -1, 3])) - if index_map is None: - all_real_atom_types.append(np.reshape(real_atom_types, [nframes, -1])) - else: - all_real_atom_types.append( - np.reshape(index_map[real_atom_types], [nframes, -1]) - ) - if eners is not None: - eners = np.reshape(eners, [nframes]) - if labels: - if eners is not None and eners.size > 0: - all_eners.append(np.reshape(eners, [nframes])) - if forces is not None and forces.size > 0: - all_forces.append(np.reshape(forces, [nframes, -1, 3])) - if virs is not None and virs.size > 0: - all_virs.append(np.reshape(virs, [nframes, 3, 3])) - all_cells_concat = np.concatenate(all_cells, axis=0) - all_coords_concat = np.concatenate(all_coords, axis=0) - all_real_atom_types_concat = np.concatenate(all_real_atom_types, axis=0) - all_eners_concat = None - all_forces_concat = None - all_virs_concat = None - if len(all_eners) > 0: - all_eners_concat = np.concatenate(all_eners, axis=0) - if len(all_forces) > 0: - all_forces_concat = np.concatenate(all_forces, axis=0) - if len(all_virs) > 0: - all_virs_concat = np.concatenate(all_virs, axis=0) + all_real_atom_types_concat = data.pop("real_atom_types").astype(int) + if index_map is not None: + all_real_atom_types_concat = index_map[all_real_atom_types_concat] + all_cells_concat = data["cells"] + all_coords_concat = data["coords"] + if labels: + all_eners_concat = data.get("energies") + all_forces_concat = data.get("forces") + all_virs_concat = data.get("virials") + data_list = [] while True: if all_real_atom_types_concat.size == 0: @@ -142,92 +71,22 @@ def to_system_data(folder, type_map=None, labels=True): def dump(folder, data, set_size=2000, comp_prec=np.float32, remove_sets=True): - os.makedirs(folder, exist_ok=True) - sets = sorted(glob.glob(os.path.join(folder, "set.*"))) - if len(sets) > 0: - if remove_sets: - for ii in sets: - shutil.rmtree(ii) - else: - raise RuntimeError( - "found " - + str(sets) - + " in " - + folder - + "not a clean deepmd raw dir. please firstly clean set.* then try compress" - ) # if not converted to mixed if "real_atom_types" not in data: from dpdata import LabeledSystem, System + # not change the original content + data = copy.deepcopy(data) + if "energies" in data: temp_sys = LabeledSystem(data=data) else: temp_sys = System(data=data) temp_sys.convert_to_mixed_type() - # dump raw - np.savetxt(os.path.join(folder, "type.raw"), data["atom_types"], fmt="%d") - np.savetxt(os.path.join(folder, "type_map.raw"), data["real_atom_names"], fmt="%s") - # BondOrder System - if "bonds" in data: - np.savetxt( - os.path.join(folder, "bonds.raw"), - data["bonds"], - header="begin_atom, end_atom, bond_order", - ) - if "formal_charges" in data: - np.savetxt(os.path.join(folder, "formal_charges.raw"), data["formal_charges"]) - # reshape frame properties and convert prec - nframes = data["cells"].shape[0] - cells = np.reshape(data["cells"], [nframes, 9]).astype(comp_prec) - coords = np.reshape(data["coords"], [nframes, -1]).astype(comp_prec) - eners = None - forces = None - virials = None - real_atom_types = None - if "energies" in data: - eners = np.reshape(data["energies"], [nframes]).astype(comp_prec) - if "forces" in data: - forces = np.reshape(data["forces"], [nframes, -1]).astype(comp_prec) - if "virials" in data: - virials = np.reshape(data["virials"], [nframes, 9]).astype(comp_prec) - if "atom_pref" in data: - atom_pref = np.reshape(data["atom_pref"], [nframes, -1]).astype(comp_prec) - if "real_atom_types" in data: - real_atom_types = np.reshape(data["real_atom_types"], [nframes, -1]).astype( - np.int64 - ) - # dump frame properties: cell, coord, energy, force and virial - nsets = nframes // set_size - if set_size * nsets < nframes: - nsets += 1 - for ii in range(nsets): - set_stt = ii * set_size - set_end = (ii + 1) * set_size - set_folder = os.path.join(folder, "set.%06d" % ii) - os.makedirs(set_folder) - np.save(os.path.join(set_folder, "box"), cells[set_stt:set_end]) - np.save(os.path.join(set_folder, "coord"), coords[set_stt:set_end]) - if eners is not None: - np.save(os.path.join(set_folder, "energy"), eners[set_stt:set_end]) - if forces is not None: - np.save(os.path.join(set_folder, "force"), forces[set_stt:set_end]) - if virials is not None: - np.save(os.path.join(set_folder, "virial"), virials[set_stt:set_end]) - if real_atom_types is not None: - np.save( - os.path.join(set_folder, "real_atom_types"), - real_atom_types[set_stt:set_end], - ) - if "atom_pref" in data: - np.save(os.path.join(set_folder, "atom_pref"), atom_pref[set_stt:set_end]) - try: - os.remove(os.path.join(folder, "nopbc")) - except OSError: - pass - if data.get("nopbc", False): - with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: - pass + + data = data.copy() + data["atom_names"] = data.pop("real_atom_names") + comp_dump(folder, data, set_size, comp_prec, remove_sets) def mix_system(*system, type_map, **kwargs): diff --git a/dpdata/deepmd/raw.py b/dpdata/deepmd/raw.py index e772714a1..f8ddcaf35 100644 --- a/dpdata/deepmd/raw.py +++ b/dpdata/deepmd/raw.py @@ -6,6 +6,7 @@ import numpy as np import dpdata +from dpdata.utils import open_file def load_type(folder, type_map=None): @@ -14,13 +15,10 @@ def load_type(folder, type_map=None): int ) ntypes = np.max(data["atom_types"]) + 1 - data["atom_numbs"] = [] - for ii in range(ntypes): - data["atom_numbs"].append(np.count_nonzero(data["atom_types"] == ii)) data["atom_names"] = [] # if find type_map.raw, use it if os.path.isfile(os.path.join(folder, "type_map.raw")): - with open(os.path.join(folder, "type_map.raw")) as fp: + with open_file(os.path.join(folder, "type_map.raw")) as fp: my_type_map = fp.read().split() # else try to use arg type_map elif type_map is not None: @@ -30,9 +28,10 @@ def load_type(folder, type_map=None): my_type_map = [] for ii in range(ntypes): my_type_map.append("Type_%d" % ii) - assert len(my_type_map) >= len(data["atom_numbs"]) - for ii in range(len(data["atom_numbs"])): - data["atom_names"].append(my_type_map[ii]) + data["atom_names"] = my_type_map + data["atom_numbs"] = [] + for ii, _ in enumerate(data["atom_names"]): + data["atom_numbs"].append(np.count_nonzero(data["atom_types"] == ii)) return data @@ -64,40 +63,41 @@ def to_system_data(folder, type_map=None, labels=True): data["nopbc"] = True # allow custom dtypes if labels: - for dtype in dpdata.system.LabeledSystem.DTYPES: - if dtype.name in ( - "atom_numbs", - "atom_names", - "atom_types", - "orig", - "cells", - "coords", - "real_atom_types", - "real_atom_names", - "nopbc", - "energies", - "forces", - "virials", - ): - # skip as these data contains specific rules - continue - if not ( - len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES - ): - warnings.warn( - f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/raw format." - ) - continue - natoms = data["coords"].shape[1] - shape = [ - natoms if xx == dpdata.system.Axis.NATOMS else xx - for xx in dtype.shape[1:] - ] - if os.path.exists(os.path.join(folder, f"{dtype.name}.raw")): - data[dtype.name] = np.reshape( - np.loadtxt(os.path.join(folder, f"{dtype.name}.raw")), - [nframes, *shape], - ) + dtypes = dpdata.system.LabeledSystem.DTYPES + else: + dtypes = dpdata.system.System.DTYPES + for dtype in dtypes: + if dtype.name in ( + "atom_numbs", + "atom_names", + "atom_types", + "orig", + "cells", + "coords", + "real_atom_types", + "real_atom_names", + "nopbc", + "energies", + "forces", + "virials", + ): + # skip as these data contains specific rules + continue + if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES): + warnings.warn( + f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/raw format." + ) + continue + natoms = data["coords"].shape[1] + shape = [ + natoms if xx == dpdata.system.Axis.NATOMS else xx + for xx in dtype.shape[1:] + ] + if os.path.exists(os.path.join(folder, f"{dtype.name}.raw")): + data[dtype.name] = np.reshape( + np.loadtxt(os.path.join(folder, f"{dtype.name}.raw")), + [nframes, *shape], + ) return data else: raise RuntimeError("not dir " + folder) @@ -141,10 +141,15 @@ def dump(folder, data): except OSError: pass if data.get("nopbc", False): - with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: + with open_file(os.path.join(folder, "nopbc"), "w") as fw_nopbc: pass # allow custom dtypes - for dtype in dpdata.system.LabeledSystem.DTYPES: + labels = "energies" in data + if labels: + dtypes = dpdata.system.LabeledSystem.DTYPES + else: + dtypes = dpdata.system.System.DTYPES + for dtype in dtypes: if dtype.name in ( "atom_numbs", "atom_names", diff --git a/dpdata/dftbplus/output.py b/dpdata/dftbplus/output.py index 0f10c3ac9..49fdd2b1b 100644 --- a/dpdata/dftbplus/output.py +++ b/dpdata/dftbplus/output.py @@ -1,9 +1,18 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType + -def read_dftb_plus(fn_1: str, fn_2: str) -> tuple[str, np.ndarray, float, np.ndarray]: +def read_dftb_plus( + fn_1: FileType, fn_2: FileType +) -> tuple[str, np.ndarray, float, np.ndarray]: """Read from DFTB+ input and output. Parameters @@ -29,7 +38,7 @@ def read_dftb_plus(fn_1: str, fn_2: str) -> tuple[str, np.ndarray, float, np.nda symbols = None forces = None energy = None - with open(fn_1) as f: + with open_file(fn_1) as f: flag = 0 for line in f: if flag == 1: @@ -49,7 +58,7 @@ def read_dftb_plus(fn_1: str, fn_2: str) -> tuple[str, np.ndarray, float, np.nda flag += 1 if flag == 7: flag = 0 - with open(fn_2) as f: + with open_file(fn_2) as f: flag = 0 for line in f: if line.startswith("Total Forces"): diff --git a/dpdata/gaussian/log.py b/dpdata/gaussian/log.py index 204cf464c..08a65b9dc 100644 --- a/dpdata/gaussian/log.py +++ b/dpdata/gaussian/log.py @@ -1,7 +1,14 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType + from ..periodic_table import ELEMENTS from ..unit import EnergyConversion, ForceConversion, LengthConversion @@ -12,7 +19,7 @@ symbols = ["X"] + ELEMENTS -def to_system_data(file_name, md=False): +def to_system_data(file_name: FileType, md=False): """Read Gaussian log file. Parameters @@ -43,7 +50,7 @@ def to_system_data(file_name, md=False): nopbc = True coords = None - with open(file_name) as fp: + with open_file(file_name) as fp: for line in fp: if line.startswith(" SCF Done"): # energies diff --git a/dpdata/gromacs/gro.py b/dpdata/gromacs/gro.py index aca2443b8..fe83e0c5c 100644 --- a/dpdata/gromacs/gro.py +++ b/dpdata/gromacs/gro.py @@ -2,9 +2,15 @@ from __future__ import annotations import re +from typing import TYPE_CHECKING import numpy as np +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType + from ..unit import LengthConversion nm2ang = LengthConversion("nm", "angstrom").value() @@ -48,9 +54,9 @@ def _get_cell(line): return cell -def file_to_system_data(fname, format_atom_name=True, **kwargs): +def file_to_system_data(fname: FileType, format_atom_name=True, **kwargs): system = {"coords": [], "cells": []} - with open(fname) as fp: + with open_file(fname) as fp: frame = 0 while True: flag = fp.readline() diff --git a/dpdata/lammps/dump.py b/dpdata/lammps/dump.py index f0ade2b03..72fee27df 100644 --- a/dpdata/lammps/dump.py +++ b/dpdata/lammps/dump.py @@ -3,9 +3,15 @@ import os import sys +from typing import TYPE_CHECKING import numpy as np +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType + lib_path = os.path.dirname(os.path.realpath(__file__)) sys.path.append(lib_path) import warnings @@ -169,11 +175,11 @@ def box2dumpbox(orig, box): return bounds, tilt -def load_file(fname, begin=0, step=1): +def load_file(fname: FileType, begin=0, step=1): lines = [] buff = [] cc = -1 - with open(fname) as fp: + with open_file(fname) as fp: while True: line = fp.readline().rstrip("\n") if not line: diff --git a/dpdata/openmx/omx.py b/dpdata/openmx/omx.py index d3afff00f..aae9b5786 100644 --- a/dpdata/openmx/omx.py +++ b/dpdata/openmx/omx.py @@ -1,8 +1,15 @@ #!/usr/bin/python3 from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType + from ..unit import ( EnergyConversion, ForceConversion, @@ -98,12 +105,12 @@ def load_cells(lines): # load atom_names, atom_numbs, atom_types, cells -def load_param_file(fname, mdname): - with open(fname) as dat_file: +def load_param_file(fname: FileType, mdname: FileType): + with open_file(fname) as dat_file: lines = dat_file.readlines() atom_names, atom_types, atom_numbs = load_atom(lines) - with open(mdname) as md_file: + with open_file(mdname) as md_file: lines = md_file.readlines() cells = load_cells(lines) return atom_names, atom_numbs, atom_types, cells @@ -133,15 +140,15 @@ def load_coords(lines, atom_names, natoms): return coords -def load_data(mdname, atom_names, natoms): - with open(mdname) as md_file: +def load_data(mdname: FileType, atom_names, natoms): + with open_file(mdname) as md_file: lines = md_file.readlines() coords = load_coords(lines, atom_names, natoms) steps = [str(i) for i in range(1, coords.shape[0] + 1)] return coords, steps -def to_system_data(fname, mdname): +def to_system_data(fname: FileType, mdname: FileType): data = {} ( data["atom_names"], @@ -194,7 +201,7 @@ def load_force(lines, atom_names, atom_numbs): # load energy, force def to_system_label(fname, mdname): atom_names, atom_numbs, atom_types, cells = load_param_file(fname, mdname) - with open(mdname) as md_file: + with open_file(mdname) as md_file: lines = md_file.readlines() energy = load_energy(lines) force = load_force(lines, atom_names, atom_numbs) diff --git a/dpdata/orca/output.py b/dpdata/orca/output.py index a23013fda..a0915162b 100644 --- a/dpdata/orca/output.py +++ b/dpdata/orca/output.py @@ -1,9 +1,18 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType + -def read_orca_sp_output(fn: str) -> tuple[np.ndarray, np.ndarray, float, np.ndarray]: +def read_orca_sp_output( + fn: FileType, +) -> tuple[np.ndarray, np.ndarray, float, np.ndarray]: """Read from ORCA output. Note that both the energy and the gradient should be printed. @@ -28,7 +37,7 @@ def read_orca_sp_output(fn: str) -> tuple[np.ndarray, np.ndarray, float, np.ndar symbols = None forces = None energy = None - with open(fn) as f: + with open_file(fn) as f: flag = 0 for line in f: if flag in (1, 3, 4): diff --git a/dpdata/plugins/abacus.py b/dpdata/plugins/abacus.py index eb2d7786f..e3367b351 100644 --- a/dpdata/plugins/abacus.py +++ b/dpdata/plugins/abacus.py @@ -1,9 +1,15 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import dpdata.abacus.md import dpdata.abacus.relax import dpdata.abacus.scf from dpdata.format import Format +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType @Format.register("abacus/stru") @@ -12,7 +18,7 @@ class AbacusSTRUFormat(Format): def from_system(self, file_name, **kwargs): return dpdata.abacus.scf.get_frame_from_stru(file_name) - def to_system(self, data, file_name, frame_idx=0, **kwargs): + def to_system(self, data, file_name: FileType, frame_idx=0, **kwargs): """Dump the system into ABACUS STRU format file. Parameters @@ -46,7 +52,7 @@ def to_system(self, data, file_name, frame_idx=0, **kwargs): numerical_descriptor=numerical_descriptor, mass=mass, ) - with open(file_name, "w") as fp: + with open_file(file_name, "w") as fp: fp.write(stru_string) diff --git a/dpdata/plugins/amber.py b/dpdata/plugins/amber.py index 42fce5528..361e0d8a7 100644 --- a/dpdata/plugins/amber.py +++ b/dpdata/plugins/amber.py @@ -8,6 +8,7 @@ import dpdata.amber.sqm from dpdata.driver import Driver, Minimizer from dpdata.format import Format +from dpdata.utils import open_file @Format.register("amber/md") @@ -143,7 +144,7 @@ def label(self, data: dict) -> dict: [*self.sqm_exec.split(), "-O", "-i", inp_fn, "-o", out_fn] ) except sp.CalledProcessError as e: - with open(out_fn) as f: + with open_file(out_fn) as f: raise RuntimeError( "Run sqm failed! Output:\n" + f.read() ) from e diff --git a/dpdata/plugins/gaussian.py b/dpdata/plugins/gaussian.py index b55447b91..55bee5a4c 100644 --- a/dpdata/plugins/gaussian.py +++ b/dpdata/plugins/gaussian.py @@ -3,16 +3,21 @@ import os import subprocess as sp import tempfile +from typing import TYPE_CHECKING import dpdata.gaussian.gjf import dpdata.gaussian.log from dpdata.driver import Driver from dpdata.format import Format +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType @Format.register("gaussian/log") class GaussianLogFormat(Format): - def from_labeled_system(self, file_name, md=False, **kwargs): + def from_labeled_system(self, file_name: FileType, md=False, **kwargs): try: return dpdata.gaussian.log.to_system_data(file_name, md=md) except AssertionError: @@ -21,7 +26,7 @@ def from_labeled_system(self, file_name, md=False, **kwargs): @Format.register("gaussian/md") class GaussianMDFormat(Format): - def from_labeled_system(self, file_name, **kwargs): + def from_labeled_system(self, file_name: FileType, **kwargs): return GaussianLogFormat().from_labeled_system(file_name, md=True) @@ -29,7 +34,7 @@ def from_labeled_system(self, file_name, **kwargs): class GaussiaGJFFormat(Format): """Gaussian input file.""" - def from_system(self, file_name: str, **kwargs): + def from_system(self, file_name: FileType, **kwargs): """Read Gaussian input file. Parameters @@ -39,11 +44,11 @@ def from_system(self, file_name: str, **kwargs): **kwargs : dict keyword arguments """ - with open(file_name) as fp: + with open_file(file_name) as fp: text = fp.read() return dpdata.gaussian.gjf.read_gaussian_input(text) - def to_system(self, data: dict, file_name: str, **kwargs): + def to_system(self, data: dict, file_name: FileType, **kwargs): """Generate Gaussian input file. Parameters @@ -56,7 +61,7 @@ def to_system(self, data: dict, file_name: str, **kwargs): Other parameters to make input files. See :meth:`dpdata.gaussian.gjf.make_gaussian_input` """ text = dpdata.gaussian.gjf.make_gaussian_input(data, **kwargs) - with open(file_name, "w") as fp: + with open_file(file_name, "w") as fp: fp.write(text) @@ -110,7 +115,7 @@ def label(self, data: dict) -> dict: try: sp.check_output([*self.gaussian_exec.split(), inp_fn]) except sp.CalledProcessError as e: - with open(out_fn) as f: + with open_file(out_fn) as f: out = f.read() raise RuntimeError("Run gaussian failed! Output:\n" + out) from e labeled_system.append(dpdata.LabeledSystem(out_fn, fmt="gaussian/log")) diff --git a/dpdata/plugins/gromacs.py b/dpdata/plugins/gromacs.py index 12dece718..a7066bbcc 100644 --- a/dpdata/plugins/gromacs.py +++ b/dpdata/plugins/gromacs.py @@ -1,7 +1,13 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import dpdata.gromacs.gro from dpdata.format import Format +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType @Format.register("gro") @@ -23,7 +29,9 @@ def from_system(self, file_name, format_atom_name=True, **kwargs): file_name, format_atom_name=format_atom_name, **kwargs ) - def to_system(self, data, file_name=None, frame_idx=-1, **kwargs): + def to_system( + self, data, file_name: FileType | None = None, frame_idx=-1, **kwargs + ): """Dump the system in gromacs .gro format. Parameters @@ -52,5 +60,5 @@ def to_system(self, data, file_name=None, frame_idx=-1, **kwargs): if file_name is None: return gro_str else: - with open(file_name, "w+") as fp: + with open_file(file_name, "w+") as fp: fp.write(gro_str) diff --git a/dpdata/plugins/lammps.py b/dpdata/plugins/lammps.py index 65e7f5701..0327d444b 100644 --- a/dpdata/plugins/lammps.py +++ b/dpdata/plugins/lammps.py @@ -1,20 +1,26 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import dpdata.lammps.dump import dpdata.lammps.lmp from dpdata.format import Format +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType @Format.register("lmp") @Format.register("lammps/lmp") class LAMMPSLmpFormat(Format): @Format.post("shift_orig_zero") - def from_system(self, file_name, type_map=None, **kwargs): - with open(file_name) as fp: + def from_system(self, file_name: FileType, type_map=None, **kwargs): + with open_file(file_name) as fp: lines = [line.rstrip("\n") for line in fp] return dpdata.lammps.lmp.to_system_data(lines, type_map) - def to_system(self, data, file_name, frame_idx=0, **kwargs): + def to_system(self, data, file_name: FileType, frame_idx=0, **kwargs): """Dump the system in lammps data format. Parameters @@ -30,7 +36,7 @@ def to_system(self, data, file_name, frame_idx=0, **kwargs): """ assert frame_idx < len(data["coords"]) w_str = dpdata.lammps.lmp.from_system_data(data, frame_idx) - with open(file_name, "w") as fp: + with open_file(file_name, "w") as fp: fp.write(w_str) diff --git a/dpdata/plugins/n2p2.py b/dpdata/plugins/n2p2.py index b70d6e6fb..28942ff5f 100644 --- a/dpdata/plugins/n2p2.py +++ b/dpdata/plugins/n2p2.py @@ -1,8 +1,14 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np from dpdata.format import Format +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType from ..unit import EnergyConversion, ForceConversion, LengthConversion @@ -44,7 +50,7 @@ class N2P2Format(Format): For more information about the n2p2 format, please refer to https://compphysvienna.github.io/n2p2/topics/cfg_file.html """ - def from_labeled_system(self, file_name, **kwargs): + def from_labeled_system(self, file_name: FileType, **kwargs): """Read from n2p2 format. Parameters @@ -67,7 +73,7 @@ def from_labeled_system(self, file_name, **kwargs): natom0 = None natoms0 = None atom_types0 = None - with open(file_name) as file: + with open_file(file_name) as file: for line in file: line = line.strip() # Remove leading/trailing whitespace if line.lower() == "begin": @@ -155,7 +161,7 @@ def from_labeled_system(self, file_name, **kwargs): "forces": forces, } - def to_labeled_system(self, data, file_name, **kwargs): + def to_labeled_system(self, data, file_name: FileType, **kwargs): """Write n2p2 format. By default, LabeledSystem.to will fallback to System.to. @@ -193,5 +199,5 @@ def to_labeled_system(self, data, file_name, **kwargs): buff.append(f"energy {energy:15.6f}") buff.append(f"charge {0:15.6f}") buff.append("end") - with open(file_name, "w") as fp: + with open_file(file_name, "w") as fp: fp.write("\n".join(buff)) diff --git a/dpdata/plugins/orca.py b/dpdata/plugins/orca.py index 9dc32c32c..7a0b806c9 100644 --- a/dpdata/plugins/orca.py +++ b/dpdata/plugins/orca.py @@ -1,11 +1,16 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np from dpdata.format import Format from dpdata.orca.output import read_orca_sp_output from dpdata.unit import EnergyConversion, ForceConversion +if TYPE_CHECKING: + from dpdata.utils import FileType + energy_convert = EnergyConversion("hartree", "eV").value() force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value() @@ -18,12 +23,12 @@ class ORCASPOutFormat(Format): printed into the output file. """ - def from_labeled_system(self, file_name: str, **kwargs) -> dict: + def from_labeled_system(self, file_name: FileType, **kwargs) -> dict: """Read from ORCA single point energy output. Parameters ---------- - file_name : str + file_name : FileType file name **kwargs keyword arguments diff --git a/dpdata/plugins/psi4.py b/dpdata/plugins/psi4.py index a0cf00e46..2bbfc2321 100644 --- a/dpdata/plugins/psi4.py +++ b/dpdata/plugins/psi4.py @@ -1,11 +1,17 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np from dpdata.format import Format from dpdata.psi4.input import write_psi4_input from dpdata.psi4.output import read_psi4_output from dpdata.unit import EnergyConversion, ForceConversion +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType energy_convert = EnergyConversion("hartree", "eV").value() force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value() @@ -19,12 +25,12 @@ class PSI4OutFormat(Format): printed into the output file. """ - def from_labeled_system(self, file_name: str, **kwargs) -> dict: + def from_labeled_system(self, file_name: FileType, **kwargs) -> dict: """Read from Psi4 output. Parameters ---------- - file_name : str + file_name : FileType file name **kwargs keyword arguments @@ -61,7 +67,7 @@ class PSI4InputFormat(Format): def to_system( self, data: dict, - file_name: str, + file_name: FileType, method: str, basis: str, charge: int = 0, @@ -91,7 +97,7 @@ def to_system( keyword arguments """ types = np.array(data["atom_names"])[data["atom_types"]] - with open(file_name, "w") as fout: + with open_file(file_name, "w") as fout: fout.write( write_psi4_input( types=types, diff --git a/dpdata/plugins/pwmat.py b/dpdata/plugins/pwmat.py index 80f219b6c..ba3dab160 100644 --- a/dpdata/plugins/pwmat.py +++ b/dpdata/plugins/pwmat.py @@ -1,10 +1,16 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np import dpdata.pwmat.atomconfig import dpdata.pwmat.movement from dpdata.format import Format +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType @Format.register("movement") @@ -47,12 +53,12 @@ def from_labeled_system( @Format.register("pwmat/final.config") class PwmatAtomconfigFormat(Format): @Format.post("rot_lower_triangular") - def from_system(self, file_name, **kwargs): - with open(file_name) as fp: + def from_system(self, file_name: FileType, **kwargs): + with open_file(file_name) as fp: lines = [line.rstrip("\n") for line in fp] return dpdata.pwmat.atomconfig.to_system_data(lines) - def to_system(self, data, file_name, frame_idx=0, *args, **kwargs): + def to_system(self, data, file_name: FileType, frame_idx=0, *args, **kwargs): """Dump the system in pwmat atom.config format. Parameters @@ -70,5 +76,5 @@ def to_system(self, data, file_name, frame_idx=0, *args, **kwargs): """ assert frame_idx < len(data["coords"]) w_str = dpdata.pwmat.atomconfig.from_system_data(data, frame_idx) - with open(file_name, "w") as fp: + with open_file(file_name, "w") as fp: fp.write(w_str) diff --git a/dpdata/plugins/vasp.py b/dpdata/plugins/vasp.py index d0681cebf..0160bde29 100644 --- a/dpdata/plugins/vasp.py +++ b/dpdata/plugins/vasp.py @@ -1,12 +1,17 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np import dpdata.vasp.outcar import dpdata.vasp.poscar import dpdata.vasp.xml from dpdata.format import Format -from dpdata.utils import uniq_atom_names +from dpdata.utils import open_file, uniq_atom_names + +if TYPE_CHECKING: + from dpdata.utils import FileType @Format.register("poscar") @@ -15,14 +20,14 @@ @Format.register("vasp/contcar") class VASPPoscarFormat(Format): @Format.post("rot_lower_triangular") - def from_system(self, file_name, **kwargs): - with open(file_name) as fp: + def from_system(self, file_name: FileType, **kwargs): + with open_file(file_name) as fp: lines = [line.rstrip("\n") for line in fp] data = dpdata.vasp.poscar.to_system_data(lines) data = uniq_atom_names(data) return data - def to_system(self, data, file_name, frame_idx=0, **kwargs): + def to_system(self, data, file_name: FileType, frame_idx=0, **kwargs): """Dump the system in vasp POSCAR format. Parameters @@ -37,7 +42,7 @@ def to_system(self, data, file_name, frame_idx=0, **kwargs): other parameters """ w_str = VASPStringFormat().to_system(data, frame_idx=frame_idx) - with open(file_name, "w") as fp: + with open_file(file_name, "w") as fp: fp.write(w_str) diff --git a/dpdata/plugins/xyz.py b/dpdata/plugins/xyz.py index 322bf77cb..d56a8618c 100644 --- a/dpdata/plugins/xyz.py +++ b/dpdata/plugins/xyz.py @@ -1,8 +1,14 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np from dpdata.format import Format +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType from dpdata.xyz.quip_gap_xyz import QuipGapxyzSystems from dpdata.xyz.xyz import coord_to_xyz, xyz_to_coord @@ -16,16 +22,16 @@ class XYZFormat(Format): >>> s.to("xyz", "a.xyz") """ - def to_system(self, data, file_name, **kwargs): + def to_system(self, data, file_name: FileType, **kwargs): buff = [] types = np.array(data["atom_names"])[data["atom_types"]] for cc in data["coords"]: buff.append(coord_to_xyz(cc, types)) - with open(file_name, "w") as fp: + with open_file(file_name, "w") as fp: fp.write("\n".join(buff)) - def from_system(self, file_name, **kwargs): - with open(file_name) as fp: + def from_system(self, file_name: FileType, **kwargs): + with open_file(file_name) as fp: coords, types = xyz_to_coord(fp.read()) atom_names, atom_types, atom_numbs = np.unique( types, return_inverse=True, return_counts=True diff --git a/dpdata/psi4/output.py b/dpdata/psi4/output.py index c06eb1829..c3594ffb4 100644 --- a/dpdata/psi4/output.py +++ b/dpdata/psi4/output.py @@ -1,11 +1,17 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np from dpdata.unit import LengthConversion +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType -def read_psi4_output(fn: str) -> tuple[str, np.ndarray, float, np.ndarray]: +def read_psi4_output(fn: FileType) -> tuple[str, np.ndarray, float, np.ndarray]: """Read from Psi4 output. Note that both the energy and the gradient should be printed. @@ -31,7 +37,7 @@ def read_psi4_output(fn: str) -> tuple[str, np.ndarray, float, np.ndarray]: forces = None energy = None length_unit = None - with open(fn) as f: + with open_file(fn) as f: flag = 0 for line in f: if flag in (1, 3, 4, 5, 6): diff --git a/dpdata/qe/scf.py b/dpdata/qe/scf.py index 37e5fbab6..f86708605 100755 --- a/dpdata/qe/scf.py +++ b/dpdata/qe/scf.py @@ -5,6 +5,8 @@ import numpy as np +from dpdata.utils import open_file + ry2ev = 13.605693009 bohr2ang = 0.52917721067 kbar2evperang3 = 1e3 / 1.602176621e6 @@ -142,9 +144,9 @@ def get_frame(fname): path_out = fname[1] else: raise RuntimeError("invalid input") - with open(path_out) as fp: + with open_file(path_out) as fp: outlines = fp.read().split("\n") - with open(path_in) as fp: + with open_file(path_in) as fp: inlines = fp.read().split("\n") cell = get_cell(inlines) atom_names, natoms, types, coords = get_coords(inlines, cell) diff --git a/dpdata/qe/traj.py b/dpdata/qe/traj.py index 1fbf0f71c..b4be303ad 100644 --- a/dpdata/qe/traj.py +++ b/dpdata/qe/traj.py @@ -2,9 +2,15 @@ from __future__ import annotations import warnings +from typing import TYPE_CHECKING import numpy as np +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType + from ..unit import ( EnergyConversion, ForceConversion, @@ -87,8 +93,8 @@ def load_atom_types(lines, natoms, atom_names): return np.array(ret, dtype=int) -def load_param_file(fname): - with open(fname) as fp: +def load_param_file(fname: FileType): + with open_file(fname) as fp: lines = fp.read().split("\n") natoms = int(load_key(lines, "nat")) ntypes = int(load_key(lines, "ntyp")) @@ -127,11 +133,11 @@ def _load_pos_block(fp, natoms): return blk, ss -def load_data(fname, natoms, begin=0, step=1, convert=1.0): +def load_data(fname: FileType, natoms, begin=0, step=1, convert=1.0): coords = [] steps = [] cc = 0 - with open(fname) as fp: + with open_file(fname) as fp: while True: blk, ss = _load_pos_block(fp, natoms) if blk is None: @@ -147,7 +153,7 @@ def load_data(fname, natoms, begin=0, step=1, convert=1.0): # def load_pos(fname, natoms) : # coords = [] -# with open(fname) as fp: +# with open_file(fname) as fp: # while True: # blk = _load_pos_block(fp, natoms) # # print(blk) @@ -164,7 +170,7 @@ def load_energy(fname, begin=0, step=1): steps = [] for ii in data[begin::step, 0]: steps.append("%d" % ii) - with open(fname) as fp: + with open_file(fname) as fp: while True: line = fp.readline() if not line: @@ -178,7 +184,7 @@ def load_energy(fname, begin=0, step=1): # def load_force(fname, natoms) : # coords = [] -# with open(fname) as fp: +# with open_file(fname) as fp: # while True: # blk = _load_pos_block(fp, natoms) # # print(blk) diff --git a/dpdata/utils.py b/dpdata/utils.py index e008120ea..8942bd54d 100644 --- a/dpdata/utils.py +++ b/dpdata/utils.py @@ -1,7 +1,10 @@ from __future__ import annotations +import io +import os import sys -from typing import overload +from contextlib import contextmanager +from typing import TYPE_CHECKING, Generator, overload if sys.version_info >= (3, 8): from typing import Literal @@ -129,3 +132,44 @@ def uniq_atom_names(data): def utf8len(s: str) -> int: """Return the byte length of a string.""" return len(s.encode("utf-8")) + + +if TYPE_CHECKING: + FileType = io.IOBase | str | os.PathLike + + +@contextmanager +def open_file(file: FileType, *args, **kwargs) -> Generator[io.IOBase, None, None]: + """A context manager that yields a file object. + + Parameters + ---------- + file : file object or file path + A file object or a file path. + + Yields + ------ + file : io.IOBase + A file object. + *args + parameters to open + **kwargs + other parameters + + Raises + ------ + ValueError + If file is not a file object or a file + + Examples + -------- + >>> with open_file("file.txt") as file: + ... print(file.read()) + """ + if isinstance(file, io.IOBase): + yield file + elif isinstance(file, (str, os.PathLike)): + with open(file, *args, **kwargs) as f: + yield f + else: + raise ValueError("file must be a file object or a file path.") diff --git a/pyproject.toml b/pyproject.toml index 6efe08188..4bf2e64b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,11 @@ repository = "https://github.com/deepmodeling/dpdata" dpdata = "dpdata.cli:dpdata_cli" [project.optional-dependencies] +test = [ + # https://github.com/materialsproject/pymatgen/issues/3882 + # https://github.com/kuelumbus/rdkit-pypi/issues/102 + "numpy<2", +] ase = ['ase'] amber = [ 'parmed; python_version >= "3.8"', @@ -52,9 +57,9 @@ docs = [ 'recommonmark', 'sphinx_rtd_theme>=1.0.0rc1', 'numpydoc', - 'm2r2', + 'myst-parser', 'deepmodeling-sphinx>=0.1.1', - 'sphinx-argparse', + 'sphinx-argparse<0.5.0', 'rdkit', 'jupyterlite-sphinx', 'jupyterlite-xeus', diff --git a/tests/abacus.scf/INPUT.ch4-noforcestress b/tests/abacus.scf/INPUT.ch4-noforcestress new file mode 100644 index 000000000..66d4a4e4d --- /dev/null +++ b/tests/abacus.scf/INPUT.ch4-noforcestress @@ -0,0 +1,17 @@ +INPUT_PARAMETERS +#Parameters (General) +suffix ch4-noForceStress +stru_file STRU.ch4 #the filename of file containing atom positions +kpoint_file KPT.ch4 #the name of file containing k points +pseudo_dir ./ +nbands 8 +#Parameters (Accuracy) +ecutwfc 100 +symmetry 1 +scf_nmax 50 +smearing_method gauss #type of smearing: gauss; fd; fixed; mp; mp2; mv +smearing_sigma 0.01 +mixing_type plain +mixing_beta 0.5 +cal_force 1 +cal_stress 1 diff --git a/tests/abacus.scf/OUT.ch4-noForceStress/running_scf.log b/tests/abacus.scf/OUT.ch4-noForceStress/running_scf.log new file mode 100644 index 000000000..c6183689f --- /dev/null +++ b/tests/abacus.scf/OUT.ch4-noForceStress/running_scf.log @@ -0,0 +1,559 @@ + + WELCOME TO ABACUS + + 'Atomic-orbital Based Ab-initio Computation at UStc' + + Website: http://abacus.ustc.edu.cn/ + + Version: Parallel, v2.1.0 + Processor Number is 1 + Start Time is Fri May 7 16:18:50 2021 + + ------------------------------------------------------------------------------------ + + READING GENERAL INFORMATION + global_out_dir = OUT.ch4/ + global_in_card = INPUT + pseudo_dir = ./ + pseudo_type = auto + DRANK = 1 + DSIZE = 1 + DCOLOR = 1 + GRANK = 1 + GSIZE = 1 + + + + + >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + | | + | Reading atom information in unitcell: | + | From the input file and the structure file we know the number of | + | different elments in this unitcell, then we list the detail | + | information for each element, especially the zeta and polar atomic | + | orbital number for each element. The total atom number is counted. | + | We calculate the nearest atom distance for each atom and show the | + | Cartesian and Direct coordinates for each atom. We list the file | + | address for atomic orbitals. The volume and the lattice vectors | + | in real and reciprocal space is also shown. | + | | + <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + + + + READING UNITCELL INFORMATION + ntype = 2 + atom label for species 1 = C + atom label for species 2 = H + lattice constant (Bohr) = 10 + lattice constant (Angstrom) = 5.29177 + + READING ATOM TYPE 1 + atom label = C + start magnetization = FALSE + L=0, number of zeta = 1 + L=1, number of zeta = 1 + L=2, number of zeta = 1 + number of atom for this type = 1 + + READING ATOM TYPE 2 + atom label = H + start magnetization = FALSE + L=0, number of zeta = 1 + L=1, number of zeta = 1 + L=2, number of zeta = 1 + number of atom for this type = 4 + + TOTAL ATOM NUMBER = 5 + + CARTESIAN COORDINATES ( UNIT = 10 Bohr ). + atom x y z mag + tauc_C1 0.981274803 0.861285385001 0.838442496 0 + tauc_H1 0.0235572019992 0.758025625 0.663513359999 0 + tauc_H2 0.78075702 0.889445934999 0.837363467999 0 + tauc_H3 0.0640916129996 0.0434389050006 0.840995502 0 + tauc_H4 0.0393212140007 0.756530859 0.00960920699981 0 + + + Volume (Bohr^3) = 1000 + Volume (A^3) = 148.184534296 + + Lattice vectors: (Cartesian coordinate: in unit of a_0) + +1 +0 +0 + +0 +1 +0 + +0 +0 +1 + Reciprocal vectors: (Cartesian coordinate: in unit of 2 pi/a_0) + +1 +0 +0 + +0 +1 +0 + +0 -0 +1 + + + + + >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + | | + | Reading pseudopotentials files: | + | The pseudopotential file is in UPF format. The 'NC' indicates that | + | the type of pseudopotential is 'norm conserving'. Functional of | + | exchange and correlation is decided by 4 given parameters in UPF | + | file. We also read in the 'core correction' if there exists. | + | Also we can read the valence electrons number and the maximal | + | angular momentum used in this pseudopotential. We also read in the | + | trail wave function, trail atomic density and local-pseudopotential| + | on logrithmic grid. The non-local pseudopotential projector is also| + | read in if there is any. | + | | + <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + + + + PAO radial cut off (Bohr) = 15 + + Read in pseudopotential file is C_ONCV_PBE-1.0.upf + pseudopotential type = NC + functional Ex = PBE + functional Ec = + functional GCEx = + functional GCEc = + nonlocal core correction = 0 + valence electrons = 4 + lmax = 1 + number of zeta = 0 + number of projectors = 4 + L of projector = 0 + L of projector = 0 + L of projector = 1 + L of projector = 1 + PAO radial cut off (Bohr) = 15 + + Read in pseudopotential file is H_ONCV_PBE-1.0.upf + pseudopotential type = NC + functional Ex = PBE + functional Ec = + functional GCEx = + functional GCEc = + nonlocal core correction = 0 + valence electrons = 1 + lmax = 0 + number of zeta = 0 + number of projectors = 2 + L of projector = 0 + L of projector = 0 + initial pseudo atomic orbital number = 0 + NLOCAL = 45 + + SETUP THE ELECTRONS NUMBER + electron number of element C = 4 + total electron number of element C = 4 + electron number of element H = 1 + total electron number of element H = 4 + occupied bands = 4 + NBANDS = 8 + DONE : SETUP UNITCELL Time : 0.0506949424744 (SEC) + + + + + + >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + | | + | Doing symmetry analysis: | + | We calculate the norm of 3 vectors and the angles between them, | + | the type of Bravais lattice is given. We can judge if the unticell | + | is a primitive cell. Finally we give the point group operation for | + | this unitcell. We we use the point group operations to do symmetry | + | analysis on given k-point mesh and the charge density. | + | | + <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + + + + LATTICE VECTORS: (CARTESIAN COORDINATE: IN UNIT OF A0) + +1 +0 +0 + +0 +1 +0 + +0 +0 +1 + right hand lattice = 1 + NORM_A = 1 + NORM_B = 1 + NORM_C = 1 + ALPHA (DEGREE) = 90 + BETA (DEGREE) = 90 + GAMMA (DEGREE) = 90 + BRAVAIS TYPE = 1 + BRAVAIS LATTICE NAME = 01. Cubic P (simple) + STANDARD LATTICE VECTORS: (CARTESIAN COORDINATE: IN UNIT OF A0) + +1 +0 +0 + +0 +1 +0 + +0 +0 +1 + IBRAV = 1 + BRAVAIS = SIMPLE CUBIC + LATTICE CONSTANT A = 4.35889894354 + ibrav = 1 + ROTATION MATRICES = 48 + PURE POINT GROUP OPERATIONS = 1 + SPACE GROUP OPERATIONS = 1 + POINT GROUP = C_1 + DONE : SYMMETRY Time : 0.103345155716 (SEC) + + + + + + >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + | | + | Setup K-points | + | We setup the k-points according to input parameters. | + | The reduced k-points are set according to symmetry operations. | + | We treat the spin as another set of k-points. | + | | + <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + + + + + SETUP K-POINTS + nspin = 1 + Input type of k points = Monkhorst-Pack(Gamma) + nkstot = 1 + nkstot_ibz = 1 + IBZ DirectX DirectY DirectZ Weight ibz2bz + 1 0 0 0 1 0 + nkstot now = 1 + + KPOINTS DIRECT_X DIRECT_Y DIRECT_Z WEIGHT + 1 0 0 0 1 + + k-point number in this process = 1 + minimum distributed K point number = 1 + + KPOINTS CARTESIAN_X CARTESIAN_Y CARTESIAN_Z WEIGHT + 1 0 0 0 2 + + KPOINTS DIRECT_X DIRECT_Y DIRECT_Z WEIGHT + 1 0 0 0 2 + DONE : INIT K-POINTS Time : 0.103604078293 (SEC) + + + + + + >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + | | + | Setup plane waves: | + | Use the energy cutoff and the lattice vectors to generate the | + | dimensions of FFT grid. The number of FFT grid on each processor | + | is 'nrxx'. The number of plane wave basis in reciprocal space is | + | different for charege/potential and wave functions. We also set | + | the 'sticks' for the parallel of FFT. | + | | + <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + + + + + SETUP THE PLANE WAVE BASIS + energy cutoff for wavefunc (unit:Ry) = 100 + [fft grid for wave functions] = 64, 64, 64 + [fft grid for charge/potential] = 64, 64, 64 + [fft grid division] = 1, 1, 1 + [big fft grid for charge/potential] = 64, 64, 64 + nbxx = 262144 + nrxx = 262144 + + SETUP PLANE WAVES FOR CHARGE/POTENTIAL + number of plane waves = 135043 + number of sticks = 3181 + + SETUP PLANE WAVES FOR WAVE FUNCTIONS + number of plane waves = 16879 + number of sticks = 793 + + PARALLEL PW FOR CHARGE/POTENTIAL + PROC COLUMNS(POT) PW + 1 3181 135043 + --------------- sum ------------------- + 1 3181 135043 + + PARALLEL PW FOR WAVE FUNCTIONS + PROC COLUMNS(W) PW + 1 793 16879 + --------------- sum ------------------- + 1 793 16879 + + SETUP COORDINATES OF PLANE WAVES + number of total plane waves = 135043 + + SETUP COORDINATES OF PLANE WAVES + number of |g| = 847 + max |g| = 1013 + min |g| = 0 + DONE : INIT PLANEWAVE Time : 0.294428110123 (SEC) + + npwx = 16879 + + SETUP NONLOCAL PSEUDOPOTENTIALS IN PLANE WAVE BASIS + C non-local projectors: + projector 1 L=0 + projector 2 L=0 + projector 3 L=1 + projector 4 L=1 + H non-local projectors: + projector 1 L=0 + projector 2 L=0 + TOTAL NUMBER OF NONLOCAL PROJECTORS = 16 + DONE : LOCAL POTENTIAL Time : 0.339339256287 (SEC) + + + Init Non-Local PseudoPotential table : + Init Non-Local-Pseudopotential done. + DONE : NON-LOCAL POTENTIAL Time : 0.352857112885 (SEC) + + start_pot = atomic + DONE : INIT POTENTIAL Time : 0.642299 (SEC) + + + Make real space PAO into reciprocal space. + max mesh points in Pseudopotential = 601 + dq(describe PAO in reciprocal space) = 0.01 + max q = 1206 + + number of pseudo atomic orbitals for C is 0 + + number of pseudo atomic orbitals for H is 0 + DONE : INIT BASIS Time : 0.859031 (SEC) + + ------------------------------------------- + ------------------------------------------- + + PW ALGORITHM --------------- ION= 1 ELEC= 1-------------------------------- + K-point CG iter num Time(Sec) + 1 8.000000 1.417983 + + Density error is 0.705428908048 + Error Threshold = 0.010000000000 + + Energy Rydberg eV + E_KohnSham -16.017544057872 -217.929867153100 + E_Harris -16.408058532460 -223.243089158968 + E_Fermi -0.626082258799 -8.518286136372 + + PW ALGORITHM --------------- ION= 1 ELEC= 2-------------------------------- + K-point CG iter num Time(Sec) + 1 3.000000 0.623305 + + Density error is 0.037193130746 + Error Threshold = 0.008817861351 + + Energy Rydberg eV + E_KohnSham -16.144996950340 -219.663952717248 + E_Harris -16.158845333973 -219.852369642740 + E_Fermi -0.425221736464 -5.785438529359 + + PW ALGORITHM --------------- ION= 1 ELEC= 3-------------------------------- + K-point CG iter num Time(Sec) + 1 3.750000 0.765803 + + Density error is 0.008021287795 + Error Threshold = 0.000464914134 + + Energy Rydberg eV + E_KohnSham -16.143766641525 -219.647213507066 + E_Harris -16.146720471833 -219.687402430176 + E_Fermi -0.423547802407 -5.762663488108 + + PW ALGORITHM --------------- ION= 1 ELEC= 4-------------------------------- + K-point CG iter num Time(Sec) + 1 3.250000 0.691389 + + Density error is 0.001345350159 + Error Threshold = 0.000100266097 + + Energy Rydberg eV + E_KohnSham -16.143852570867 -219.648382635743 + E_Harris -16.144348491782 -219.655129985945 + E_Fermi -0.417940644498 -5.686374190966 + + PW ALGORITHM --------------- ION= 1 ELEC= 5-------------------------------- + K-point CG iter num Time(Sec) + 1 3.500000 0.724421 + + Density error is 0.000193438710 + Error Threshold = 0.000016816877 + + Energy Rydberg eV + E_KohnSham -16.143964486009 -219.649905319365 + E_Harris -16.144043239940 -219.650976821572 + E_Fermi -0.412189586104 -5.608127027270 + + PW ALGORITHM --------------- ION= 1 ELEC= 6-------------------------------- + K-point CG iter num Time(Sec) + 1 4.750000 0.929892 + + Density error is 0.000033144966 + Error Threshold = 0.000002417984 + + Energy Rydberg eV + E_KohnSham -16.143962062113 -219.649872340569 + E_Harris -16.143973121330 -219.650022808940 + E_Fermi -0.408955040431 -5.564118775681 + + PW ALGORITHM --------------- ION= 1 ELEC= 7-------------------------------- + K-point CG iter num Time(Sec) + 1 2.625000 0.571903 + + Density error is 0.000005013879 + Error Threshold = 0.000000414312 + + Energy Rydberg eV + E_KohnSham -16.143964909086 -219.649911075624 + E_Harris -16.143965842245 -219.649923771901 + E_Fermi -0.407628831587 -5.546074778663 + + PW ALGORITHM --------------- ION= 1 ELEC= 8-------------------------------- + K-point CG iter num Time(Sec) + 1 3.625000 0.729712 + + Density error is 0.000001011179 + Error Threshold = 0.000000062673 + + Energy Rydberg eV + E_KohnSham -16.143964999058 -219.649912299760 + E_Harris -16.143965129574 -219.649914075510 + E_Fermi -0.407392472263 -5.542858945081 + + PW ALGORITHM --------------- ION= 1 ELEC= 9-------------------------------- + K-point CG iter num Time(Sec) + 1 3.500000 0.698617 + + Density error is 0.000000165426 + Error Threshold = 0.000000012640 + + Energy Rydberg eV + E_KohnSham -16.143965114417 -219.649913869298 + E_Harris -16.143965110616 -219.649913817582 + E_Fermi -0.407267643914 -5.541160568271 + + PW ALGORITHM --------------- ION= 1 ELEC= 10-------------------------------- + K-point CG iter num Time(Sec) + 1 3.000000 0.629669 + + Density error is 0.000000034854 + Error Threshold = 0.000000002068 + + Energy Rydberg eV + E_KohnSham -16.143965129640 -219.649914076412 + E_Harris -16.143965128874 -219.649914065985 + E_Fermi -0.407195695883 -5.540181665084 + + PW ALGORITHM --------------- ION= 1 ELEC= 11-------------------------------- + K-point CG iter num Time(Sec) + 1 4.000000 0.813535 + + Density error is 0.000000008107 + Error Threshold = 0.000000000436 + + Energy Rydberg eV + E_KohnSham -16.143965128688 -219.649914063463 + E_Harris -16.143965130414 -219.649914086943 + E_Fermi -0.407157690601 -5.539664576698 + + PW ALGORITHM --------------- ION= 1 ELEC= 12-------------------------------- + K-point CG iter num Time(Sec) + 1 3.125000 0.674851 + + Density error is 0.000000001913 + Error Threshold = 0.000000000101 + + Energy Rydberg eV + E_KohnSham -16.143965127817 -219.649914051607 + E_Harris -16.143965128941 -219.649914066906 + E_Fermi -0.407141275858 -5.539441242659 + + PW ALGORITHM --------------- ION= 1 ELEC= 13-------------------------------- + K-point CG iter num Time(Sec) + 1 4.000000 0.804644 + + Density error is 0.000000000482 + Error Threshold = 0.000000000024 + + Energy Rydberg eV + E_KohnSham -16.143965127167 -219.649914042766 + E_Harris -16.143965127871 -219.649914052349 + E_band -5.963675568058 -81.139968748982 + E_one_elec -25.076858425354 -341.188162524121 + E_Hartree +13.701412040584 +186.417274397756 + E_xc -6.404563484424 -87.138556590897 + E_Ewald +1.636044742026 +22.259530674496 + E_demet -0.000000000000 -0.000000000000 + E_descf +0.000000000000 +0.000000000000 + E_efield +0.000000000000 +0.000000000000 + E_Fermi -0.407135332076 -5.539360373357 + charge density convergence is achieved + final etot is -219.649914042766 eV + + STATE ENERGY(eV) AND OCCUPATIONS. 1/1 kpoint (Cartesian) = 0.00000 0.00000 0.00000 (16879 pws) + [spin1_state] 1 -15.903159 2.000000 + [spin1_state] 2 -8.412500 2.000000 + [spin1_state] 3 -8.255893 2.000000 + [spin1_state] 4 -7.998432 2.000000 + [spin1_state] 5 -0.514947 0.000000 + [spin1_state] 6 2.727369 0.000000 + [spin1_state] 7 3.067094 0.000000 + [spin1_state] 8 4.824439 0.000000 + + + -------------------------------------------- + !FINAL_ETOT_IS -219.6499140427659142 eV + -------------------------------------------- + + + + + + + |CLASS_NAME---------|NAME---------------|TIME(Sec)-----|CALLS----|AVG------|PER%------- + A DC_Driv reading +0.104 1 +0.10 +0.59% + A DC_Driv divide_frag +0.19 1 +0.19 +1.09% + B PW_Basis gen_pw +0.19 1 +0.19 +1.09% + A DC_Driv solve_eachf +17.20 1 +17.20 +98.32% + B Run_Frag frag_pw_line +17.20 1 +17.20 +98.32% + X FFT FFT3D +9.69 1332 +0.01 +55.38% + E potential v_of_rho +3.20 14 +0.23 +18.27% + C wavefunc wfcinit +0.22 1 +0.22 +1.24% + G Hamilt_PW cinitcgg +2.31 14 +0.17 +13.20% + H Hamilt_PW h_psi +9.29 513 +0.02 +53.10% + I Hamilt_PW add_vuspsi +0.45 513 +0.00 +2.57% + C Ions opt_ions_pw +16.63 1 +16.63 +95.07% + D electrons self_consistent +14.90 1 +14.90 +85.15% + E electrons c_bands +10.25 13 +0.79 +58.60% + F Hamilt diago +10.08 13 +0.78 +57.59% + G Diago_CG diag +7.93 13 +0.61 +45.35% + E Charge mix_rho +0.35 13 +0.03 +1.98% + ---------------------------------------------------------------------------------------- + + CLASS_NAME---------|NAME---------------|MEMORY(MB)-------- + +29.4953 + PW_Basis struc_fac +4.1212 + Use_FFT porter +4.0000 + wavefunc evc +2.0604 + Charge rho +2.0000 + Charge rho_save +2.0000 + Charge rho_core +2.0000 + potential vltot +2.0000 + potential vr +2.0000 + potential vrs +2.0000 + potential vrs1 +2.0000 + potential vnew +2.0000 + Charge rhog +1.0303 + Charge rhog_save +1.0303 + Charge rhog_core +1.0303 + ---------------------------------------------------------- + + Start Time : Fri May 7 16:18:50 2021 + Finish Time : Fri May 7 16:19:08 2021 + Total Time : +0 h +0 mins +18 secs diff --git a/tests/comp_sys.py b/tests/comp_sys.py index 99879af61..a3a916a09 100644 --- a/tests/comp_sys.py +++ b/tests/comp_sys.py @@ -105,6 +105,72 @@ def test_virial(self): ) +def _make_comp_ms_test_func(comp_sys_test_func): + """ + Dynamically generates a test function for multi-system comparisons. + + Args: + comp_sys_test_func (Callable): The original test function for single systems. + + Returns + ------- + Callable: A new test function that can handle comparisons between multi-systems. + """ + + def comp_ms_test_func(iobj): + assert hasattr(iobj, "ms_1") and hasattr( + iobj, "ms_2" + ), "Multi-system objects must be present" + iobj.assertEqual(len(iobj.ms_1), len(iobj.ms_2)) + keys = [ii.formula for ii in iobj.ms_1] + keys_2 = [ii.formula for ii in iobj.ms_2] + assert sorted(keys) == sorted( + keys_2 + ), f"Keys of two MS are not equal: {keys} != {keys_2}" + for kk in keys: + iobj.system_1 = iobj.ms_1[kk] + iobj.system_2 = iobj.ms_2[kk] + comp_sys_test_func(iobj) + del iobj.system_1 + del iobj.system_2 + + return comp_ms_test_func + + +def _make_comp_ms_class(comp_class): + """ + Dynamically generates a test class for multi-system comparisons. + + Args: + comp_class (type): The original test class for single systems. + + Returns + ------- + type: A new test class that can handle comparisons between multi-systems. + """ + + class CompMS: + pass + + test_methods = [ + func + for func in dir(comp_class) + if callable(getattr(comp_class, func)) and func.startswith("test_") + ] + + for func in test_methods: + setattr(CompMS, func, _make_comp_ms_test_func(getattr(comp_class, func))) + + return CompMS + + +# MultiSystems comparison from single System comparison +CompMultiSys = _make_comp_ms_class(CompSys) + +# LabeledMultiSystems comparison from single LabeledSystem comparison +CompLabeledMultiSys = _make_comp_ms_class(CompLabeledSys) + + class MultiSystems: def test_systems_name(self): self.assertEqual(set(self.systems.systems), set(self.system_names)) @@ -127,3 +193,21 @@ class IsNoPBC: def test_is_nopbc(self): self.assertTrue(self.system_1.nopbc) self.assertTrue(self.system_2.nopbc) + + +class MSAllIsPBC: + def test_is_pbc(self): + assert hasattr(self, "ms_1") and hasattr( + self, "ms_2" + ), "Multi-system objects must be present and iterable" + self.assertTrue(all([not ss.nopbc for ss in self.ms_1])) + self.assertTrue(all([not ss.nopbc for ss in self.ms_2])) + + +class MSAllIsNoPBC: + def test_is_nopbc(self): + assert hasattr(self, "ms_1") and hasattr( + self, "ms_2" + ), "Multi-system objects must be present and iterable" + self.assertTrue(all([ss.nopbc for ss in self.ms_1])) + self.assertTrue(all([ss.nopbc for ss in self.ms_2])) diff --git a/tests/plugin/dpdata_plugin_test/__init__.py b/tests/plugin/dpdata_plugin_test/__init__.py index ef26e7c1d..4e445eb2f 100644 --- a/tests/plugin/dpdata_plugin_test/__init__.py +++ b/tests/plugin/dpdata_plugin_test/__init__.py @@ -10,6 +10,10 @@ DataType("foo", np.ndarray, (Axis.NFRAMES, 2, 4), required=False), labeled=True ) +register_data_type( + DataType("foo", np.ndarray, (Axis.NFRAMES, 3, 3), required=False), labeled=False +) + register_data_type( DataType("bar", np.ndarray, (Axis.NFRAMES, Axis.NATOMS, -1), required=False), labeled=True, diff --git a/tests/test_abacus_pw_scf.py b/tests/test_abacus_pw_scf.py index 8d13dddcf..20751f819 100644 --- a/tests/test_abacus_pw_scf.py +++ b/tests/test_abacus_pw_scf.py @@ -146,5 +146,23 @@ def test_return_zero(self): self.assertEqual(len(self.system_ch4), 0) +class TestABACUSLabeledOutputNoFS(unittest.TestCase): + def setUp(self): + shutil.copy("abacus.scf/INPUT.ch4-noforcestress", "abacus.scf/INPUT") + + def tearDown(self): + if os.path.isfile("abacus.scf/INPUT"): + os.remove("abacus.scf/INPUT") + + def test_noforcestress_job(self): + # check below will not throw error + system_ch4 = dpdata.LabeledSystem("abacus.scf", fmt="abacus/scf") + # check the returned force is empty + self.assertFalse(system_ch4.data["forces"]) + self.assertTrue("virials" not in system_ch4.data) + # test append self + system_ch4.append(system_ch4) + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_custom_data_type.py b/tests/test_custom_data_type.py index e94ba5e0f..230475ab3 100644 --- a/tests/test_custom_data_type.py +++ b/tests/test_custom_data_type.py @@ -9,10 +9,12 @@ from dpdata.data_type import Axis, DataType -class TestDeepmdLoadDumpComp(unittest.TestCase): +class DeepmdLoadDumpCompTest: def setUp(self): - self.system = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") - self.foo = np.ones((len(self.system), 2, 4)) + self.system = self.cls( + data=dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar").data + ) + self.foo = np.ones((len(self.system), *self.shape)) self.system.data["foo"] = self.foo self.system.check_data() @@ -23,7 +25,7 @@ def test_to_deepmd_raw(self): def test_from_deepmd_raw(self): self.system.to_deepmd_raw("data_foo") - x = dpdata.LabeledSystem("data_foo", fmt="deepmd/raw") + x = self.cls("data_foo", fmt="deepmd/raw") np.testing.assert_allclose(x.data["foo"], self.foo) def test_to_deepmd_npy(self): @@ -33,7 +35,7 @@ def test_to_deepmd_npy(self): def test_from_deepmd_npy(self): self.system.to_deepmd_npy("data_foo") - x = dpdata.LabeledSystem("data_foo", fmt="deepmd/npy") + x = self.cls("data_foo", fmt="deepmd/npy") np.testing.assert_allclose(x.data["foo"], self.foo) def test_to_deepmd_hdf5(self): @@ -44,17 +46,43 @@ def test_to_deepmd_hdf5(self): def test_from_deepmd_hdf5(self): self.system.to_deepmd_hdf5("data_foo.h5") - x = dpdata.LabeledSystem("data_foo.h5", fmt="deepmd/hdf5") + x = self.cls("data_foo.h5", fmt="deepmd/hdf5") np.testing.assert_allclose(x.data["foo"], self.foo) def test_duplicated_data_type(self): - dt = DataType("foo", np.ndarray, (Axis.NFRAMES, 2, 4), required=False) - n_dtypes_old = len(dpdata.LabeledSystem.DTYPES) + dt = DataType("foo", np.ndarray, (Axis.NFRAMES, *self.shape), required=False) + n_dtypes_old = len(self.cls.DTYPES) with self.assertWarns(UserWarning): - dpdata.LabeledSystem.register_data_type(dt) - n_dtypes_new = len(dpdata.LabeledSystem.DTYPES) + self.cls.register_data_type(dt) + n_dtypes_new = len(self.cls.DTYPES) self.assertEqual(n_dtypes_old, n_dtypes_new) + def test_to_deepmd_npy_mixed(self): + ms = dpdata.MultiSystems(self.system) + ms.to_deepmd_npy_mixed("data_foo_mixed") + x = dpdata.MultiSystems().load_systems_from_file( + "data_foo_mixed", + fmt="deepmd/npy/mixed", + labeled=issubclass(self.cls, dpdata.LabeledSystem), + ) + np.testing.assert_allclose(list(x.systems.values())[0].data["foo"], self.foo) + + +class TestDeepmdLoadDumpCompUnlabeled(unittest.TestCase, DeepmdLoadDumpCompTest): + cls = dpdata.System + shape = (3, 3) + + def setUp(self): + DeepmdLoadDumpCompTest.setUp(self) + + +class TestDeepmdLoadDumpCompLabeled(unittest.TestCase, DeepmdLoadDumpCompTest): + cls = dpdata.LabeledSystem + shape = (2, 4) + + def setUp(self): + DeepmdLoadDumpCompTest.setUp(self) + class TestDeepmdLoadDumpCompAny(unittest.TestCase): def setUp(self): diff --git a/tests/test_deepmd_mixed.py b/tests/test_deepmd_mixed.py index 02044932e..9f0c5ed46 100644 --- a/tests/test_deepmd_mixed.py +++ b/tests/test_deepmd_mixed.py @@ -6,12 +6,18 @@ from glob import glob import numpy as np -from comp_sys import CompLabeledSys, IsNoPBC, MultiSystems +from comp_sys import ( + CompLabeledMultiSys, + CompLabeledSys, + IsNoPBC, + MSAllIsNoPBC, + MultiSystems, +) from context import dpdata class TestMixedMultiSystemsDumpLoad( - unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC + unittest.TestCase, CompLabeledMultiSys, MultiSystems, MSAllIsNoPBC ): def setUp(self): self.places = 6 @@ -62,8 +68,8 @@ def setUp(self): self.place_holder_ms.from_deepmd_npy("tmp.deepmd.mixed", fmt="deepmd/npy") self.systems = dpdata.MultiSystems() self.systems.from_deepmd_npy_mixed("tmp.deepmd.mixed", fmt="deepmd/npy/mixed") - self.system_1 = self.ms["C1H4A0B0D0"] - self.system_2 = self.systems["C1H4A0B0D0"] + self.ms_1 = self.ms + self.ms_2 = self.systems mixed_sets = glob("tmp.deepmd.mixed/*/set.*") self.assertEqual(len(mixed_sets), 2) for i in mixed_sets: @@ -111,8 +117,121 @@ def test_str(self): ) +class TestMixedMultiSystemsDumpLoadTypeMap( + unittest.TestCase, CompLabeledMultiSys, MultiSystems, MSAllIsNoPBC +): + def setUp(self): + self.places = 6 + self.e_places = 6 + self.f_places = 6 + self.v_places = 6 + + # C1H4 + system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + + # C1H3 + system_2 = dpdata.LabeledSystem( + "gaussian/methane_sub.gaussianlog", fmt="gaussian/log" + ) + + tmp_data = system_1.data.copy() + tmp_data["atom_numbs"] = [1, 1, 1, 2] + tmp_data["atom_names"] = ["C", "H", "A", "B"] + tmp_data["atom_types"] = np.array([0, 1, 2, 3, 3]) + # C1H1A1B2 + system_1_modified_type_1 = dpdata.LabeledSystem(data=tmp_data) + + tmp_data = system_1.data.copy() + tmp_data["atom_numbs"] = [1, 1, 2, 1] + tmp_data["atom_names"] = ["C", "H", "A", "B"] + tmp_data["atom_types"] = np.array([0, 1, 2, 2, 3]) + # C1H1A2B1 + system_1_modified_type_2 = dpdata.LabeledSystem(data=tmp_data) + + tmp_data = system_1.data.copy() + tmp_data["atom_numbs"] = [1, 1, 1, 2] + tmp_data["atom_names"] = ["C", "H", "A", "D"] + tmp_data["atom_types"] = np.array([0, 1, 2, 3, 3]) + # C1H1A1C2 + system_1_modified_type_3 = dpdata.LabeledSystem(data=tmp_data) + + self.ms = dpdata.MultiSystems( + system_1, + system_2, + system_1_modified_type_1, + system_1_modified_type_2, + system_1_modified_type_3, + ) + + self.ms.to_deepmd_npy_mixed("tmp.deepmd.mixed") + self.place_holder_ms = dpdata.MultiSystems() + self.place_holder_ms.from_deepmd_npy("tmp.deepmd.mixed", fmt="deepmd/npy") + + new_type_map = ["H", "C", "D", "A", "B"] + self.systems = dpdata.MultiSystems() + self.systems.from_deepmd_npy_mixed( + "tmp.deepmd.mixed", fmt="deepmd/npy/mixed", type_map=new_type_map + ) + for kk in [ii.formula for ii in self.ms]: + # apply type_map to each system + self.ms[kk].apply_type_map(new_type_map) + # revise keys in dict according because the type_map is updated. + tmp_ss = self.ms.systems.pop(kk) + self.ms.systems[tmp_ss.formula] = tmp_ss + + self.ms_1 = self.ms + self.ms_2 = self.systems + mixed_sets = glob("tmp.deepmd.mixed/*/set.*") + self.assertEqual(len(mixed_sets), 2) + for i in mixed_sets: + self.assertEqual( + os.path.exists(os.path.join(i, "real_atom_types.npy")), True + ) + + self.system_names = [ + "H4C1D0A0B0", + "H3C1D0A0B0", + "H1C1D0A1B2", + "H1C1D0A2B1", + "H1C1D2A1B0", + ] + self.system_sizes = { + "H4C1D0A0B0": 1, + "H3C1D0A0B0": 1, + "H1C1D0A1B2": 1, + "H1C1D0A2B1": 1, + "H1C1D2A1B0": 1, + } + self.atom_names = ["H", "C", "D", "A", "B"] + + def tearDown(self): + if os.path.exists("tmp.deepmd.mixed"): + shutil.rmtree("tmp.deepmd.mixed") + + def test_len(self): + self.assertEqual(len(self.ms), 5) + self.assertEqual(len(self.place_holder_ms), 2) + self.assertEqual(len(self.systems), 5) + + def test_get_nframes(self): + self.assertEqual(self.ms.get_nframes(), 5) + self.assertEqual(self.place_holder_ms.get_nframes(), 5) + self.assertEqual(self.systems.get_nframes(), 5) + + def test_str(self): + self.assertEqual(str(self.ms), "MultiSystems (5 systems containing 5 frames)") + self.assertEqual( + str(self.place_holder_ms), "MultiSystems (2 systems containing 5 frames)" + ) + self.assertEqual( + str(self.systems), "MultiSystems (5 systems containing 5 frames)" + ) + + class TestMixedMultiSystemsDumpLoadSetSize( - unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC + unittest.TestCase, CompLabeledMultiSys, MultiSystems, MSAllIsNoPBC ): def setUp(self): self.places = 6 @@ -163,8 +282,8 @@ def setUp(self): self.place_holder_ms.from_deepmd_npy("tmp.deepmd.mixed", fmt="deepmd/npy") self.systems = dpdata.MultiSystems() self.systems.from_deepmd_npy_mixed("tmp.deepmd.mixed", fmt="deepmd/npy/mixed") - self.system_1 = self.ms["C1H4A0B0D0"] - self.system_2 = self.systems["C1H4A0B0D0"] + self.ms_1 = self.ms + self.ms_2 = self.systems mixed_sets = glob("tmp.deepmd.mixed/*/set.*") self.assertEqual(len(mixed_sets), 5) for i in mixed_sets: @@ -213,7 +332,7 @@ def test_str(self): class TestMixedMultiSystemsTypeChange( - unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC + unittest.TestCase, CompLabeledMultiSys, MultiSystems, MSAllIsNoPBC ): def setUp(self): self.places = 6 @@ -265,8 +384,8 @@ def setUp(self): self.place_holder_ms.from_deepmd_npy("tmp.deepmd.mixed", fmt="deepmd/npy") self.systems = dpdata.MultiSystems(type_map=["TOKEN"]) self.systems.from_deepmd_npy_mixed("tmp.deepmd.mixed", fmt="deepmd/npy/mixed") - self.system_1 = self.ms["TOKEN0C1H4A0B0D0"] - self.system_2 = self.systems["TOKEN0C1H4A0B0D0"] + self.ms_1 = self.ms + self.ms_2 = self.systems mixed_sets = glob("tmp.deepmd.mixed/*/set.*") self.assertEqual(len(mixed_sets), 2) for i in mixed_sets: @@ -314,5 +433,27 @@ def test_str(self): ) +class TestMixedSingleSystemsDump(unittest.TestCase, CompLabeledSys, IsNoPBC): + def setUp(self): + self.places = 6 + self.e_places = 6 + self.f_places = 6 + self.v_places = 6 + + # C1H4 + self.system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + self.system_2 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + # test dump + self.system_1.to("deepmd/npy/mixed", "tmp.deepmd.mixed.single") + + def tearDown(self): + if os.path.exists("tmp.deepmd.mixed.single"): + shutil.rmtree("tmp.deepmd.mixed.single") + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_read_file.py b/tests/test_read_file.py new file mode 100644 index 000000000..e7dca54e6 --- /dev/null +++ b/tests/test_read_file.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +import io +import unittest +from pathlib import Path + +from dpdata.utils import open_file + + +class TestReadFile(unittest.TestCase): + def test_open_file_from_string_io(self): + string_io = io.StringIO("Hello, world!") + with open_file(string_io) as file: + self.assertEqual(file.read(), "Hello, world!") + + def test_open_file_from_file_str(self): + with open_file("/dev/null") as file: + self.assertEqual(file.read(), Path("/dev/null").read_text()) + + def test_open_file_from_file_path(self): + with open_file(Path("/dev/null")) as file: + self.assertEqual(file.read(), Path("/dev/null").read_text())