From e19e34e6bff4de658ca8b5dffff428c400b126a1 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Fri, 7 Jun 2024 04:27:00 +0800 Subject: [PATCH 01/24] fix: use deepcopy when dumping mixed types (#671) Use deepcopy when dumping mixed types in system level. Fix #670 . Typically, users use the following command to dump list of systems to mixed_type: ``` dpdata.MultiSystems(*systems).to_deepmd_npy_mixed("mixed_dir") ``` This command will rebuild a new system with a new data dict, thus it **does not change the original `system.data`**. In #670 , when users use ``` system.to('deepmd/npy/mixed', 'dir') ``` on a system in standard format, it would directly convert the original `system.data` into mixed types, thus the original `system.data` would be changed. Use deepcopy to prevent this. ## Summary by CodeRabbit - **Bug Fixes** - Enhanced data integrity by preserving the original content before processing. - **Tests** - Added new tests to ensure proper functionality of the data dumping process. 
--------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- dpdata/deepmd/mixed.py | 4 ++++ tests/test_deepmd_mixed.py | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/dpdata/deepmd/mixed.py b/dpdata/deepmd/mixed.py index b25107dbc..ccf9b2f58 100644 --- a/dpdata/deepmd/mixed.py +++ b/dpdata/deepmd/mixed.py @@ -1,5 +1,6 @@ from __future__ import annotations +import copy import glob import os import shutil @@ -160,6 +161,9 @@ def dump(folder, data, set_size=2000, comp_prec=np.float32, remove_sets=True): if "real_atom_types" not in data: from dpdata import LabeledSystem, System + # not change the original content + data = copy.deepcopy(data) + if "energies" in data: temp_sys = LabeledSystem(data=data) else: diff --git a/tests/test_deepmd_mixed.py b/tests/test_deepmd_mixed.py index 02044932e..70a09dbe8 100644 --- a/tests/test_deepmd_mixed.py +++ b/tests/test_deepmd_mixed.py @@ -314,5 +314,27 @@ def test_str(self): ) +class TestMixedSingleSystemsDump(unittest.TestCase, CompLabeledSys, IsNoPBC): + def setUp(self): + self.places = 6 + self.e_places = 6 + self.f_places = 6 + self.v_places = 6 + + # C1H4 + self.system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + self.system_2 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + # test dump + self.system_1.to("deepmd/npy/mixed", "tmp.deepmd.mixed.single") + + def tearDown(self): + if os.path.exists("tmp.deepmd.mixed.single"): + shutil.rmtree("tmp.deepmd.mixed.single") + + if __name__ == "__main__": unittest.main() From a9a8b480410f2757d846ac986da270b024976bee Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 11 Jun 2024 09:41:56 +0800 Subject: [PATCH 02/24] [pre-commit.ci] pre-commit autoupdate (#674) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - 
[github.com/astral-sh/ruff-pre-commit: v0.4.7 → v0.4.8](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.7...v0.4.8) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 23bea3ebd..f4451dedb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: # Python - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.4.7 + rev: v0.4.8 hooks: - id: ruff args: ["--fix"] From fb85312d5a9da4f4e10e63df538d1343afd7db02 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 1 Jul 2024 17:40:02 -0400 Subject: [PATCH 03/24] [pre-commit.ci] pre-commit autoupdate (#675) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.4.8 → v0.4.10](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.8...v0.4.10) --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Jinzhe Zeng --- .github/workflows/benchmark.yml | 6 +++--- .github/workflows/test.yml | 2 +- .pre-commit-config.yaml | 2 +- pyproject.toml | 5 +++++ 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 650ff59ef..25650184b 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -1,11 +1,11 @@ -name: Python package +name: Benchmark on: - push - pull_request jobs: - build: + benchmark: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -15,7 +15,7 @@ jobs: python-version: 3.12 - run: curl -LsSf https://astral.sh/uv/install.sh | sh - name: Install dependencies - run: uv pip install --system .[amber,ase,pymatgen,benchmark] rdkit openbabel-wheel + run: 
uv pip install --system .[test,amber,ase,pymatgen,benchmark] rdkit openbabel-wheel - name: Run benchmarks uses: CodSpeedHQ/action@v2 with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4a17fcde1..a6d23270c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,7 +20,7 @@ jobs: python-version: ${{ matrix.python-version }} - run: curl -LsSf https://astral.sh/uv/install.sh | sh - name: Install dependencies - run: uv pip install --system .[amber,ase,pymatgen] coverage ./tests/plugin rdkit openbabel-wheel + run: uv pip install --system .[test,amber,ase,pymatgen] coverage ./tests/plugin rdkit openbabel-wheel - name: Test run: cd tests && coverage run --source=../dpdata -m unittest && cd .. && coverage combine tests/.coverage && coverage report - name: Run codecov diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f4451dedb..d9e92b7b3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: # Python - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. 
- rev: v0.4.8 + rev: v0.4.10 hooks: - id: ruff args: ["--fix"] diff --git a/pyproject.toml b/pyproject.toml index 6efe08188..35b30decf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,11 @@ repository = "https://github.com/deepmodeling/dpdata" dpdata = "dpdata.cli:dpdata_cli" [project.optional-dependencies] +test = [ + # https://github.com/materialsproject/pymatgen/issues/3882 + # https://github.com/kuelumbus/rdkit-pypi/issues/102 + "numpy<2", +] ase = ['ase'] amber = [ 'parmed; python_version >= "3.8"', From e429d9a54ef6d2cbb2d55eefa68bb9b96045fa25 Mon Sep 17 00:00:00 2001 From: Peng Xingliang <91927439+pxlxingliang@users.noreply.github.com> Date: Tue, 2 Jul 2024 13:18:56 +0800 Subject: [PATCH 04/24] abacus: fix bug in collecting force (#669) ## Summary by CodeRabbit - **Bug Fixes** - Modified force data handling in computational simulations to return `None` instead of an empty list when no forces are present. - **Tests** - Added new test scenarios to validate force data handling in labeled system objects for computational simulations. 
--------- Signed-off-by: Jinzhe Zeng Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Jinzhe Zeng --- dpdata/abacus/scf.py | 4 +- tests/abacus.scf/INPUT.ch4-noforcestress | 17 + .../OUT.ch4-noForceStress/running_scf.log | 559 ++++++++++++++++++ tests/test_abacus_pw_scf.py | 18 + 4 files changed, 596 insertions(+), 2 deletions(-) create mode 100644 tests/abacus.scf/INPUT.ch4-noforcestress create mode 100644 tests/abacus.scf/OUT.ch4-noForceStress/running_scf.log diff --git a/dpdata/abacus/scf.py b/dpdata/abacus/scf.py index 193e4d4b5..c06f8cd3d 100644 --- a/dpdata/abacus/scf.py +++ b/dpdata/abacus/scf.py @@ -190,7 +190,7 @@ def collect_force(outlines): def get_force(outlines, natoms): force = collect_force(outlines) if len(force) == 0: - return [[]] + return None else: return np.array(force[-1]) # only return the last force @@ -285,7 +285,7 @@ def get_frame(fname): data["cells"] = cell[np.newaxis, :, :] data["coords"] = coords[np.newaxis, :, :] data["energies"] = np.array(energy)[np.newaxis] - data["forces"] = force[np.newaxis, :, :] + data["forces"] = np.empty((0,)) if force is None else force[np.newaxis, :, :] if stress is not None: data["virials"] = stress[np.newaxis, :, :] data["orig"] = np.zeros(3) diff --git a/tests/abacus.scf/INPUT.ch4-noforcestress b/tests/abacus.scf/INPUT.ch4-noforcestress new file mode 100644 index 000000000..66d4a4e4d --- /dev/null +++ b/tests/abacus.scf/INPUT.ch4-noforcestress @@ -0,0 +1,17 @@ +INPUT_PARAMETERS +#Parameters (General) +suffix ch4-noForceStress +stru_file STRU.ch4 #the filename of file containing atom positions +kpoint_file KPT.ch4 #the name of file containing k points +pseudo_dir ./ +nbands 8 +#Parameters (Accuracy) +ecutwfc 100 +symmetry 1 +scf_nmax 50 +smearing_method gauss #type of smearing: gauss; fd; fixed; mp; mp2; mv +smearing_sigma 0.01 +mixing_type plain +mixing_beta 0.5 +cal_force 1 +cal_stress 1 diff --git 
a/tests/abacus.scf/OUT.ch4-noForceStress/running_scf.log b/tests/abacus.scf/OUT.ch4-noForceStress/running_scf.log new file mode 100644 index 000000000..c6183689f --- /dev/null +++ b/tests/abacus.scf/OUT.ch4-noForceStress/running_scf.log @@ -0,0 +1,559 @@ + + WELCOME TO ABACUS + + 'Atomic-orbital Based Ab-initio Computation at UStc' + + Website: http://abacus.ustc.edu.cn/ + + Version: Parallel, v2.1.0 + Processor Number is 1 + Start Time is Fri May 7 16:18:50 2021 + + ------------------------------------------------------------------------------------ + + READING GENERAL INFORMATION + global_out_dir = OUT.ch4/ + global_in_card = INPUT + pseudo_dir = ./ + pseudo_type = auto + DRANK = 1 + DSIZE = 1 + DCOLOR = 1 + GRANK = 1 + GSIZE = 1 + + + + + >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + | | + | Reading atom information in unitcell: | + | From the input file and the structure file we know the number of | + | different elments in this unitcell, then we list the detail | + | information for each element, especially the zeta and polar atomic | + | orbital number for each element. The total atom number is counted. | + | We calculate the nearest atom distance for each atom and show the | + | Cartesian and Direct coordinates for each atom. We list the file | + | address for atomic orbitals. The volume and the lattice vectors | + | in real and reciprocal space is also shown. 
| + | | + <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + + + + READING UNITCELL INFORMATION + ntype = 2 + atom label for species 1 = C + atom label for species 2 = H + lattice constant (Bohr) = 10 + lattice constant (Angstrom) = 5.29177 + + READING ATOM TYPE 1 + atom label = C + start magnetization = FALSE + L=0, number of zeta = 1 + L=1, number of zeta = 1 + L=2, number of zeta = 1 + number of atom for this type = 1 + + READING ATOM TYPE 2 + atom label = H + start magnetization = FALSE + L=0, number of zeta = 1 + L=1, number of zeta = 1 + L=2, number of zeta = 1 + number of atom for this type = 4 + + TOTAL ATOM NUMBER = 5 + + CARTESIAN COORDINATES ( UNIT = 10 Bohr ). + atom x y z mag + tauc_C1 0.981274803 0.861285385001 0.838442496 0 + tauc_H1 0.0235572019992 0.758025625 0.663513359999 0 + tauc_H2 0.78075702 0.889445934999 0.837363467999 0 + tauc_H3 0.0640916129996 0.0434389050006 0.840995502 0 + tauc_H4 0.0393212140007 0.756530859 0.00960920699981 0 + + + Volume (Bohr^3) = 1000 + Volume (A^3) = 148.184534296 + + Lattice vectors: (Cartesian coordinate: in unit of a_0) + +1 +0 +0 + +0 +1 +0 + +0 +0 +1 + Reciprocal vectors: (Cartesian coordinate: in unit of 2 pi/a_0) + +1 +0 +0 + +0 +1 +0 + +0 -0 +1 + + + + + >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + | | + | Reading pseudopotentials files: | + | The pseudopotential file is in UPF format. The 'NC' indicates that | + | the type of pseudopotential is 'norm conserving'. Functional of | + | exchange and correlation is decided by 4 given parameters in UPF | + | file. We also read in the 'core correction' if there exists. | + | Also we can read the valence electrons number and the maximal | + | angular momentum used in this pseudopotential. We also read in the | + | trail wave function, trail atomic density and local-pseudopotential| + | on logrithmic grid. The non-local pseudopotential projector is also| + | read in if there is any. 
| + | | + <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + + + + PAO radial cut off (Bohr) = 15 + + Read in pseudopotential file is C_ONCV_PBE-1.0.upf + pseudopotential type = NC + functional Ex = PBE + functional Ec = + functional GCEx = + functional GCEc = + nonlocal core correction = 0 + valence electrons = 4 + lmax = 1 + number of zeta = 0 + number of projectors = 4 + L of projector = 0 + L of projector = 0 + L of projector = 1 + L of projector = 1 + PAO radial cut off (Bohr) = 15 + + Read in pseudopotential file is H_ONCV_PBE-1.0.upf + pseudopotential type = NC + functional Ex = PBE + functional Ec = + functional GCEx = + functional GCEc = + nonlocal core correction = 0 + valence electrons = 1 + lmax = 0 + number of zeta = 0 + number of projectors = 2 + L of projector = 0 + L of projector = 0 + initial pseudo atomic orbital number = 0 + NLOCAL = 45 + + SETUP THE ELECTRONS NUMBER + electron number of element C = 4 + total electron number of element C = 4 + electron number of element H = 1 + total electron number of element H = 4 + occupied bands = 4 + NBANDS = 8 + DONE : SETUP UNITCELL Time : 0.0506949424744 (SEC) + + + + + + >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + | | + | Doing symmetry analysis: | + | We calculate the norm of 3 vectors and the angles between them, | + | the type of Bravais lattice is given. We can judge if the unticell | + | is a primitive cell. Finally we give the point group operation for | + | this unitcell. We we use the point group operations to do symmetry | + | analysis on given k-point mesh and the charge density. | + | | + <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + + + + LATTICE VECTORS: (CARTESIAN COORDINATE: IN UNIT OF A0) + +1 +0 +0 + +0 +1 +0 + +0 +0 +1 + right hand lattice = 1 + NORM_A = 1 + NORM_B = 1 + NORM_C = 1 + ALPHA (DEGREE) = 90 + BETA (DEGREE) = 90 + GAMMA (DEGREE) = 90 + BRAVAIS TYPE = 1 + BRAVAIS LATTICE NAME = 01. 
Cubic P (simple) + STANDARD LATTICE VECTORS: (CARTESIAN COORDINATE: IN UNIT OF A0) + +1 +0 +0 + +0 +1 +0 + +0 +0 +1 + IBRAV = 1 + BRAVAIS = SIMPLE CUBIC + LATTICE CONSTANT A = 4.35889894354 + ibrav = 1 + ROTATION MATRICES = 48 + PURE POINT GROUP OPERATIONS = 1 + SPACE GROUP OPERATIONS = 1 + POINT GROUP = C_1 + DONE : SYMMETRY Time : 0.103345155716 (SEC) + + + + + + >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + | | + | Setup K-points | + | We setup the k-points according to input parameters. | + | The reduced k-points are set according to symmetry operations. | + | We treat the spin as another set of k-points. | + | | + <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + + + + + SETUP K-POINTS + nspin = 1 + Input type of k points = Monkhorst-Pack(Gamma) + nkstot = 1 + nkstot_ibz = 1 + IBZ DirectX DirectY DirectZ Weight ibz2bz + 1 0 0 0 1 0 + nkstot now = 1 + + KPOINTS DIRECT_X DIRECT_Y DIRECT_Z WEIGHT + 1 0 0 0 1 + + k-point number in this process = 1 + minimum distributed K point number = 1 + + KPOINTS CARTESIAN_X CARTESIAN_Y CARTESIAN_Z WEIGHT + 1 0 0 0 2 + + KPOINTS DIRECT_X DIRECT_Y DIRECT_Z WEIGHT + 1 0 0 0 2 + DONE : INIT K-POINTS Time : 0.103604078293 (SEC) + + + + + + >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + | | + | Setup plane waves: | + | Use the energy cutoff and the lattice vectors to generate the | + | dimensions of FFT grid. The number of FFT grid on each processor | + | is 'nrxx'. The number of plane wave basis in reciprocal space is | + | different for charege/potential and wave functions. We also set | + | the 'sticks' for the parallel of FFT. 
| + | | + <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + + + + + SETUP THE PLANE WAVE BASIS + energy cutoff for wavefunc (unit:Ry) = 100 + [fft grid for wave functions] = 64, 64, 64 + [fft grid for charge/potential] = 64, 64, 64 + [fft grid division] = 1, 1, 1 + [big fft grid for charge/potential] = 64, 64, 64 + nbxx = 262144 + nrxx = 262144 + + SETUP PLANE WAVES FOR CHARGE/POTENTIAL + number of plane waves = 135043 + number of sticks = 3181 + + SETUP PLANE WAVES FOR WAVE FUNCTIONS + number of plane waves = 16879 + number of sticks = 793 + + PARALLEL PW FOR CHARGE/POTENTIAL + PROC COLUMNS(POT) PW + 1 3181 135043 + --------------- sum ------------------- + 1 3181 135043 + + PARALLEL PW FOR WAVE FUNCTIONS + PROC COLUMNS(W) PW + 1 793 16879 + --------------- sum ------------------- + 1 793 16879 + + SETUP COORDINATES OF PLANE WAVES + number of total plane waves = 135043 + + SETUP COORDINATES OF PLANE WAVES + number of |g| = 847 + max |g| = 1013 + min |g| = 0 + DONE : INIT PLANEWAVE Time : 0.294428110123 (SEC) + + npwx = 16879 + + SETUP NONLOCAL PSEUDOPOTENTIALS IN PLANE WAVE BASIS + C non-local projectors: + projector 1 L=0 + projector 2 L=0 + projector 3 L=1 + projector 4 L=1 + H non-local projectors: + projector 1 L=0 + projector 2 L=0 + TOTAL NUMBER OF NONLOCAL PROJECTORS = 16 + DONE : LOCAL POTENTIAL Time : 0.339339256287 (SEC) + + + Init Non-Local PseudoPotential table : + Init Non-Local-Pseudopotential done. + DONE : NON-LOCAL POTENTIAL Time : 0.352857112885 (SEC) + + start_pot = atomic + DONE : INIT POTENTIAL Time : 0.642299 (SEC) + + + Make real space PAO into reciprocal space. 
+ max mesh points in Pseudopotential = 601 + dq(describe PAO in reciprocal space) = 0.01 + max q = 1206 + + number of pseudo atomic orbitals for C is 0 + + number of pseudo atomic orbitals for H is 0 + DONE : INIT BASIS Time : 0.859031 (SEC) + + ------------------------------------------- + ------------------------------------------- + + PW ALGORITHM --------------- ION= 1 ELEC= 1-------------------------------- + K-point CG iter num Time(Sec) + 1 8.000000 1.417983 + + Density error is 0.705428908048 + Error Threshold = 0.010000000000 + + Energy Rydberg eV + E_KohnSham -16.017544057872 -217.929867153100 + E_Harris -16.408058532460 -223.243089158968 + E_Fermi -0.626082258799 -8.518286136372 + + PW ALGORITHM --------------- ION= 1 ELEC= 2-------------------------------- + K-point CG iter num Time(Sec) + 1 3.000000 0.623305 + + Density error is 0.037193130746 + Error Threshold = 0.008817861351 + + Energy Rydberg eV + E_KohnSham -16.144996950340 -219.663952717248 + E_Harris -16.158845333973 -219.852369642740 + E_Fermi -0.425221736464 -5.785438529359 + + PW ALGORITHM --------------- ION= 1 ELEC= 3-------------------------------- + K-point CG iter num Time(Sec) + 1 3.750000 0.765803 + + Density error is 0.008021287795 + Error Threshold = 0.000464914134 + + Energy Rydberg eV + E_KohnSham -16.143766641525 -219.647213507066 + E_Harris -16.146720471833 -219.687402430176 + E_Fermi -0.423547802407 -5.762663488108 + + PW ALGORITHM --------------- ION= 1 ELEC= 4-------------------------------- + K-point CG iter num Time(Sec) + 1 3.250000 0.691389 + + Density error is 0.001345350159 + Error Threshold = 0.000100266097 + + Energy Rydberg eV + E_KohnSham -16.143852570867 -219.648382635743 + E_Harris -16.144348491782 -219.655129985945 + E_Fermi -0.417940644498 -5.686374190966 + + PW ALGORITHM --------------- ION= 1 ELEC= 5-------------------------------- + K-point CG iter num Time(Sec) + 1 3.500000 0.724421 + + Density error is 0.000193438710 + Error Threshold = 0.000016816877 + + 
Energy Rydberg eV + E_KohnSham -16.143964486009 -219.649905319365 + E_Harris -16.144043239940 -219.650976821572 + E_Fermi -0.412189586104 -5.608127027270 + + PW ALGORITHM --------------- ION= 1 ELEC= 6-------------------------------- + K-point CG iter num Time(Sec) + 1 4.750000 0.929892 + + Density error is 0.000033144966 + Error Threshold = 0.000002417984 + + Energy Rydberg eV + E_KohnSham -16.143962062113 -219.649872340569 + E_Harris -16.143973121330 -219.650022808940 + E_Fermi -0.408955040431 -5.564118775681 + + PW ALGORITHM --------------- ION= 1 ELEC= 7-------------------------------- + K-point CG iter num Time(Sec) + 1 2.625000 0.571903 + + Density error is 0.000005013879 + Error Threshold = 0.000000414312 + + Energy Rydberg eV + E_KohnSham -16.143964909086 -219.649911075624 + E_Harris -16.143965842245 -219.649923771901 + E_Fermi -0.407628831587 -5.546074778663 + + PW ALGORITHM --------------- ION= 1 ELEC= 8-------------------------------- + K-point CG iter num Time(Sec) + 1 3.625000 0.729712 + + Density error is 0.000001011179 + Error Threshold = 0.000000062673 + + Energy Rydberg eV + E_KohnSham -16.143964999058 -219.649912299760 + E_Harris -16.143965129574 -219.649914075510 + E_Fermi -0.407392472263 -5.542858945081 + + PW ALGORITHM --------------- ION= 1 ELEC= 9-------------------------------- + K-point CG iter num Time(Sec) + 1 3.500000 0.698617 + + Density error is 0.000000165426 + Error Threshold = 0.000000012640 + + Energy Rydberg eV + E_KohnSham -16.143965114417 -219.649913869298 + E_Harris -16.143965110616 -219.649913817582 + E_Fermi -0.407267643914 -5.541160568271 + + PW ALGORITHM --------------- ION= 1 ELEC= 10-------------------------------- + K-point CG iter num Time(Sec) + 1 3.000000 0.629669 + + Density error is 0.000000034854 + Error Threshold = 0.000000002068 + + Energy Rydberg eV + E_KohnSham -16.143965129640 -219.649914076412 + E_Harris -16.143965128874 -219.649914065985 + E_Fermi -0.407195695883 -5.540181665084 + + PW ALGORITHM 
--------------- ION= 1 ELEC= 11-------------------------------- + K-point CG iter num Time(Sec) + 1 4.000000 0.813535 + + Density error is 0.000000008107 + Error Threshold = 0.000000000436 + + Energy Rydberg eV + E_KohnSham -16.143965128688 -219.649914063463 + E_Harris -16.143965130414 -219.649914086943 + E_Fermi -0.407157690601 -5.539664576698 + + PW ALGORITHM --------------- ION= 1 ELEC= 12-------------------------------- + K-point CG iter num Time(Sec) + 1 3.125000 0.674851 + + Density error is 0.000000001913 + Error Threshold = 0.000000000101 + + Energy Rydberg eV + E_KohnSham -16.143965127817 -219.649914051607 + E_Harris -16.143965128941 -219.649914066906 + E_Fermi -0.407141275858 -5.539441242659 + + PW ALGORITHM --------------- ION= 1 ELEC= 13-------------------------------- + K-point CG iter num Time(Sec) + 1 4.000000 0.804644 + + Density error is 0.000000000482 + Error Threshold = 0.000000000024 + + Energy Rydberg eV + E_KohnSham -16.143965127167 -219.649914042766 + E_Harris -16.143965127871 -219.649914052349 + E_band -5.963675568058 -81.139968748982 + E_one_elec -25.076858425354 -341.188162524121 + E_Hartree +13.701412040584 +186.417274397756 + E_xc -6.404563484424 -87.138556590897 + E_Ewald +1.636044742026 +22.259530674496 + E_demet -0.000000000000 -0.000000000000 + E_descf +0.000000000000 +0.000000000000 + E_efield +0.000000000000 +0.000000000000 + E_Fermi -0.407135332076 -5.539360373357 + charge density convergence is achieved + final etot is -219.649914042766 eV + + STATE ENERGY(eV) AND OCCUPATIONS. 
1/1 kpoint (Cartesian) = 0.00000 0.00000 0.00000 (16879 pws) + [spin1_state] 1 -15.903159 2.000000 + [spin1_state] 2 -8.412500 2.000000 + [spin1_state] 3 -8.255893 2.000000 + [spin1_state] 4 -7.998432 2.000000 + [spin1_state] 5 -0.514947 0.000000 + [spin1_state] 6 2.727369 0.000000 + [spin1_state] 7 3.067094 0.000000 + [spin1_state] 8 4.824439 0.000000 + + + -------------------------------------------- + !FINAL_ETOT_IS -219.6499140427659142 eV + -------------------------------------------- + + + + + + + |CLASS_NAME---------|NAME---------------|TIME(Sec)-----|CALLS----|AVG------|PER%------- + A DC_Driv reading +0.104 1 +0.10 +0.59% + A DC_Driv divide_frag +0.19 1 +0.19 +1.09% + B PW_Basis gen_pw +0.19 1 +0.19 +1.09% + A DC_Driv solve_eachf +17.20 1 +17.20 +98.32% + B Run_Frag frag_pw_line +17.20 1 +17.20 +98.32% + X FFT FFT3D +9.69 1332 +0.01 +55.38% + E potential v_of_rho +3.20 14 +0.23 +18.27% + C wavefunc wfcinit +0.22 1 +0.22 +1.24% + G Hamilt_PW cinitcgg +2.31 14 +0.17 +13.20% + H Hamilt_PW h_psi +9.29 513 +0.02 +53.10% + I Hamilt_PW add_vuspsi +0.45 513 +0.00 +2.57% + C Ions opt_ions_pw +16.63 1 +16.63 +95.07% + D electrons self_consistent +14.90 1 +14.90 +85.15% + E electrons c_bands +10.25 13 +0.79 +58.60% + F Hamilt diago +10.08 13 +0.78 +57.59% + G Diago_CG diag +7.93 13 +0.61 +45.35% + E Charge mix_rho +0.35 13 +0.03 +1.98% + ---------------------------------------------------------------------------------------- + + CLASS_NAME---------|NAME---------------|MEMORY(MB)-------- + +29.4953 + PW_Basis struc_fac +4.1212 + Use_FFT porter +4.0000 + wavefunc evc +2.0604 + Charge rho +2.0000 + Charge rho_save +2.0000 + Charge rho_core +2.0000 + potential vltot +2.0000 + potential vr +2.0000 + potential vrs +2.0000 + potential vrs1 +2.0000 + potential vnew +2.0000 + Charge rhog +1.0303 + Charge rhog_save +1.0303 + Charge rhog_core +1.0303 + ---------------------------------------------------------- + + Start Time : Fri May 7 16:18:50 2021 + Finish Time : Fri May 7 
16:19:08 2021 + Total Time : +0 h +0 mins +18 secs diff --git a/tests/test_abacus_pw_scf.py b/tests/test_abacus_pw_scf.py index 8d13dddcf..20751f819 100644 --- a/tests/test_abacus_pw_scf.py +++ b/tests/test_abacus_pw_scf.py @@ -146,5 +146,23 @@ def test_return_zero(self): self.assertEqual(len(self.system_ch4), 0) +class TestABACUSLabeledOutputNoFS(unittest.TestCase): + def setUp(self): + shutil.copy("abacus.scf/INPUT.ch4-noforcestress", "abacus.scf/INPUT") + + def tearDown(self): + if os.path.isfile("abacus.scf/INPUT"): + os.remove("abacus.scf/INPUT") + + def test_noforcestress_job(self): + # check below will not throw error + system_ch4 = dpdata.LabeledSystem("abacus.scf", fmt="abacus/scf") + # check the returned force is empty + self.assertFalse(system_ch4.data["forces"]) + self.assertTrue("virials" not in system_ch4.data) + # test append self + system_ch4.append(system_ch4) + + if __name__ == "__main__": unittest.main() From 1c6223939738769aaafaa28963bb596090611016 Mon Sep 17 00:00:00 2001 From: "A bot of @njzjz" <48687836+njzjz-bot@users.noreply.github.com> Date: Mon, 8 Jul 2024 22:12:38 -0400 Subject: [PATCH 05/24] fix: remove ref-names from .git_archival.txt (#679) Removing ref-names is now suggested by setuptools-scm, per https://setuptools-scm.readthedocs.io/en/latest/usage/#git-archives, as it may lead to the git archive checksum changing. Generated by the task: https://github.com/njzjz-bot/njzjz-bot/issues/6. 
--- .git_archival.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/.git_archival.txt b/.git_archival.txt index 8fb235d70..7c5100942 100644 --- a/.git_archival.txt +++ b/.git_archival.txt @@ -1,4 +1,3 @@ node: $Format:%H$ node-date: $Format:%cI$ describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$ -ref-names: $Format:%D$ From 6227f49c1a50b60bb15ccafc40ca9a1abf862d32 Mon Sep 17 00:00:00 2001 From: "A bot of @njzjz" <48687836+njzjz-bot@users.noreply.github.com> Date: Tue, 16 Jul 2024 18:19:12 -0400 Subject: [PATCH 06/24] docs: pin sphinx-argparse to < 0.5.0 (#683) Pin sphinx-argparse to <0.5.0 due to sphinx-doc/sphinx-argparse#56. Generated by the task: https://github.com/njzjz-bot/njzjz-bot/issues/7. ## Summary by CodeRabbit - **Chores** - Updated `sphinx-argparse` dependency version to `<0.5.0` to ensure compatibility. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 35b30decf..05a6e243b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,7 +59,7 @@ docs = [ 'numpydoc', 'm2r2', 'deepmodeling-sphinx>=0.1.1', - 'sphinx-argparse', + 'sphinx-argparse<0.5.0', 'rdkit', 'jupyterlite-sphinx', 'jupyterlite-xeus', From c21c538fae0c17ee1fa5d555e3a5ca1d8c1c48f2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 18 Jul 2024 09:03:13 +0800 Subject: [PATCH 07/24] [pre-commit.ci] pre-commit autoupdate (#677) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.4.10 → v0.5.2](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.10...v0.5.2) - [github.com/asottile/blacken-docs: 1.16.0 → 1.18.0](https://github.com/asottile/blacken-docs/compare/1.16.0...1.18.0) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com> --- 
.pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d9e92b7b3..252fe57be 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: # Python - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.4.10 + rev: v0.5.2 hooks: - id: ruff args: ["--fix"] @@ -36,7 +36,7 @@ repos: args: ["--write"] # Python inside docs - repo: https://github.com/asottile/blacken-docs - rev: 1.16.0 + rev: 1.18.0 hooks: - id: blacken-docs ci: From cc4001a4cae68ad0e440996710638af58800d5d6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 11:19:56 +0800 Subject: [PATCH 08/24] [pre-commit.ci] pre-commit autoupdate (#685) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.5.2 → v0.5.4](https://github.com/astral-sh/ruff-pre-commit/compare/v0.5.2...v0.5.4) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 252fe57be..373e0d222 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: # Python - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.5.2 + rev: v0.5.4 hooks: - id: ruff args: ["--fix"] From 42dd136c050dc21f82ce840677813544e6c98db0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 31 Jul 2024 17:46:37 -0400 Subject: [PATCH 09/24] Bump CodSpeedHQ/action from 2 to 3 (#691) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [CodSpeedHQ/action](https://github.com/codspeedhq/action) from 2 to 3.
Release notes

Sourced from CodSpeedHQ/action's releases.

v3.0.0

What's Changed

Check the runner release notes for more details.

Full Changelog: https://github.com/CodSpeedHQ/action/compare/v2...v3.0.0

v2.4.4

What's Changed

Full Changelog: https://github.com/CodSpeedHQ/action/compare/v2.4.3...v2.4.4

v2.4.3

What's Changed

Check the runner release notes for more details.

Full Changelog: https://github.com/CodSpeedHQ/action/compare/v2.4.2...v2.4.3

v2.4.2

What's Changed

Check the runner release notes for more details.

Full Changelog: https://github.com/CodSpeedHQ/action/compare/v2.4.1...v2.4.2

v2.4.1

What's Changed

Check the runner release notes for more details.

Full Changelog: https://github.com/CodSpeedHQ/action/compare/v2.4.0...v2.4.1

v2.4.0

What's Changed

Check the runner release notes for more details.

Full Changelog: https://github.com/CodSpeedHQ/action/compare/v2.3.1...v2.4.0

... (truncated)

Changelog

Sourced from CodSpeedHQ/action's changelog.

[3.0.0] - 2024-07-26

[2.4.4] - 2024-07-26

  • chore: report that a new version is available

[2.4.3] - 2024-07-12

[2.4.2] - 2024-06-14

[2.4.1] - 2024-04-29

[2.4.0] - 2024-04-26

[2.3.1] - 2024-04-24

[2.3.0] - 2024-02-31

[2.2.1] - 2024-02-22

[2.2.0] - 2024-02-22

[2.1.1] - 2024-02-02

[2.0.3] - 2024-01-04

... (truncated)

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=CodSpeedHQ/action&package-manager=github_actions&previous-version=2&new-version=3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) Dependabot will merge this PR once CI passes on it, as requested by @njzjz. [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/benchmark.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 25650184b..1dd682776 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -17,7 +17,7 @@ jobs: - name: Install dependencies run: uv pip install --system .[test,amber,ase,pymatgen,benchmark] rdkit openbabel-wheel - name: Run benchmarks - uses: CodSpeedHQ/action@v2 + uses: CodSpeedHQ/action@v3 with: token: ${{ secrets.CODSPEED_TOKEN }} run: pytest benchmark/ --codspeed From c565517191d4e26961664195406e1c407fc70c28 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 3 Aug 2024 16:29:13 +0800 Subject: [PATCH 10/24] [pre-commit.ci] pre-commit autoupdate (#690) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.5.4 → v0.5.5](https://github.com/astral-sh/ruff-pre-commit/compare/v0.5.4...v0.5.5) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 373e0d222..a82074bf8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: # Python - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. 
- rev: v0.5.4 + rev: v0.5.5 hooks: - id: ruff args: ["--fix"] From 5b54e3d8a0521542a0fb4c68d538e2a98670936a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 6 Aug 2024 16:45:53 +0800 Subject: [PATCH 11/24] [pre-commit.ci] pre-commit autoupdate (#694) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.5.5 → v0.5.6](https://github.com/astral-sh/ruff-pre-commit/compare/v0.5.5...v0.5.6) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a82074bf8..1bb2fcc02 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: # Python - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.5.5 + rev: v0.5.6 hooks: - id: ruff args: ["--fix"] From 3c19cedeba8e65cc93604cdf9900dcc58fa75100 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 Aug 2024 08:50:07 +0800 Subject: [PATCH 12/24] [pre-commit.ci] pre-commit autoupdate (#696) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.5.6 → v0.5.7](https://github.com/astral-sh/ruff-pre-commit/compare/v0.5.6...v0.5.7) ## Summary by CodeRabbit - **Chores** - Updated the Ruff pre-commit hook to the latest version (v0.5.7) for improved linting of Python code. 
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1bb2fcc02..4f16df290 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: # Python - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.5.6 + rev: v0.5.7 hooks: - id: ruff args: ["--fix"] From cbcbb92ce959ff08647c5e2587599a54c9cc3663 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 20 Aug 2024 14:15:49 +0800 Subject: [PATCH 13/24] [pre-commit.ci] pre-commit autoupdate (#697) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.5.7 → v0.6.1](https://github.com/astral-sh/ruff-pre-commit/compare/v0.5.7...v0.6.1) ## Summary by CodeRabbit - **Chores** - Updated the version of the Ruff pre-commit hook from v0.5.7 to v0.6.1, potentially enhancing linting and formatting during pre-commit checks. Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4f16df290..c255e2b1c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: # Python - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. 
- rev: v0.5.7 + rev: v0.6.1 hooks: - id: ruff args: ["--fix"] From 2fb0804116761aef072021e412b589be551c2f69 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 26 Aug 2024 20:53:38 -0400 Subject: [PATCH 14/24] docs: migrate from m2r2 to myst-parser (#701) ## Summary by CodeRabbit - **Documentation Updates** - Updated documentation generation tools to use `myst_parser`, enhancing Markdown compatibility and features. - Improved documentation processing and rendering capabilities. --- docs/conf.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index e3c0b3d4c..a21d19d56 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -49,7 +49,7 @@ "sphinx.ext.viewcode", "sphinx.ext.intersphinx", "numpydoc", - "m2r2", + "myst_parser", "sphinxarg.ext", "jupyterlite_sphinx", ] diff --git a/pyproject.toml b/pyproject.toml index 05a6e243b..4bf2e64b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,7 +57,7 @@ docs = [ 'recommonmark', 'sphinx_rtd_theme>=1.0.0rc1', 'numpydoc', - 'm2r2', + 'myst-parser', 'deepmodeling-sphinx>=0.1.1', 'sphinx-argparse<0.5.0', 'rdkit', From 0a50f4d5d62b070b92f708ace0c83b8281476d62 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 27 Aug 2024 01:25:03 +0000 Subject: [PATCH 15/24] [pre-commit.ci] pre-commit autoupdate (#700) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.6.1 → v0.6.2](https://github.com/astral-sh/ruff-pre-commit/compare/v0.6.1...v0.6.2) ## Summary by CodeRabbit - **Chores** - Updated the version of the Ruff pre-commit hook to enhance performance and reliability. 
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c255e2b1c..f4abb50c3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: # Python - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.6.1 + rev: v0.6.2 hooks: - id: ruff args: ["--fix"] From 676517aa64b6b2825a6551655c79cd93449a15be Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Wed, 28 Aug 2024 21:34:11 -0400 Subject: [PATCH 16/24] feat: customized dtypes for unlabeled deepmd (#702) ## Summary by CodeRabbit - **New Features** - Enhanced flexibility in data type handling for system data processing based on the presence of labels. - New data type registration for "foo" with updated shape and configuration. - **Bug Fixes** - Improved adaptability and performance in data processing functions by refining dtype selection. - **Refactor** - Streamlined test structure for loading and dumping data types, improving organization and maintainability of test cases. 
--------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- dpdata/deepmd/comp.py | 76 ++++++++++++--------- dpdata/deepmd/hdf5.py | 13 +++- dpdata/deepmd/raw.py | 76 +++++++++++---------- tests/plugin/dpdata_plugin_test/__init__.py | 4 ++ tests/test_custom_data_type.py | 38 ++++++++--- 5 files changed, 126 insertions(+), 81 deletions(-) diff --git a/dpdata/deepmd/comp.py b/dpdata/deepmd/comp.py index ab0044477..d6ccf3ffa 100644 --- a/dpdata/deepmd/comp.py +++ b/dpdata/deepmd/comp.py @@ -67,40 +67,43 @@ def to_system_data(folder, type_map=None, labels=True): data["virials"] = np.concatenate(all_virs, axis=0) # allow custom dtypes if labels: - for dtype in dpdata.system.LabeledSystem.DTYPES: - if dtype.name in ( - "atom_numbs", - "atom_names", - "atom_types", - "orig", - "cells", - "coords", - "real_atom_types", - "real_atom_names", - "nopbc", - "energies", - "forces", - "virials", - ): - # skip as these data contains specific rules - continue - if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES): - warnings.warn( - f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/npy format." 
- ) - continue - natoms = data["coords"].shape[1] - shape = [ - natoms if xx == dpdata.system.Axis.NATOMS else xx - for xx in dtype.shape[1:] - ] - all_data = [] - for ii in sets: - tmp = _cond_load_data(os.path.join(ii, dtype.name + ".npy")) - if tmp is not None: - all_data.append(np.reshape(tmp, [tmp.shape[0], *shape])) - if len(all_data) > 0: - data[dtype.name] = np.concatenate(all_data, axis=0) + dtypes = dpdata.system.LabeledSystem.DTYPES + else: + dtypes = dpdata.system.System.DTYPES + + for dtype in dtypes: + if dtype.name in ( + "atom_numbs", + "atom_names", + "atom_types", + "orig", + "cells", + "coords", + "real_atom_types", + "real_atom_names", + "nopbc", + "energies", + "forces", + "virials", + ): + # skip as these data contains specific rules + continue + if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES): + warnings.warn( + f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/npy format." + ) + continue + natoms = data["coords"].shape[1] + shape = [ + natoms if xx == dpdata.system.Axis.NATOMS else xx for xx in dtype.shape[1:] + ] + all_data = [] + for ii in sets: + tmp = _cond_load_data(os.path.join(ii, dtype.name + ".npy")) + if tmp is not None: + all_data.append(np.reshape(tmp, [tmp.shape[0], *shape])) + if len(all_data) > 0: + data[dtype.name] = np.concatenate(all_data, axis=0) return data @@ -173,6 +176,11 @@ def dump(folder, data, set_size=5000, comp_prec=np.float32, remove_sets=True): with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: pass # allow custom dtypes + labels = "energies" in data + if labels: + dtypes = dpdata.system.LabeledSystem.DTYPES + else: + dtypes = dpdata.system.System.DTYPES for dtype in dpdata.system.LabeledSystem.DTYPES: if dtype.name in ( "atom_numbs", diff --git a/dpdata/deepmd/hdf5.py b/dpdata/deepmd/hdf5.py index 34ae9dbef..78f4e33fe 100644 --- a/dpdata/deepmd/hdf5.py +++ b/dpdata/deepmd/hdf5.py @@ -102,7 +102,11 @@ def 
to_system_data( }, } # allow custom dtypes - for dtype in dpdata.system.LabeledSystem.DTYPES: + if labels: + dtypes = dpdata.system.LabeledSystem.DTYPES + else: + dtypes = dpdata.system.System.DTYPES + for dtype in dtypes: if dtype.name in ( "atom_numbs", "atom_names", @@ -210,8 +214,13 @@ def dump( "virials": {"fn": "virial", "shape": (nframes, 9), "dump": True}, } + labels = "energies" in data + if labels: + dtypes = dpdata.system.LabeledSystem.DTYPES + else: + dtypes = dpdata.system.System.DTYPES # allow custom dtypes - for dtype in dpdata.system.LabeledSystem.DTYPES: + for dtype in dtypes: if dtype.name in ( "atom_numbs", "atom_names", diff --git a/dpdata/deepmd/raw.py b/dpdata/deepmd/raw.py index e772714a1..11a9ff6ed 100644 --- a/dpdata/deepmd/raw.py +++ b/dpdata/deepmd/raw.py @@ -64,40 +64,41 @@ def to_system_data(folder, type_map=None, labels=True): data["nopbc"] = True # allow custom dtypes if labels: - for dtype in dpdata.system.LabeledSystem.DTYPES: - if dtype.name in ( - "atom_numbs", - "atom_names", - "atom_types", - "orig", - "cells", - "coords", - "real_atom_types", - "real_atom_names", - "nopbc", - "energies", - "forces", - "virials", - ): - # skip as these data contains specific rules - continue - if not ( - len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES - ): - warnings.warn( - f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/raw format." 
- ) - continue - natoms = data["coords"].shape[1] - shape = [ - natoms if xx == dpdata.system.Axis.NATOMS else xx - for xx in dtype.shape[1:] - ] - if os.path.exists(os.path.join(folder, f"{dtype.name}.raw")): - data[dtype.name] = np.reshape( - np.loadtxt(os.path.join(folder, f"{dtype.name}.raw")), - [nframes, *shape], - ) + dtypes = dpdata.system.LabeledSystem.DTYPES + else: + dtypes = dpdata.system.System.DTYPES + for dtype in dtypes: + if dtype.name in ( + "atom_numbs", + "atom_names", + "atom_types", + "orig", + "cells", + "coords", + "real_atom_types", + "real_atom_names", + "nopbc", + "energies", + "forces", + "virials", + ): + # skip as these data contains specific rules + continue + if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES): + warnings.warn( + f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/raw format." + ) + continue + natoms = data["coords"].shape[1] + shape = [ + natoms if xx == dpdata.system.Axis.NATOMS else xx + for xx in dtype.shape[1:] + ] + if os.path.exists(os.path.join(folder, f"{dtype.name}.raw")): + data[dtype.name] = np.reshape( + np.loadtxt(os.path.join(folder, f"{dtype.name}.raw")), + [nframes, *shape], + ) return data else: raise RuntimeError("not dir " + folder) @@ -144,7 +145,12 @@ def dump(folder, data): with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: pass # allow custom dtypes - for dtype in dpdata.system.LabeledSystem.DTYPES: + labels = "energies" in data + if labels: + dtypes = dpdata.system.LabeledSystem.DTYPES + else: + dtypes = dpdata.system.System.DTYPES + for dtype in dtypes: if dtype.name in ( "atom_numbs", "atom_names", diff --git a/tests/plugin/dpdata_plugin_test/__init__.py b/tests/plugin/dpdata_plugin_test/__init__.py index ef26e7c1d..4e445eb2f 100644 --- a/tests/plugin/dpdata_plugin_test/__init__.py +++ b/tests/plugin/dpdata_plugin_test/__init__.py @@ -10,6 +10,10 @@ DataType("foo", np.ndarray, (Axis.NFRAMES, 2, 4), 
required=False), labeled=True ) +register_data_type( + DataType("foo", np.ndarray, (Axis.NFRAMES, 3, 3), required=False), labeled=False +) + register_data_type( DataType("bar", np.ndarray, (Axis.NFRAMES, Axis.NATOMS, -1), required=False), labeled=True, diff --git a/tests/test_custom_data_type.py b/tests/test_custom_data_type.py index e94ba5e0f..a319af35f 100644 --- a/tests/test_custom_data_type.py +++ b/tests/test_custom_data_type.py @@ -9,10 +9,12 @@ from dpdata.data_type import Axis, DataType -class TestDeepmdLoadDumpComp(unittest.TestCase): +class DeepmdLoadDumpCompTest: def setUp(self): - self.system = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") - self.foo = np.ones((len(self.system), 2, 4)) + self.system = self.cls( + data=dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar").data + ) + self.foo = np.ones((len(self.system), *self.shape)) self.system.data["foo"] = self.foo self.system.check_data() @@ -23,7 +25,7 @@ def test_to_deepmd_raw(self): def test_from_deepmd_raw(self): self.system.to_deepmd_raw("data_foo") - x = dpdata.LabeledSystem("data_foo", fmt="deepmd/raw") + x = self.cls("data_foo", fmt="deepmd/raw") np.testing.assert_allclose(x.data["foo"], self.foo) def test_to_deepmd_npy(self): @@ -33,7 +35,7 @@ def test_to_deepmd_npy(self): def test_from_deepmd_npy(self): self.system.to_deepmd_npy("data_foo") - x = dpdata.LabeledSystem("data_foo", fmt="deepmd/npy") + x = self.cls("data_foo", fmt="deepmd/npy") np.testing.assert_allclose(x.data["foo"], self.foo) def test_to_deepmd_hdf5(self): @@ -44,18 +46,34 @@ def test_to_deepmd_hdf5(self): def test_from_deepmd_hdf5(self): self.system.to_deepmd_hdf5("data_foo.h5") - x = dpdata.LabeledSystem("data_foo.h5", fmt="deepmd/hdf5") + x = self.cls("data_foo.h5", fmt="deepmd/hdf5") np.testing.assert_allclose(x.data["foo"], self.foo) def test_duplicated_data_type(self): - dt = DataType("foo", np.ndarray, (Axis.NFRAMES, 2, 4), required=False) - n_dtypes_old = len(dpdata.LabeledSystem.DTYPES) 
+ dt = DataType("foo", np.ndarray, (Axis.NFRAMES, *self.shape), required=False) + n_dtypes_old = len(self.cls.DTYPES) with self.assertWarns(UserWarning): - dpdata.LabeledSystem.register_data_type(dt) - n_dtypes_new = len(dpdata.LabeledSystem.DTYPES) + self.cls.register_data_type(dt) + n_dtypes_new = len(self.cls.DTYPES) self.assertEqual(n_dtypes_old, n_dtypes_new) +class TestDeepmdLoadDumpCompUnlabeled(unittest.TestCase, DeepmdLoadDumpCompTest): + cls = dpdata.System + shape = (3, 3) + + def setUp(self): + DeepmdLoadDumpCompTest.setUp(self) + + +class TestDeepmdLoadDumpCompLabeled(unittest.TestCase, DeepmdLoadDumpCompTest): + cls = dpdata.LabeledSystem + shape = (2, 4) + + def setUp(self): + DeepmdLoadDumpCompTest.setUp(self) + + class TestDeepmdLoadDumpCompAny(unittest.TestCase): def setUp(self): self.system = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") From f8a1b6b20eb9ac7f1aa6d20ee806a59e6fb00c11 Mon Sep 17 00:00:00 2001 From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com> Date: Sat, 31 Aug 2024 18:33:46 +0800 Subject: [PATCH 17/24] test: support comparison between two multi systems (#705) ## Summary by CodeRabbit - **New Features** - Introduced functions and classes to enhance testing capabilities for multi-system comparisons. - Added validation classes for periodic boundary conditions across multi-system objects. - **Bug Fixes** - Updated test classes to utilize new multi-system handling, improving clarity and functionality. - **Documentation** - Enhanced clarity in variable naming for better alignment with multi-system concepts. 
--------- Co-authored-by: Han Wang --- tests/comp_sys.py | 84 ++++++++++++++++++++++++++++++++++++++ tests/test_deepmd_mixed.py | 26 +++++++----- 2 files changed, 100 insertions(+), 10 deletions(-) diff --git a/tests/comp_sys.py b/tests/comp_sys.py index 99879af61..a3a916a09 100644 --- a/tests/comp_sys.py +++ b/tests/comp_sys.py @@ -105,6 +105,72 @@ def test_virial(self): ) +def _make_comp_ms_test_func(comp_sys_test_func): + """ + Dynamically generates a test function for multi-system comparisons. + + Args: + comp_sys_test_func (Callable): The original test function for single systems. + + Returns + ------- + Callable: A new test function that can handle comparisons between multi-systems. + """ + + def comp_ms_test_func(iobj): + assert hasattr(iobj, "ms_1") and hasattr( + iobj, "ms_2" + ), "Multi-system objects must be present" + iobj.assertEqual(len(iobj.ms_1), len(iobj.ms_2)) + keys = [ii.formula for ii in iobj.ms_1] + keys_2 = [ii.formula for ii in iobj.ms_2] + assert sorted(keys) == sorted( + keys_2 + ), f"Keys of two MS are not equal: {keys} != {keys_2}" + for kk in keys: + iobj.system_1 = iobj.ms_1[kk] + iobj.system_2 = iobj.ms_2[kk] + comp_sys_test_func(iobj) + del iobj.system_1 + del iobj.system_2 + + return comp_ms_test_func + + +def _make_comp_ms_class(comp_class): + """ + Dynamically generates a test class for multi-system comparisons. + + Args: + comp_class (type): The original test class for single systems. + + Returns + ------- + type: A new test class that can handle comparisons between multi-systems. 
+ """ + + class CompMS: + pass + + test_methods = [ + func + for func in dir(comp_class) + if callable(getattr(comp_class, func)) and func.startswith("test_") + ] + + for func in test_methods: + setattr(CompMS, func, _make_comp_ms_test_func(getattr(comp_class, func))) + + return CompMS + + +# MultiSystems comparison from single System comparison +CompMultiSys = _make_comp_ms_class(CompSys) + +# LabeledMultiSystems comparison from single LabeledSystem comparison +CompLabeledMultiSys = _make_comp_ms_class(CompLabeledSys) + + class MultiSystems: def test_systems_name(self): self.assertEqual(set(self.systems.systems), set(self.system_names)) @@ -127,3 +193,21 @@ class IsNoPBC: def test_is_nopbc(self): self.assertTrue(self.system_1.nopbc) self.assertTrue(self.system_2.nopbc) + + +class MSAllIsPBC: + def test_is_pbc(self): + assert hasattr(self, "ms_1") and hasattr( + self, "ms_2" + ), "Multi-system objects must be present and iterable" + self.assertTrue(all([not ss.nopbc for ss in self.ms_1])) + self.assertTrue(all([not ss.nopbc for ss in self.ms_2])) + + +class MSAllIsNoPBC: + def test_is_nopbc(self): + assert hasattr(self, "ms_1") and hasattr( + self, "ms_2" + ), "Multi-system objects must be present and iterable" + self.assertTrue(all([ss.nopbc for ss in self.ms_1])) + self.assertTrue(all([ss.nopbc for ss in self.ms_2])) diff --git a/tests/test_deepmd_mixed.py b/tests/test_deepmd_mixed.py index 70a09dbe8..3c28d1b10 100644 --- a/tests/test_deepmd_mixed.py +++ b/tests/test_deepmd_mixed.py @@ -6,12 +6,18 @@ from glob import glob import numpy as np -from comp_sys import CompLabeledSys, IsNoPBC, MultiSystems +from comp_sys import ( + CompLabeledMultiSys, + CompLabeledSys, + IsNoPBC, + MSAllIsNoPBC, + MultiSystems, +) from context import dpdata class TestMixedMultiSystemsDumpLoad( - unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC + unittest.TestCase, CompLabeledMultiSys, MultiSystems, MSAllIsNoPBC ): def setUp(self): self.places = 6 @@ -62,8 +68,8 @@ def 
setUp(self): self.place_holder_ms.from_deepmd_npy("tmp.deepmd.mixed", fmt="deepmd/npy") self.systems = dpdata.MultiSystems() self.systems.from_deepmd_npy_mixed("tmp.deepmd.mixed", fmt="deepmd/npy/mixed") - self.system_1 = self.ms["C1H4A0B0D0"] - self.system_2 = self.systems["C1H4A0B0D0"] + self.ms_1 = self.ms + self.ms_2 = self.systems mixed_sets = glob("tmp.deepmd.mixed/*/set.*") self.assertEqual(len(mixed_sets), 2) for i in mixed_sets: @@ -112,7 +118,7 @@ def test_str(self): class TestMixedMultiSystemsDumpLoadSetSize( - unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC + unittest.TestCase, CompLabeledMultiSys, MultiSystems, MSAllIsNoPBC ): def setUp(self): self.places = 6 @@ -163,8 +169,8 @@ def setUp(self): self.place_holder_ms.from_deepmd_npy("tmp.deepmd.mixed", fmt="deepmd/npy") self.systems = dpdata.MultiSystems() self.systems.from_deepmd_npy_mixed("tmp.deepmd.mixed", fmt="deepmd/npy/mixed") - self.system_1 = self.ms["C1H4A0B0D0"] - self.system_2 = self.systems["C1H4A0B0D0"] + self.ms_1 = self.ms + self.ms_2 = self.systems mixed_sets = glob("tmp.deepmd.mixed/*/set.*") self.assertEqual(len(mixed_sets), 5) for i in mixed_sets: @@ -213,7 +219,7 @@ def test_str(self): class TestMixedMultiSystemsTypeChange( - unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC + unittest.TestCase, CompLabeledMultiSys, MultiSystems, MSAllIsNoPBC ): def setUp(self): self.places = 6 @@ -265,8 +271,8 @@ def setUp(self): self.place_holder_ms.from_deepmd_npy("tmp.deepmd.mixed", fmt="deepmd/npy") self.systems = dpdata.MultiSystems(type_map=["TOKEN"]) self.systems.from_deepmd_npy_mixed("tmp.deepmd.mixed", fmt="deepmd/npy/mixed") - self.system_1 = self.ms["TOKEN0C1H4A0B0D0"] - self.system_2 = self.systems["TOKEN0C1H4A0B0D0"] + self.ms_1 = self.ms + self.ms_2 = self.systems mixed_sets = glob("tmp.deepmd.mixed/*/set.*") self.assertEqual(len(mixed_sets), 2) for i in mixed_sets: From 6d082f162dc05c0980567a7544a8024b7c024a78 Mon Sep 17 00:00:00 2001 From: Han Wang 
<92130845+wanghan-iapcm@users.noreply.github.com> Date: Sat, 31 Aug 2024 18:39:10 +0800 Subject: [PATCH 18/24] test: mixed data format: test if the index_map (when type_map is provided) works (#706) consider the PR after #705 ## Summary by CodeRabbit - **New Features** - Enhanced testing framework for multi-system comparisons, allowing for dynamic generation of test functions and classes. - Introduced new test classes to validate properties of multi-system objects regarding periodic boundary conditions. - Added a new test class for handling type mapping in labeled systems. - **Bug Fixes** - Updated existing test classes to improve clarity and consistency in naming conventions for multi-system variables. --------- Co-authored-by: Han Wang --- tests/test_deepmd_mixed.py | 113 +++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/tests/test_deepmd_mixed.py b/tests/test_deepmd_mixed.py index 3c28d1b10..9f0c5ed46 100644 --- a/tests/test_deepmd_mixed.py +++ b/tests/test_deepmd_mixed.py @@ -117,6 +117,119 @@ def test_str(self): ) +class TestMixedMultiSystemsDumpLoadTypeMap( + unittest.TestCase, CompLabeledMultiSys, MultiSystems, MSAllIsNoPBC +): + def setUp(self): + self.places = 6 + self.e_places = 6 + self.f_places = 6 + self.v_places = 6 + + # C1H4 + system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + + # C1H3 + system_2 = dpdata.LabeledSystem( + "gaussian/methane_sub.gaussianlog", fmt="gaussian/log" + ) + + tmp_data = system_1.data.copy() + tmp_data["atom_numbs"] = [1, 1, 1, 2] + tmp_data["atom_names"] = ["C", "H", "A", "B"] + tmp_data["atom_types"] = np.array([0, 1, 2, 3, 3]) + # C1H1A1B2 + system_1_modified_type_1 = dpdata.LabeledSystem(data=tmp_data) + + tmp_data = system_1.data.copy() + tmp_data["atom_numbs"] = [1, 1, 2, 1] + tmp_data["atom_names"] = ["C", "H", "A", "B"] + tmp_data["atom_types"] = np.array([0, 1, 2, 2, 3]) + # C1H1A2B1 + system_1_modified_type_2 = 
dpdata.LabeledSystem(data=tmp_data) + + tmp_data = system_1.data.copy() + tmp_data["atom_numbs"] = [1, 1, 1, 2] + tmp_data["atom_names"] = ["C", "H", "A", "D"] + tmp_data["atom_types"] = np.array([0, 1, 2, 3, 3]) + # C1H1A1C2 + system_1_modified_type_3 = dpdata.LabeledSystem(data=tmp_data) + + self.ms = dpdata.MultiSystems( + system_1, + system_2, + system_1_modified_type_1, + system_1_modified_type_2, + system_1_modified_type_3, + ) + + self.ms.to_deepmd_npy_mixed("tmp.deepmd.mixed") + self.place_holder_ms = dpdata.MultiSystems() + self.place_holder_ms.from_deepmd_npy("tmp.deepmd.mixed", fmt="deepmd/npy") + + new_type_map = ["H", "C", "D", "A", "B"] + self.systems = dpdata.MultiSystems() + self.systems.from_deepmd_npy_mixed( + "tmp.deepmd.mixed", fmt="deepmd/npy/mixed", type_map=new_type_map + ) + for kk in [ii.formula for ii in self.ms]: + # apply type_map to each system + self.ms[kk].apply_type_map(new_type_map) + # revise keys in dict according because the type_map is updated. + tmp_ss = self.ms.systems.pop(kk) + self.ms.systems[tmp_ss.formula] = tmp_ss + + self.ms_1 = self.ms + self.ms_2 = self.systems + mixed_sets = glob("tmp.deepmd.mixed/*/set.*") + self.assertEqual(len(mixed_sets), 2) + for i in mixed_sets: + self.assertEqual( + os.path.exists(os.path.join(i, "real_atom_types.npy")), True + ) + + self.system_names = [ + "H4C1D0A0B0", + "H3C1D0A0B0", + "H1C1D0A1B2", + "H1C1D0A2B1", + "H1C1D2A1B0", + ] + self.system_sizes = { + "H4C1D0A0B0": 1, + "H3C1D0A0B0": 1, + "H1C1D0A1B2": 1, + "H1C1D0A2B1": 1, + "H1C1D2A1B0": 1, + } + self.atom_names = ["H", "C", "D", "A", "B"] + + def tearDown(self): + if os.path.exists("tmp.deepmd.mixed"): + shutil.rmtree("tmp.deepmd.mixed") + + def test_len(self): + self.assertEqual(len(self.ms), 5) + self.assertEqual(len(self.place_holder_ms), 2) + self.assertEqual(len(self.systems), 5) + + def test_get_nframes(self): + self.assertEqual(self.ms.get_nframes(), 5) + self.assertEqual(self.place_holder_ms.get_nframes(), 5) + 
self.assertEqual(self.systems.get_nframes(), 5) + + def test_str(self): + self.assertEqual(str(self.ms), "MultiSystems (5 systems containing 5 frames)") + self.assertEqual( + str(self.place_holder_ms), "MultiSystems (2 systems containing 5 frames)" + ) + self.assertEqual( + str(self.systems), "MultiSystems (5 systems containing 5 frames)" + ) + + class TestMixedMultiSystemsDumpLoadSetSize( unittest.TestCase, CompLabeledMultiSys, MultiSystems, MSAllIsNoPBC ): From 35fdbb8df25e79fd50c072f17ec2ddf903bcb9d9 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 31 Aug 2024 07:59:26 -0400 Subject: [PATCH 19/24] fix: reuse regular methods for deepmd/mixed (#704) ## Summary by CodeRabbit - **New Features** - Streamlined data loading process with improved handling of atom types. - Enhanced modularization in data dumping, reducing complexity and improving maintainability. - Added a new test for verifying data conversion and loading processes for mixed format systems. - **Bug Fixes** - Improved clarity and efficiency in processing atom names and counts. 
--------- Signed-off-by: Jinzhe Zeng Co-authored-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com> --- dpdata/deepmd/comp.py | 2 - dpdata/deepmd/mixed.py | 179 ++++----------------------------- dpdata/deepmd/raw.py | 10 +- tests/test_custom_data_type.py | 10 ++ 4 files changed, 31 insertions(+), 170 deletions(-) diff --git a/dpdata/deepmd/comp.py b/dpdata/deepmd/comp.py index d6ccf3ffa..661110162 100644 --- a/dpdata/deepmd/comp.py +++ b/dpdata/deepmd/comp.py @@ -79,7 +79,6 @@ def to_system_data(folder, type_map=None, labels=True): "orig", "cells", "coords", - "real_atom_types", "real_atom_names", "nopbc", "energies", @@ -189,7 +188,6 @@ def dump(folder, data, set_size=5000, comp_prec=np.float32, remove_sets=True): "orig", "cells", "coords", - "real_atom_types", "real_atom_names", "nopbc", "energies", diff --git a/dpdata/deepmd/mixed.py b/dpdata/deepmd/mixed.py index ccf9b2f58..2aff3a28a 100644 --- a/dpdata/deepmd/mixed.py +++ b/dpdata/deepmd/mixed.py @@ -1,54 +1,16 @@ from __future__ import annotations import copy -import glob -import os -import shutil import numpy as np - -def load_type(folder): - data = {} - data["atom_names"] = [] - # if find type_map.raw, use it - assert os.path.isfile( - os.path.join(folder, "type_map.raw") - ), "Mixed type system must have type_map.raw!" 
- with open(os.path.join(folder, "type_map.raw")) as fp: - data["atom_names"] = fp.read().split() - - return data - - -def formula(atom_names, atom_numbs): - """Return the formula of this system, like C3H5O2.""" - return "".join([f"{symbol}{numb}" for symbol, numb in zip(atom_names, atom_numbs)]) - - -def _cond_load_data(fname): - tmp = None - if os.path.isfile(fname): - tmp = np.load(fname) - return tmp - - -def _load_set(folder, nopbc: bool): - coords = np.load(os.path.join(folder, "coord.npy")) - if nopbc: - cells = np.zeros((coords.shape[0], 3, 3)) - else: - cells = np.load(os.path.join(folder, "box.npy")) - eners = _cond_load_data(os.path.join(folder, "energy.npy")) - forces = _cond_load_data(os.path.join(folder, "force.npy")) - virs = _cond_load_data(os.path.join(folder, "virial.npy")) - real_atom_types = np.load(os.path.join(folder, "real_atom_types.npy")) - return cells, coords, eners, forces, virs, real_atom_types +from .comp import dump as comp_dump +from .comp import to_system_data as comp_to_system_data def to_system_data(folder, type_map=None, labels=True): + data = comp_to_system_data(folder, type_map, labels) # data is empty - data = load_type(folder) old_type_map = data["atom_names"].copy() if type_map is not None: assert isinstance(type_map, list) @@ -60,50 +22,16 @@ def to_system_data(folder, type_map=None, labels=True): data["atom_names"] = type_map.copy() else: index_map = None - data["orig"] = np.zeros([3]) - if os.path.isfile(os.path.join(folder, "nopbc")): - data["nopbc"] = True - sets = sorted(glob.glob(os.path.join(folder, "set.*"))) - all_cells = [] - all_coords = [] - all_eners = [] - all_forces = [] - all_virs = [] - all_real_atom_types = [] - for ii in sets: - cells, coords, eners, forces, virs, real_atom_types = _load_set( - ii, data.get("nopbc", False) - ) - nframes = np.reshape(cells, [-1, 3, 3]).shape[0] - all_cells.append(np.reshape(cells, [nframes, 3, 3])) - all_coords.append(np.reshape(coords, [nframes, -1, 3])) - if index_map is 
None: - all_real_atom_types.append(np.reshape(real_atom_types, [nframes, -1])) - else: - all_real_atom_types.append( - np.reshape(index_map[real_atom_types], [nframes, -1]) - ) - if eners is not None: - eners = np.reshape(eners, [nframes]) - if labels: - if eners is not None and eners.size > 0: - all_eners.append(np.reshape(eners, [nframes])) - if forces is not None and forces.size > 0: - all_forces.append(np.reshape(forces, [nframes, -1, 3])) - if virs is not None and virs.size > 0: - all_virs.append(np.reshape(virs, [nframes, 3, 3])) - all_cells_concat = np.concatenate(all_cells, axis=0) - all_coords_concat = np.concatenate(all_coords, axis=0) - all_real_atom_types_concat = np.concatenate(all_real_atom_types, axis=0) - all_eners_concat = None - all_forces_concat = None - all_virs_concat = None - if len(all_eners) > 0: - all_eners_concat = np.concatenate(all_eners, axis=0) - if len(all_forces) > 0: - all_forces_concat = np.concatenate(all_forces, axis=0) - if len(all_virs) > 0: - all_virs_concat = np.concatenate(all_virs, axis=0) + all_real_atom_types_concat = data.pop("real_atom_types").astype(int) + if index_map is not None: + all_real_atom_types_concat = index_map[all_real_atom_types_concat] + all_cells_concat = data["cells"] + all_coords_concat = data["coords"] + if labels: + all_eners_concat = data.get("energies") + all_forces_concat = data.get("forces") + all_virs_concat = data.get("virials") + data_list = [] while True: if all_real_atom_types_concat.size == 0: @@ -143,20 +71,6 @@ def to_system_data(folder, type_map=None, labels=True): def dump(folder, data, set_size=2000, comp_prec=np.float32, remove_sets=True): - os.makedirs(folder, exist_ok=True) - sets = sorted(glob.glob(os.path.join(folder, "set.*"))) - if len(sets) > 0: - if remove_sets: - for ii in sets: - shutil.rmtree(ii) - else: - raise RuntimeError( - "found " - + str(sets) - + " in " - + folder - + "not a clean deepmd raw dir. 
please firstly clean set.* then try compress" - ) # if not converted to mixed if "real_atom_types" not in data: from dpdata import LabeledSystem, System @@ -169,69 +83,10 @@ def dump(folder, data, set_size=2000, comp_prec=np.float32, remove_sets=True): else: temp_sys = System(data=data) temp_sys.convert_to_mixed_type() - # dump raw - np.savetxt(os.path.join(folder, "type.raw"), data["atom_types"], fmt="%d") - np.savetxt(os.path.join(folder, "type_map.raw"), data["real_atom_names"], fmt="%s") - # BondOrder System - if "bonds" in data: - np.savetxt( - os.path.join(folder, "bonds.raw"), - data["bonds"], - header="begin_atom, end_atom, bond_order", - ) - if "formal_charges" in data: - np.savetxt(os.path.join(folder, "formal_charges.raw"), data["formal_charges"]) - # reshape frame properties and convert prec - nframes = data["cells"].shape[0] - cells = np.reshape(data["cells"], [nframes, 9]).astype(comp_prec) - coords = np.reshape(data["coords"], [nframes, -1]).astype(comp_prec) - eners = None - forces = None - virials = None - real_atom_types = None - if "energies" in data: - eners = np.reshape(data["energies"], [nframes]).astype(comp_prec) - if "forces" in data: - forces = np.reshape(data["forces"], [nframes, -1]).astype(comp_prec) - if "virials" in data: - virials = np.reshape(data["virials"], [nframes, 9]).astype(comp_prec) - if "atom_pref" in data: - atom_pref = np.reshape(data["atom_pref"], [nframes, -1]).astype(comp_prec) - if "real_atom_types" in data: - real_atom_types = np.reshape(data["real_atom_types"], [nframes, -1]).astype( - np.int64 - ) - # dump frame properties: cell, coord, energy, force and virial - nsets = nframes // set_size - if set_size * nsets < nframes: - nsets += 1 - for ii in range(nsets): - set_stt = ii * set_size - set_end = (ii + 1) * set_size - set_folder = os.path.join(folder, "set.%06d" % ii) - os.makedirs(set_folder) - np.save(os.path.join(set_folder, "box"), cells[set_stt:set_end]) - np.save(os.path.join(set_folder, "coord"), 
coords[set_stt:set_end]) - if eners is not None: - np.save(os.path.join(set_folder, "energy"), eners[set_stt:set_end]) - if forces is not None: - np.save(os.path.join(set_folder, "force"), forces[set_stt:set_end]) - if virials is not None: - np.save(os.path.join(set_folder, "virial"), virials[set_stt:set_end]) - if real_atom_types is not None: - np.save( - os.path.join(set_folder, "real_atom_types"), - real_atom_types[set_stt:set_end], - ) - if "atom_pref" in data: - np.save(os.path.join(set_folder, "atom_pref"), atom_pref[set_stt:set_end]) - try: - os.remove(os.path.join(folder, "nopbc")) - except OSError: - pass - if data.get("nopbc", False): - with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: - pass + + data = data.copy() + data["atom_names"] = data.pop("real_atom_names") + comp_dump(folder, data, set_size, comp_prec, remove_sets) def mix_system(*system, type_map, **kwargs): diff --git a/dpdata/deepmd/raw.py b/dpdata/deepmd/raw.py index 11a9ff6ed..717c73f96 100644 --- a/dpdata/deepmd/raw.py +++ b/dpdata/deepmd/raw.py @@ -14,9 +14,6 @@ def load_type(folder, type_map=None): int ) ntypes = np.max(data["atom_types"]) + 1 - data["atom_numbs"] = [] - for ii in range(ntypes): - data["atom_numbs"].append(np.count_nonzero(data["atom_types"] == ii)) data["atom_names"] = [] # if find type_map.raw, use it if os.path.isfile(os.path.join(folder, "type_map.raw")): @@ -30,9 +27,10 @@ def load_type(folder, type_map=None): my_type_map = [] for ii in range(ntypes): my_type_map.append("Type_%d" % ii) - assert len(my_type_map) >= len(data["atom_numbs"]) - for ii in range(len(data["atom_numbs"])): - data["atom_names"].append(my_type_map[ii]) + data["atom_names"] = my_type_map + data["atom_numbs"] = [] + for ii, _ in enumerate(data["atom_names"]): + data["atom_numbs"].append(np.count_nonzero(data["atom_types"] == ii)) return data diff --git a/tests/test_custom_data_type.py b/tests/test_custom_data_type.py index a319af35f..230475ab3 100644 --- a/tests/test_custom_data_type.py 
+++ b/tests/test_custom_data_type.py @@ -57,6 +57,16 @@ def test_duplicated_data_type(self): n_dtypes_new = len(self.cls.DTYPES) self.assertEqual(n_dtypes_old, n_dtypes_new) + def test_to_deepmd_npy_mixed(self): + ms = dpdata.MultiSystems(self.system) + ms.to_deepmd_npy_mixed("data_foo_mixed") + x = dpdata.MultiSystems().load_systems_from_file( + "data_foo_mixed", + fmt="deepmd/npy/mixed", + labeled=issubclass(self.cls, dpdata.LabeledSystem), + ) + np.testing.assert_allclose(list(x.systems.values())[0].data["foo"], self.foo) + class TestDeepmdLoadDumpCompUnlabeled(unittest.TestCase, DeepmdLoadDumpCompTest): cls = dpdata.System From f4d0adf8ca3aceab813f0bab0fdaeb9b55bf6ed6 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 31 Aug 2024 08:45:45 -0400 Subject: [PATCH 20/24] docs: reformat README and documentation (#707) Move most of the contents from `README.md` to `docs`. ## Summary by CodeRabbit - **New Features** - Introduced `BondOrderSystem` class for managing chemical bonds and formal charges. - Added `MultiSystems` class for handling multiple system data from files or directories. - Implemented new `deepmd/npy/mixed` format for combining systems with different formulas. - Launched plugin documentation to enhance extensibility for user-created plugins. - **Documentation** - Updated README for clearer installation instructions and streamlined content. - Created dedicated installation guide for user-friendly setup. - Enhanced documentation structure with new entries and improved navigation. 
--------- Signed-off-by: Jinzhe Zeng --- README.md | 323 +++--------------------------- docs/index.rst | 9 +- docs/installation.md | 13 ++ docs/plugin.md | 9 + docs/systems/bond_order_system.md | 51 +++++ docs/systems/index.rst | 11 + docs/systems/mixed.md | 24 +++ docs/systems/multi.md | 62 ++++++ docs/systems/system.md | 112 +++++++++++ 9 files changed, 312 insertions(+), 302 deletions(-) create mode 100644 docs/installation.md create mode 100644 docs/plugin.md create mode 100644 docs/systems/bond_order_system.md create mode 100644 docs/systems/index.rst create mode 100644 docs/systems/mixed.md create mode 100644 docs/systems/multi.md create mode 100644 docs/systems/system.md diff --git a/README.md b/README.md index ab489a75e..6f9246b1c 100644 --- a/README.md +++ b/README.md @@ -4,320 +4,47 @@ [![pip install](https://img.shields.io/pypi/dm/dpdata?label=pip%20install&logo=pypi)](https://pypi.org/project/dpdata) [![Documentation Status](https://readthedocs.org/projects/dpdata/badge/)](https://dpdata.readthedocs.io/) -**dpdata** is a python package for manipulating data formats of software in computational science, including DeePMD-kit, VASP, LAMMPS, GROMACS, Gaussian. -dpdata only works with python 3.7 or above. - +**dpdata** is a Python package for manipulating atomistic data of software in computational science. ## Installation -One can download the source code of dpdata by -```bash -git clone https://github.com/deepmodeling/dpdata.git dpdata -``` -then use `pip` to install the module from source -```bash -cd dpdata -pip install . -``` - -`dpdata` can also by install via pip without source -```bash -pip install dpdata -``` - -## Quick start - -This section gives some examples on how dpdata works. Firstly one needs to import the module in a python 3.x compatible code. -```python -import dpdata -``` -The typicall workflow of `dpdata` is - -1. Load data from vasp or lammps or deepmd-kit data files. -2. Manipulate data -3. 
Dump data to in a desired format - - -### Load data -```python -d_poscar = dpdata.System("POSCAR", fmt="vasp/poscar") -``` -or let dpdata infer the format (`vasp/poscar`) of the file from the file name extension -```python -d_poscar = dpdata.System("my.POSCAR") -``` -The number of atoms, atom types, coordinates are loaded from the `POSCAR` and stored to a data `System` called `d_poscar`. -A data `System` (a concept used by [deepmd-kit](https://github.com/deepmodeling/deepmd-kit)) contains frames that has the same number of atoms of the same type. The order of the atoms should be consistent among the frames in one `System`. -It is noted that `POSCAR` only contains one frame. -If the multiple frames stored in, for example, a `OUTCAR` is wanted, -```python -d_outcar = dpdata.LabeledSystem("OUTCAR") -``` -The labels provided in the `OUTCAR`, i.e. energies, forces and virials (if any), are loaded by `LabeledSystem`. It is noted that the forces of atoms are always assumed to exist. `LabeledSystem` is a derived class of `System`. +DP-GEN only supports Python 3.7 and above. 
You can [setup a conda/pip environment](https://docs.deepmodeling.com/faq/conda.html), and then use one of the following methods to install DP-GEN: -The `System` or `LabeledSystem` can be constructed from the following file formats with the `format key` in the table passed to argument `fmt`: +- Install via pip: `pip install dpdata` +- Install via conda: `conda install -c conda-forge dpdata` +- Install from source code: `git clone https://github.com/deepmodeling/dpdata && pip install ./dpdata` -| Software| format | multi frames | labeled | class | format key | -| ------- | :--- | :---: | :---: | :--- | :--- | -| vasp | poscar | False | False | System | 'vasp/poscar' | -| vasp | outcar | True | True | LabeledSystem | 'vasp/outcar' | -| vasp | xml | True | True | LabeledSystem | 'vasp/xml' | -| lammps | lmp | False | False | System | 'lammps/lmp' | -| lammps | dump | True | False | System | 'lammps/dump' | -| deepmd | raw | True | False | System | 'deepmd/raw' | -| deepmd | npy | True | False | System | 'deepmd/npy' | -| deepmd | raw | True | True | LabeledSystem | 'deepmd/raw' | -| deepmd | npy | True | True | LabeledSystem | 'deepmd/npy' | -| deepmd | npy | True | True | MultiSystems | 'deepmd/npy/mixed' | -| deepmd | npy | True | False | MultiSystems | 'deepmd/npy/mixed' | -| gaussian| log | False | True | LabeledSystem | 'gaussian/log'| -| gaussian| log | True | True | LabeledSystem | 'gaussian/md' | -| siesta | output | False | True | LabeledSystem | 'siesta/output'| -| siesta | aimd_output | True | True | LabeledSystem | 'siesta/aimd_output' | -| cp2k(deprecated in future) | output | False | True | LabeledSystem | 'cp2k/output' | -| cp2k(deprecated in future) | aimd_output | True | True | LabeledSystem | 'cp2k/aimd_output' | -| cp2k([plug-in](https://github.com/robinzyb/cp2kdata#plug-in-for-dpdata)) | stdout | False | True | LabeledSystem | 'cp2kdata/e_f' | -| cp2k([plug-in](https://github.com/robinzyb/cp2kdata#plug-in-for-dpdata)) | stdout | True | True | 
LabeledSystem | 'cp2kdata/md' | -| QE | log | False | True | LabeledSystem | 'qe/pw/scf' | -| QE | log | True | False | System | 'qe/cp/traj' | -| QE | log | True | True | LabeledSystem | 'qe/cp/traj' | -| Fhi-aims| output | True | True | LabeledSystem | 'fhi_aims/md' | -| Fhi-aims| output | False | True | LabeledSystem | 'fhi_aims/scf' | -|quip/gap|xyz|True|True|MultiSystems|'quip/gap/xyz'| -| PWmat | atom.config | False | False | System | 'pwmat/atom.config' | -| PWmat | movement | True | True | LabeledSystem | 'pwmat/movement' | -| PWmat | OUT.MLMD | True | True | LabeledSystem | 'pwmat/out.mlmd' | -| Amber | multi | True | True | LabeledSystem | 'amber/md' | -| Amber/sqm | sqm.out | False | False | System | 'sqm/out' | -| Gromacs | gro | True | False | System | 'gromacs/gro' | -| ABACUS | STRU | False | False | System | 'abacus/stru' | -| ABACUS | STRU | False | True | LabeledSystem | 'abacus/scf' | -| ABACUS | cif | True | True | LabeledSystem | 'abacus/md' | -| ABACUS | STRU | True | True | LabeledSystem | 'abacus/relax' | -| ase | structure | True | True | MultiSystems | 'ase/structure' | -| DFTB+ | dftbplus | False | True | LabeledSystem | 'dftbplus' | -| n2p2 | n2p2 | True | True | LabeledSystem | 'n2p2' | - - -The Class `dpdata.MultiSystems` can read data from a dir which may contains many files of different systems, or from single xyz file which contains different systems. - -Use `dpdata.MultiSystems.from_dir` to read from a directory, `dpdata.MultiSystems` will walk in the directory -Recursively and find all file with specific file_name. Supports all the file formats that `dpdata.LabeledSystem` supports. - -Use `dpdata.MultiSystems.from_file` to read from single file. Single-file support is available for the `quip/gap/xyz` and `ase/structure` formats. - -For example, for `quip/gap xyz` files, single .xyz file may contain many different configurations with different atom numbers and atom type. 
- -The following commands relating to `Class dpdata.MultiSystems` may be useful. -```python -# load data - -xyz_multi_systems = dpdata.MultiSystems.from_file( - file_name="tests/xyz/xyz_unittest.xyz", fmt="quip/gap/xyz" -) -vasp_multi_systems = dpdata.MultiSystems.from_dir( - dir_name="./mgal_outcar", file_name="OUTCAR", fmt="vasp/outcar" -) - -# use wildcard -vasp_multi_systems = dpdata.MultiSystems.from_dir( - dir_name="./mgal_outcar", file_name="*OUTCAR", fmt="vasp/outcar" -) - -# print the multi_system infomation -print(xyz_multi_systems) -print(xyz_multi_systems.systems) # return a dictionaries - -# print the system infomation -print(xyz_multi_systems.systems["B1C9"].data) - -# dump a system's data to ./my_work_dir/B1C9_raw folder -xyz_multi_systems.systems["B1C9"].to_deepmd_raw("./my_work_dir/B1C9_raw") - -# dump all systems -xyz_multi_systems.to_deepmd_raw("./my_deepmd_data/") -``` - -You may also use the following code to parse muti-system: -```python -from dpdata import LabeledSystem, MultiSystems -from glob import glob - -""" -process multi systems -""" -fs = glob("./*/OUTCAR") # remeber to change here !!! 
-ms = MultiSystems() -for f in fs: - try: - ls = LabeledSystem(f) - except: - print(f) - if len(ls) > 0: - ms.append(ls) - -ms.to_deepmd_raw("deepmd") -ms.to_deepmd_npy("deepmd") -``` +To test if the installation is successful, you may execute -### Access data -These properties stored in `System` and `LabeledSystem` can be accessed by operator `[]` with the key of the property supplied, for example -```python -coords = d_outcar["coords"] -``` -Available properties are (nframe: number of frames in the system, natoms: total number of atoms in the system) - -| key | type | dimension | are labels | description -| --- | --- | --- | --- | --- -| 'atom_names' | list of str | ntypes | False | The name of each atom type -| 'atom_numbs' | list of int | ntypes | False | The number of atoms of each atom type -| 'atom_types' | np.ndarray | natoms | False | Array assigning type to each atom -| 'cells' | np.ndarray | nframes x 3 x 3 | False | The cell tensor of each frame -| 'coords' | np.ndarray | nframes x natoms x 3 | False | The atom coordinates -| 'energies' | np.ndarray | nframes | True | The frame energies -| 'forces' | np.ndarray | nframes x natoms x 3 | True | The atom forces -| 'virials' | np.ndarray | nframes x 3 x 3 | True | The virial tensor of each frame - - -### Dump data -The data stored in `System` or `LabeledSystem` can be dumped in 'lammps/lmp' or 'vasp/poscar' format, for example: -```python -d_outcar.to("lammps/lmp", "conf.lmp", frame_idx=0) -``` -The first frames of `d_outcar` will be dumped to 'conf.lmp' -```python -d_outcar.to("vasp/poscar", "POSCAR", frame_idx=-1) -``` -The last frames of `d_outcar` will be dumped to 'POSCAR'. 
- -The data stored in `LabeledSystem` can be dumped to deepmd-kit raw format, for example -```python -d_outcar.to("deepmd/raw", "dpmd_raw") -``` -Or a simpler command: -```python -dpdata.LabeledSystem("OUTCAR").to("deepmd/raw", "dpmd_raw") -``` -Frame selection can be implemented by -```python -dpdata.LabeledSystem("OUTCAR").sub_system([0, -1]).to("deepmd/raw", "dpmd_raw") -``` -by which only the first and last frames are dumped to `dpmd_raw`. - - -### replicate -dpdata will create a super cell of the current atom configuration. -```python -dpdata.System("./POSCAR").replicate( - ( - 1, - 2, - 3, - ) -) -``` -tuple(1,2,3) means don't copy atom configuration in x direction, make 2 copys in y direction, make 3 copys in z direction. - - -### perturb -By the following example, each frame of the original system (`dpdata.System('./POSCAR')`) is perturbed to generate three new frames. For each frame, the cell is perturbed by 5% and the atom positions are perturbed by 0.6 Angstrom. `atom_pert_style` indicates that the perturbation to the atom positions is subject to normal distribution. Other available options to `atom_pert_style` are`uniform` (uniform in a ball), and `const` (uniform on a sphere). -```python -perturbed_system = dpdata.System("./POSCAR").perturb( - pert_num=3, - cell_pert_fraction=0.05, - atom_pert_distance=0.6, - atom_pert_style="normal", -) -print(perturbed_system.data) -``` - -### replace -By the following example, Random 8 Hf atoms in the system will be replaced by Zr atoms with the atom postion unchanged. -```python -s = dpdata.System("tests/poscars/POSCAR.P42nmc", fmt="vasp/poscar") -s.replace("Hf", "Zr", 8) -s.to_vasp_poscar("POSCAR.P42nmc.replace") -``` - -## BondOrderSystem -A new class `BondOrderSystem` which inherits from class `System` is introduced in dpdata. This new class contains information of chemical bonds and formal charges (stored in `BondOrderSystem.data['bonds']`, `BondOrderSystem.data['formal_charges']`). 
Now BondOrderSystem can only read from .mol/.sdf formats, because of its dependency on rdkit (which means rdkit must be installed if you want to use this function). Other formats, such as pdb, must be converted to .mol/.sdf format (maybe with software like open babel). -```python -import dpdata - -system_1 = dpdata.BondOrderSystem( - "tests/bond_order/CH3OH.mol", fmt="mol" -) # read from .mol file -system_2 = dpdata.BondOrderSystem( - "tests/bond_order/methane.sdf", fmt="sdf" -) # read from .sdf file -``` -In sdf file, all molecules must be of the same topology (i.e. conformers of the same molecular configuration). -`BondOrderSystem` also supports initialize from a `rdkit.Chem.rdchem.Mol` object directly. -```python -from rdkit import Chem -from rdkit.Chem import AllChem -import dpdata - -mol = Chem.MolFromSmiles("CC") -mol = Chem.AddHs(mol) -AllChem.EmbedMultipleConfs(mol, 10) -system = dpdata.BondOrderSystem(rdkit_mol=mol) -``` - -### Bond Order Assignment -The `BondOrderSystem` implements a more robust sanitize procedure for rdkit Mol, as defined in `dpdata.rdkit.santizie.Sanitizer`. This class defines 3 level of sanitization process by: low, medium and high. (default is medium). -+ low: use `rdkit.Chem.SanitizeMol()` function to sanitize molecule. -+ medium: before using rdkit, the programm will first assign formal charge of each atom to avoid inappropriate valence exceptions. However, this mode requires the rightness of the bond order information in the given molecule. -+ high: the program will try to fix inappropriate bond orders in aromatic hetreocycles, phosphate, sulfate, carboxyl, nitro, nitrine, guanidine groups. If this procedure fails to sanitize the given molecule, the program will then try to call `obabel` to pre-process the mol and repeat the sanitization procedure. 
**That is to say, if you wan't to use this level of sanitization, please ensure `obabel` is installed in the environment.** -According to our test, our sanitization procedure can successfully read 4852 small molecules in the PDBBind-refined-set. It is necessary to point out that the in the molecule file (mol/sdf), the number of explicit hydrogens has to be correct. Thus, we recommend to use - `obabel xxx -O xxx -h` to pre-process the file. The reason why we do not implement this hydrogen-adding procedure in dpdata is that we can not ensure its correctness. - -```python -import dpdata - -for sdf_file in glob.glob("bond_order/refined-set-ligands/obabel/*sdf"): - syst = dpdata.BondOrderSystem(sdf_file, sanitize_level="high", verbose=False) -``` -### Formal Charge Assignment -BondOrderSystem implement a method to assign formal charge for each atom based on the 8-electron rule (see below). Note that it only supports common elements in bio-system: B,C,N,O,P,S,As -```python -import dpdata - -syst = dpdata.BondOrderSystem("tests/bond_order/CH3NH3+.mol", fmt="mol") -print(syst.get_formal_charges()) # return the formal charge on each atom -print(syst.get_charge()) # return the total charge of the system +```bash +dpdata --version ``` -If a valence of 3 is detected on carbon, the formal charge will be assigned to -1. Because for most cases (in alkynyl anion, isonitrile, cyclopentadienyl anion), the formal charge on 3-valence carbon is -1, and this is also consisent with the 8-electron rule. +## Supported packages -## Mixed Type Format -The format `deepmd/npy/mixed` is the mixed type numpy format for DeePMD-kit, and can be loaded or dumped through class `dpdata.MultiSystems`. +`dpdata` is aimmed to support different kinds of atomistic packages: -Under this format, systems with the same number of atoms but different formula can be put together -for a larger system, especially when the frame numbers in systems are sparse. 
+- Atomistic machine learning packages, such as [DeePMD-kit](https://github.com/deepmodeling/deepmd-kit); +- Molecular dynamics packages, such as [LAMMPS](https://github.com/lammps/lammps) and [GROMACS](https://gitlab.com/gromacs/gromacs); +- Quantum chemistry packages, such as [VASP](https://www.vasp.at/), [Gaussian](https://gaussian.com), and [ABACUS](https://github.com/deepmodeling/abacus-develop); +- Atomistic visualization packages, such as [3Dmol.js](https://3dmol.csb.pitt.edu/). +- Other atomistic tools, such as [ASE](https://gitlab.com/ase/ase). +- Common formats such as `xyz`. -This also helps to mixture the type information together for model training with type embedding network. +All supported formats are listed [here](https://docs.deepmodeling.com/projects/dpdata/en/master/formats.html). -Here are examples using `deepmd/npy/mixed` format: +## Quick start -- Dump a MultiSystems into a mixed type numpy directory: -```python -import dpdata +The quickest way to convert a simple file from one format to another one is to use the [command line](https://docs.deepmodeling.com/projects/dpdata/en/master/cli.html). -dpdata.MultiSystems(*systems).to_deepmd_npy_mixed("mixed_dir") +```sh +dpdata OUTCAR -i vasp/outcar -o deepmd/npy -O deepmd_data ``` -- Load a mixed type data into a MultiSystems: -```python -import dpdata - -dpdata.MultiSystems().load_systems_from_file("mixed_dir", fmt="deepmd/npy/mixed") -``` +For advanced usage with Python APIs, [read dpdata documentation](https://docs.deepmodeling.com/projects/dpdata/). ## Plugins -One can follow [a simple example](plugin_example/) to add their own format by creating and installing plugins. 
It's critical to add the [Format](dpdata/format.py) class to `entry_points['dpdata.plugins']` in [`pyproject.toml`](plugin_example/pyproject.toml): -```toml -[project.entry-points.'dpdata.plugins'] -random = "dpdata_random:RandomFormat" -``` +- [cp2kdata](https://github.com/robinzyb/cp2kdata) adds the latest CP2K support for dpdata. + +For how to create your own plugin packages, [read dpdata documentation](https://docs.deepmodeling.com/projects/dpdata/). diff --git a/docs/index.rst b/docs/index.rst index 07e381de7..3d98bd566 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,22 +6,23 @@ Welcome to dpdata's documentation! ================================== +dpdata is a Python package for manipulating atomistic data of software in computational science. + .. toctree:: :maxdepth: 2 :caption: Contents: - Overview + installation + systems/index try_dpdata cli formats drivers minimizers + plugin api/api credits -.. mdinclude:: ../README.md - - Indices and tables ================== diff --git a/docs/installation.md b/docs/installation.md new file mode 100644 index 000000000..064f91331 --- /dev/null +++ b/docs/installation.md @@ -0,0 +1,13 @@ +# Installation + +DP-GEN only supports Python 3.7 and above. You can [setup a conda/pip environment](https://docs.deepmodeling.com/faq/conda.html), and then use one of the following methods to install DP-GEN: + +- Install via pip: `pip install dpdata` +- Install via conda: `conda install -c conda-forge dpdata` +- Install from source code: `git clone https://github.com/deepmodeling/dpdata && pip install ./dpdata` + +To test if the installation is successful, you may execute + +```bash +dpdata --version +``` diff --git a/docs/plugin.md b/docs/plugin.md new file mode 100644 index 000000000..44e8eabf7 --- /dev/null +++ b/docs/plugin.md @@ -0,0 +1,9 @@ +# Plugins + +One can follow a simple example under `plugin_example/` directory to add their own format by creating and installing plugins. 
+It's critical to add the :class:`Format` class to `entry_points['dpdata.plugins']` in `pyproject.toml`: + +```toml +[project.entry-points.'dpdata.plugins'] +random = "dpdata_random:RandomFormat" +``` diff --git a/docs/systems/bond_order_system.md b/docs/systems/bond_order_system.md new file mode 100644 index 000000000..443120a76 --- /dev/null +++ b/docs/systems/bond_order_system.md @@ -0,0 +1,51 @@ + +## BondOrderSystem +A new class :class:`BondOrderSystem` which inherits from class :class:`System` is introduced in dpdata. This new class contains information of chemical bonds and formal charges (stored in `BondOrderSystem.data['bonds']`, `BondOrderSystem.data['formal_charges']`). Now BondOrderSystem can only read from .mol/.sdf formats, because of its dependency on rdkit (which means rdkit must be installed if you want to use this function). Other formats, such as pdb, must be converted to .mol/.sdf format (maybe with software like open babel). +```python +import dpdata + +system_1 = dpdata.BondOrderSystem( + "tests/bond_order/CH3OH.mol", fmt="mol" +) # read from .mol file +system_2 = dpdata.BondOrderSystem( + "tests/bond_order/methane.sdf", fmt="sdf" +) # read from .sdf file +``` +In sdf file, all molecules must be of the same topology (i.e. conformers of the same molecular configuration). +`BondOrderSystem` also supports initialize from a :class:`rdkit.Chem.rdchem.Mol` object directly. +```python +from rdkit import Chem +from rdkit.Chem import AllChem +import dpdata + +mol = Chem.MolFromSmiles("CC") +mol = Chem.AddHs(mol) +AllChem.EmbedMultipleConfs(mol, 10) +system = dpdata.BondOrderSystem(rdkit_mol=mol) +``` + +### Bond Order Assignment +The :class:`BondOrderSystem` implements a more robust sanitize procedure for rdkit Mol, as defined in :class:`dpdata.rdkit.santizie.Sanitizer`. This class defines 3 level of sanitization process by: low, medium and high. (default is medium). ++ low: use `rdkit.Chem.SanitizeMol()` function to sanitize molecule. 
++ medium: before using rdkit, the programm will first assign formal charge of each atom to avoid inappropriate valence exceptions. However, this mode requires the rightness of the bond order information in the given molecule. ++ high: the program will try to fix inappropriate bond orders in aromatic hetreocycles, phosphate, sulfate, carboxyl, nitro, nitrine, guanidine groups. If this procedure fails to sanitize the given molecule, the program will then try to call `obabel` to pre-process the mol and repeat the sanitization procedure. **That is to say, if you wan't to use this level of sanitization, please ensure `obabel` is installed in the environment.** +According to our test, our sanitization procedure can successfully read 4852 small molecules in the PDBBind-refined-set. It is necessary to point out that the in the molecule file (mol/sdf), the number of explicit hydrogens has to be correct. Thus, we recommend to use + `obabel xxx -O xxx -h` to pre-process the file. The reason why we do not implement this hydrogen-adding procedure in dpdata is that we can not ensure its correctness. + +```python +import dpdata + +for sdf_file in glob.glob("bond_order/refined-set-ligands/obabel/*sdf"): + syst = dpdata.BondOrderSystem(sdf_file, sanitize_level="high", verbose=False) +``` +### Formal Charge Assignment +BondOrderSystem implement a method to assign formal charge for each atom based on the 8-electron rule (see below). Note that it only supports common elements in bio-system: B,C,N,O,P,S,As +```python +import dpdata + +syst = dpdata.BondOrderSystem("tests/bond_order/CH3NH3+.mol", fmt="mol") +print(syst.get_formal_charges()) # return the formal charge on each atom +print(syst.get_charge()) # return the total charge of the system +``` + +If a valence of 3 is detected on carbon, the formal charge will be assigned to -1. 
Because for most cases (in alkynyl anion, isonitrile, cyclopentadienyl anion), the formal charge on 3-valence carbon is -1, and this is also consisent with the 8-electron rule. diff --git a/docs/systems/index.rst b/docs/systems/index.rst new file mode 100644 index 000000000..dfcbe698e --- /dev/null +++ b/docs/systems/index.rst @@ -0,0 +1,11 @@ +Systems +======= + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + system + multi + bond_order_system + mixed diff --git a/docs/systems/mixed.md b/docs/systems/mixed.md new file mode 100644 index 000000000..69bf80ea8 --- /dev/null +++ b/docs/systems/mixed.md @@ -0,0 +1,24 @@ +# Mixed Type Format + +The format `deepmd/npy/mixed` is the mixed type numpy format for DeePMD-kit, and can be loaded or dumped through class :class:`dpdata.MultiSystems`. + +Under this format, systems with the same number of atoms but different formula can be put together +for a larger system, especially when the frame numbers in systems are sparse. + +This also helps to mixture the type information together for model training with type embedding network. + +Here are examples using `deepmd/npy/mixed` format: + +- Dump a MultiSystems into a mixed type numpy directory: +```python +import dpdata + +dpdata.MultiSystems(*systems).to_deepmd_npy_mixed("mixed_dir") +``` + +- Load a mixed type data into a MultiSystems: +```python +import dpdata + +dpdata.MultiSystems().load_systems_from_file("mixed_dir", fmt="deepmd/npy/mixed") +``` diff --git a/docs/systems/multi.md b/docs/systems/multi.md new file mode 100644 index 000000000..8e47acf70 --- /dev/null +++ b/docs/systems/multi.md @@ -0,0 +1,62 @@ +# `MultiSystems` + +The Class :class:`dpdata.MultiSystems` can read data from a dir which may contains many files of different systems, or from single xyz file which contains different systems. 
+
+Use :meth:`dpdata.MultiSystems.from_dir` to read from a directory; :class:`dpdata.MultiSystems` will walk through the directory
+recursively and find all files with the specified file_name. Supports all the file formats that :class:`dpdata.LabeledSystem` supports.
+
+Use :meth:`dpdata.MultiSystems.from_file` to read from a single file. Single-file support is available for the `quip/gap/xyz` and `ase/structure` formats.
+
+For example, for `quip/gap xyz` files, a single .xyz file may contain many different configurations with different atom numbers and atom types.
+
+The following commands relating to :class:`dpdata.MultiSystems` may be useful.
+```python
+# load data
+
+xyz_multi_systems = dpdata.MultiSystems.from_file(
+    file_name="tests/xyz/xyz_unittest.xyz", fmt="quip/gap/xyz"
+)
+vasp_multi_systems = dpdata.MultiSystems.from_dir(
+    dir_name="./mgal_outcar", file_name="OUTCAR", fmt="vasp/outcar"
+)
+
+# use wildcard
+vasp_multi_systems = dpdata.MultiSystems.from_dir(
+    dir_name="./mgal_outcar", file_name="*OUTCAR", fmt="vasp/outcar"
+)
+
+# print the multi_system information
+print(xyz_multi_systems)
+print(xyz_multi_systems.systems) # return a dictionary
+
+# print the system information
+print(xyz_multi_systems.systems["B1C9"].data)
+
+# dump a system's data to ./my_work_dir/B1C9_raw folder
+xyz_multi_systems.systems["B1C9"].to_deepmd_raw("./my_work_dir/B1C9_raw")
+
+# dump all systems
+xyz_multi_systems.to_deepmd_raw("./my_deepmd_data/")
+```
+
+You may also use the following code to parse multi-systems:
+```python
+from dpdata import LabeledSystem, MultiSystems
+from glob import glob
+
+"""
+process multi systems
+"""
+fs = glob("./*/OUTCAR") # remember to change here !!!
+ms = MultiSystems()
+for f in fs:
+    try:
+        ls = LabeledSystem(f)
+    except:
+        print(f)
+    if len(ls) > 0:
+        ms.append(ls)
+
+ms.to_deepmd_raw("deepmd")
+ms.to_deepmd_npy("deepmd")
+```
diff --git a/docs/systems/system.md b/docs/systems/system.md
new file mode 100644
index 000000000..0401a3110
--- /dev/null
+++ b/docs/systems/system.md
@@ -0,0 +1,112 @@
+# `System` and `LabeledSystem`
+
+This section gives some examples of how dpdata works. Firstly one needs to import the module in python 3.x compatible code.
+```python
+import dpdata
+```
+The typical workflow of `dpdata` is
+
+1. Load data from vasp or lammps or deepmd-kit data files.
+2. Manipulate data
+3. Dump data in a desired format
+
+
+### Load data
+```python
+d_poscar = dpdata.System("POSCAR", fmt="vasp/poscar")
+```
+or let dpdata infer the format (`vasp/poscar`) of the file from the file name extension
+```python
+d_poscar = dpdata.System("my.POSCAR")
+```
+The number of atoms, atom types, coordinates are loaded from the `POSCAR` and stored to a data :class:`System` called `d_poscar`.
+A data :class:`System` (a concept used by [deepmd-kit](https://github.com/deepmodeling/deepmd-kit)) contains frames that have the same number of atoms of the same type. The order of the atoms should be consistent among the frames in one :class:`System`.
+It is noted that `POSCAR` only contains one frame.
+If the multiple frames stored in, for example, an `OUTCAR` are wanted,
+```python
+d_outcar = dpdata.LabeledSystem("OUTCAR")
+```
+The labels provided in the `OUTCAR`, i.e. energies, forces and virials (if any), are loaded by :class:`LabeledSystem`. It is noted that the forces of atoms are always assumed to exist. :class:`LabeledSystem` is a derived class of :class:`System`.
+ +The :class:`System` or :class:`LabeledSystem` can be constructed from the following file formats with the `format key` in the table passed to argument `fmt`: + + + +### Access data +These properties stored in :class:`System` and :class:`LabeledSystem` can be accessed by operator `[]` with the key of the property supplied, for example +```python +coords = d_outcar["coords"] +``` +Available properties are (nframe: number of frames in the system, natoms: total number of atoms in the system) + +| key | type | dimension | are labels | description +| --- | --- | --- | --- | --- +| 'atom_names' | list of str | ntypes | False | The name of each atom type +| 'atom_numbs' | list of int | ntypes | False | The number of atoms of each atom type +| 'atom_types' | np.ndarray | natoms | False | Array assigning type to each atom +| 'cells' | np.ndarray | nframes x 3 x 3 | False | The cell tensor of each frame +| 'coords' | np.ndarray | nframes x natoms x 3 | False | The atom coordinates +| 'energies' | np.ndarray | nframes | True | The frame energies +| 'forces' | np.ndarray | nframes x natoms x 3 | True | The atom forces +| 'virials' | np.ndarray | nframes x 3 x 3 | True | The virial tensor of each frame + + +### Dump data +The data stored in :class:`System` or :class:`LabeledSystem` can be dumped in 'lammps/lmp' or 'vasp/poscar' format, for example: +```python +d_outcar.to("lammps/lmp", "conf.lmp", frame_idx=0) +``` +The first frames of `d_outcar` will be dumped to 'conf.lmp' +```python +d_outcar.to("vasp/poscar", "POSCAR", frame_idx=-1) +``` +The last frames of `d_outcar` will be dumped to 'POSCAR'. 
+
+The data stored in `LabeledSystem` can be dumped to deepmd-kit raw format, for example
+```python
+d_outcar.to("deepmd/raw", "dpmd_raw")
+```
+Or a simpler command:
+```python
+dpdata.LabeledSystem("OUTCAR").to("deepmd/raw", "dpmd_raw")
+```
+Frame selection can be implemented by
+```python
+dpdata.LabeledSystem("OUTCAR").sub_system([0, -1]).to("deepmd/raw", "dpmd_raw")
+```
+by which only the first and last frames are dumped to `dpmd_raw`.
+
+
+### replicate
+dpdata will create a super cell of the current atom configuration.
+```python
+dpdata.System("./POSCAR").replicate(
+    (
+        1,
+        2,
+        3,
+    )
+)
+```
+tuple(1,2,3) means don't copy the atom configuration in the x direction, make 2 copies in the y direction, and make 3 copies in the z direction.
+
+
+### perturb
+By the following example, each frame of the original system (`dpdata.System('./POSCAR')`) is perturbed to generate three new frames. For each frame, the cell is perturbed by 5% and the atom positions are perturbed by 0.6 Angstrom. `atom_pert_style` indicates that the perturbation to the atom positions is subject to a normal distribution. Other available options to `atom_pert_style` are `uniform` (uniform in a ball), and `const` (uniform on a sphere).
+```python
+perturbed_system = dpdata.System("./POSCAR").perturb(
+    pert_num=3,
+    cell_pert_fraction=0.05,
+    atom_pert_distance=0.6,
+    atom_pert_style="normal",
+)
+print(perturbed_system.data)
+```
+
+### replace
+By the following example, 8 random Hf atoms in the system will be replaced by Zr atoms with the atom positions unchanged.
+```python +s = dpdata.System("tests/poscars/POSCAR.P42nmc", fmt="vasp/poscar") +s.replace("Hf", "Zr", 8) +s.to_vasp_poscar("POSCAR.P42nmc.replace") +``` From 5df6acd266589b131c961390414337472e0880d9 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 2 Sep 2024 22:18:50 -0400 Subject: [PATCH 21/24] fix: fix a typo in #702 (#710) ## Summary by CodeRabbit - **Improvements** - Enhanced the flexibility of data handling by modifying how data types are processed, allowing for better adaptability based on context. Signed-off-by: Jinzhe Zeng --- dpdata/deepmd/comp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpdata/deepmd/comp.py b/dpdata/deepmd/comp.py index 661110162..ad6384088 100644 --- a/dpdata/deepmd/comp.py +++ b/dpdata/deepmd/comp.py @@ -180,7 +180,7 @@ def dump(folder, data, set_size=5000, comp_prec=np.float32, remove_sets=True): dtypes = dpdata.system.LabeledSystem.DTYPES else: dtypes = dpdata.system.System.DTYPES - for dtype in dpdata.system.LabeledSystem.DTYPES: + for dtype in dtypes: if dtype.name in ( "atom_numbs", "atom_names", From 6bf41e31b4952e54e7cec6f8b6c9cc3aabdc05cb Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 2 Sep 2024 22:58:22 -0400 Subject: [PATCH 22/24] feat: file object passed to open (#709) ## Summary by CodeRabbit - **New Features** - Introduced a new utility function `open_file` for improved file handling across various modules. - Enhanced type annotations for multiple functions to specify `FileType`, improving code clarity and type safety. - **Bug Fixes** - Improved file handling robustness by replacing the built-in `open` function with the custom `open_file` function in several modules. - **Tests** - Added unit tests for the new `open_file` utility function to ensure reliable functionality. 
--------- Signed-off-by: Jinzhe Zeng Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- dpdata/abacus/md.py | 10 +++++---- dpdata/abacus/relax.py | 8 ++++--- dpdata/abacus/scf.py | 10 +++++---- dpdata/amber/md.py | 7 +++--- dpdata/amber/sqm.py | 14 ++++++++---- dpdata/deepmd/comp.py | 3 ++- dpdata/deepmd/raw.py | 5 +++-- dpdata/dftbplus/output.py | 15 ++++++++++--- dpdata/gaussian/log.py | 11 +++++++-- dpdata/gromacs/gro.py | 10 +++++++-- dpdata/lammps/dump.py | 10 +++++++-- dpdata/openmx/omx.py | 21 +++++++++++------ dpdata/orca/output.py | 13 +++++++++-- dpdata/plugins/abacus.py | 10 +++++++-- dpdata/plugins/amber.py | 3 ++- dpdata/plugins/gaussian.py | 19 ++++++++++------ dpdata/plugins/gromacs.py | 12 ++++++++-- dpdata/plugins/lammps.py | 14 ++++++++---- dpdata/plugins/n2p2.py | 14 ++++++++---- dpdata/plugins/orca.py | 9 ++++++-- dpdata/plugins/psi4.py | 14 ++++++++---- dpdata/plugins/pwmat.py | 14 ++++++++---- dpdata/plugins/vasp.py | 15 ++++++++----- dpdata/plugins/xyz.py | 14 ++++++++---- dpdata/psi4/output.py | 10 +++++++-- dpdata/qe/scf.py | 6 +++-- dpdata/qe/traj.py | 20 +++++++++++------ dpdata/utils.py | 46 +++++++++++++++++++++++++++++++++++++- tests/test_read_file.py | 22 ++++++++++++++++++ 29 files changed, 289 insertions(+), 90 deletions(-) create mode 100644 tests/test_read_file.py diff --git a/dpdata/abacus/md.py b/dpdata/abacus/md.py index fa1841777..9e0a416a0 100644 --- a/dpdata/abacus/md.py +++ b/dpdata/abacus/md.py @@ -5,6 +5,8 @@ import numpy as np +from dpdata.utils import open_file + from .scf import ( bohr2ang, get_cell, @@ -156,12 +158,12 @@ def get_frame(fname): path_in = os.path.join(fname, "INPUT") else: raise RuntimeError("invalid input") - with open(path_in) as fp: + with open_file(path_in) as fp: inlines = fp.read().split("\n") geometry_path_in = get_geometry_in(fname, inlines) # base dir of STRU path_out = get_path_out(fname, inlines) - with open(geometry_path_in) as fp: + with 
open_file(geometry_path_in) as fp: geometry_inlines = fp.read().split("\n") celldm, cell = get_cell(geometry_inlines) atom_names, natoms, types, coords = get_coords( @@ -172,11 +174,11 @@ def get_frame(fname): # ndump = int(os.popen("ls -l %s | grep 'md_pos_' | wc -l" %path_out).readlines()[0]) # number of dumped geometry files # coords = get_coords_from_cif(ndump, dump_freq, atom_names, natoms, types, path_out, cell) - with open(os.path.join(path_out, "MD_dump")) as fp: + with open_file(os.path.join(path_out, "MD_dump")) as fp: dumplines = fp.read().split("\n") coords, cells, force, stress = get_coords_from_dump(dumplines, natoms) ndump = np.shape(coords)[0] - with open(os.path.join(path_out, "running_md.log")) as fp: + with open_file(os.path.join(path_out, "running_md.log")) as fp: outlines = fp.read().split("\n") energy = get_energy(outlines, ndump, dump_freq) diff --git a/dpdata/abacus/relax.py b/dpdata/abacus/relax.py index 976243b82..7a4abf357 100644 --- a/dpdata/abacus/relax.py +++ b/dpdata/abacus/relax.py @@ -4,6 +4,8 @@ import numpy as np +from dpdata.utils import open_file + from .scf import ( bohr2ang, collect_force, @@ -174,10 +176,10 @@ def get_frame(fname): path_in = os.path.join(fname, "INPUT") else: raise RuntimeError("invalid input") - with open(path_in) as fp: + with open_file(path_in) as fp: inlines = fp.read().split("\n") geometry_path_in = get_geometry_in(fname, inlines) # base dir of STRU - with open(geometry_path_in) as fp: + with open_file(geometry_path_in) as fp: geometry_inlines = fp.read().split("\n") celldm, cell = get_cell(geometry_inlines) atom_names, natoms, types, coord_tmp = get_coords( @@ -186,7 +188,7 @@ def get_frame(fname): logf = get_log_file(fname, inlines) assert os.path.isfile(logf), f"Error: can not find {logf}" - with open(logf) as f1: + with open_file(logf) as f1: lines = f1.readlines() atomnumber = 0 diff --git a/dpdata/abacus/scf.py b/dpdata/abacus/scf.py index c06f8cd3d..93e3d6e15 100644 --- a/dpdata/abacus/scf.py +++ 
b/dpdata/abacus/scf.py @@ -6,6 +6,8 @@ import numpy as np +from dpdata.utils import open_file + from ..unit import EnergyConversion, LengthConversion, PressureConversion bohr2ang = LengthConversion("bohr", "angstrom").value() @@ -253,7 +255,7 @@ def get_frame(fname): if not CheckFile(path_in): return data - with open(path_in) as fp: + with open_file(path_in) as fp: inlines = fp.read().split("\n") geometry_path_in = get_geometry_in(fname, inlines) @@ -261,9 +263,9 @@ def get_frame(fname): if not (CheckFile(geometry_path_in) and CheckFile(path_out)): return data - with open(geometry_path_in) as fp: + with open_file(geometry_path_in) as fp: geometry_inlines = fp.read().split("\n") - with open(path_out) as fp: + with open_file(path_out) as fp: outlines = fp.read().split("\n") celldm, cell = get_cell(geometry_inlines) @@ -338,7 +340,7 @@ def get_nele_from_stru(geometry_inlines): def get_frame_from_stru(fname): assert isinstance(fname, str) - with open(fname) as fp: + with open_file(fname) as fp: geometry_inlines = fp.read().split("\n") nele = get_nele_from_stru(geometry_inlines) inlines = ["ntype %d" % nele] diff --git a/dpdata/amber/md.py b/dpdata/amber/md.py index f3217fbd9..cb4f2d25e 100644 --- a/dpdata/amber/md.py +++ b/dpdata/amber/md.py @@ -7,6 +7,7 @@ from dpdata.amber.mask import pick_by_amber_mask from dpdata.unit import EnergyConversion +from dpdata.utils import open_file from ..periodic_table import ELEMENTS @@ -51,7 +52,7 @@ def read_amber_traj( flag_atom_numb = False amber_types = [] atomic_number = [] - with open(parm7_file) as f: + with open_file(parm7_file) as f: for line in f: if line.startswith("%FLAG"): flag_atom_type = line.startswith("%FLAG AMBER_ATOM_TYPE") @@ -101,14 +102,14 @@ def read_amber_traj( # load energy from mden_file or mdout_file energies = [] if mden_file is not None and os.path.isfile(mden_file): - with open(mden_file) as f: + with open_file(mden_file) as f: for line in f: if line.startswith("L6"): s = line.split() if s[2] != "E_pot": 
energies.append(float(s[2])) elif mdout_file is not None and os.path.isfile(mdout_file): - with open(mdout_file) as f: + with open_file(mdout_file) as f: for line in f: if "EPtot" in line: s = line.split() diff --git a/dpdata/amber/sqm.py b/dpdata/amber/sqm.py index 1be3802a2..93e41f9aa 100644 --- a/dpdata/amber/sqm.py +++ b/dpdata/amber/sqm.py @@ -1,9 +1,15 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np from dpdata.periodic_table import ELEMENTS from dpdata.unit import EnergyConversion +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType kcal2ev = EnergyConversion("kcal_mol", "eV").value() @@ -14,7 +20,7 @@ READ_FORCES = 7 -def parse_sqm_out(fname): +def parse_sqm_out(fname: FileType): """Read atom symbols, charges and coordinates from ambertools sqm.out file.""" atom_symbols = [] coords = [] @@ -22,7 +28,7 @@ def parse_sqm_out(fname): forces = [] energies = [] - with open(fname) as f: + with open_file(fname) as f: flag = START for line in f: if line.startswith(" Total SCF energy"): @@ -81,7 +87,7 @@ def parse_sqm_out(fname): return data -def make_sqm_in(data, fname=None, frame_idx=0, **kwargs): +def make_sqm_in(data, fname: FileType | None = None, frame_idx=0, **kwargs): symbols = [data["atom_names"][ii] for ii in data["atom_types"]] atomic_numbers = [ELEMENTS.index(ss) + 1 for ss in symbols] charge = kwargs.get("charge", 0) @@ -109,6 +115,6 @@ def make_sqm_in(data, fname=None, frame_idx=0, **kwargs): f"{data['coords'][frame_idx][ii, 2]:.6f}", ) if fname is not None: - with open(fname, "w") as fp: + with open_file(fname, "w") as fp: fp.write(ret) return ret diff --git a/dpdata/deepmd/comp.py b/dpdata/deepmd/comp.py index ad6384088..5ba3914e7 100644 --- a/dpdata/deepmd/comp.py +++ b/dpdata/deepmd/comp.py @@ -8,6 +8,7 @@ import numpy as np import dpdata +from dpdata.utils import open_file from .raw import load_type @@ -172,7 +173,7 @@ def dump(folder, data, set_size=5000, 
comp_prec=np.float32, remove_sets=True): except OSError: pass if data.get("nopbc", False): - with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: + with open_file(os.path.join(folder, "nopbc"), "w") as fw_nopbc: pass # allow custom dtypes labels = "energies" in data diff --git a/dpdata/deepmd/raw.py b/dpdata/deepmd/raw.py index 717c73f96..f8ddcaf35 100644 --- a/dpdata/deepmd/raw.py +++ b/dpdata/deepmd/raw.py @@ -6,6 +6,7 @@ import numpy as np import dpdata +from dpdata.utils import open_file def load_type(folder, type_map=None): @@ -17,7 +18,7 @@ def load_type(folder, type_map=None): data["atom_names"] = [] # if find type_map.raw, use it if os.path.isfile(os.path.join(folder, "type_map.raw")): - with open(os.path.join(folder, "type_map.raw")) as fp: + with open_file(os.path.join(folder, "type_map.raw")) as fp: my_type_map = fp.read().split() # else try to use arg type_map elif type_map is not None: @@ -140,7 +141,7 @@ def dump(folder, data): except OSError: pass if data.get("nopbc", False): - with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: + with open_file(os.path.join(folder, "nopbc"), "w") as fw_nopbc: pass # allow custom dtypes labels = "energies" in data diff --git a/dpdata/dftbplus/output.py b/dpdata/dftbplus/output.py index 0f10c3ac9..49fdd2b1b 100644 --- a/dpdata/dftbplus/output.py +++ b/dpdata/dftbplus/output.py @@ -1,9 +1,18 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType + -def read_dftb_plus(fn_1: str, fn_2: str) -> tuple[str, np.ndarray, float, np.ndarray]: +def read_dftb_plus( + fn_1: FileType, fn_2: FileType +) -> tuple[str, np.ndarray, float, np.ndarray]: """Read from DFTB+ input and output. 
Parameters @@ -29,7 +38,7 @@ def read_dftb_plus(fn_1: str, fn_2: str) -> tuple[str, np.ndarray, float, np.nda symbols = None forces = None energy = None - with open(fn_1) as f: + with open_file(fn_1) as f: flag = 0 for line in f: if flag == 1: @@ -49,7 +58,7 @@ def read_dftb_plus(fn_1: str, fn_2: str) -> tuple[str, np.ndarray, float, np.nda flag += 1 if flag == 7: flag = 0 - with open(fn_2) as f: + with open_file(fn_2) as f: flag = 0 for line in f: if line.startswith("Total Forces"): diff --git a/dpdata/gaussian/log.py b/dpdata/gaussian/log.py index 204cf464c..08a65b9dc 100644 --- a/dpdata/gaussian/log.py +++ b/dpdata/gaussian/log.py @@ -1,7 +1,14 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType + from ..periodic_table import ELEMENTS from ..unit import EnergyConversion, ForceConversion, LengthConversion @@ -12,7 +19,7 @@ symbols = ["X"] + ELEMENTS -def to_system_data(file_name, md=False): +def to_system_data(file_name: FileType, md=False): """Read Gaussian log file. 
Parameters @@ -43,7 +50,7 @@ def to_system_data(file_name, md=False): nopbc = True coords = None - with open(file_name) as fp: + with open_file(file_name) as fp: for line in fp: if line.startswith(" SCF Done"): # energies diff --git a/dpdata/gromacs/gro.py b/dpdata/gromacs/gro.py index aca2443b8..fe83e0c5c 100644 --- a/dpdata/gromacs/gro.py +++ b/dpdata/gromacs/gro.py @@ -2,9 +2,15 @@ from __future__ import annotations import re +from typing import TYPE_CHECKING import numpy as np +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType + from ..unit import LengthConversion nm2ang = LengthConversion("nm", "angstrom").value() @@ -48,9 +54,9 @@ def _get_cell(line): return cell -def file_to_system_data(fname, format_atom_name=True, **kwargs): +def file_to_system_data(fname: FileType, format_atom_name=True, **kwargs): system = {"coords": [], "cells": []} - with open(fname) as fp: + with open_file(fname) as fp: frame = 0 while True: flag = fp.readline() diff --git a/dpdata/lammps/dump.py b/dpdata/lammps/dump.py index f0ade2b03..72fee27df 100644 --- a/dpdata/lammps/dump.py +++ b/dpdata/lammps/dump.py @@ -3,9 +3,15 @@ import os import sys +from typing import TYPE_CHECKING import numpy as np +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType + lib_path = os.path.dirname(os.path.realpath(__file__)) sys.path.append(lib_path) import warnings @@ -169,11 +175,11 @@ def box2dumpbox(orig, box): return bounds, tilt -def load_file(fname, begin=0, step=1): +def load_file(fname: FileType, begin=0, step=1): lines = [] buff = [] cc = -1 - with open(fname) as fp: + with open_file(fname) as fp: while True: line = fp.readline().rstrip("\n") if not line: diff --git a/dpdata/openmx/omx.py b/dpdata/openmx/omx.py index d3afff00f..aae9b5786 100644 --- a/dpdata/openmx/omx.py +++ b/dpdata/openmx/omx.py @@ -1,8 +1,15 @@ #!/usr/bin/python3 from __future__ import annotations +from typing import TYPE_CHECKING + import 
numpy as np +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType + from ..unit import ( EnergyConversion, ForceConversion, @@ -98,12 +105,12 @@ def load_cells(lines): # load atom_names, atom_numbs, atom_types, cells -def load_param_file(fname, mdname): - with open(fname) as dat_file: +def load_param_file(fname: FileType, mdname: FileType): + with open_file(fname) as dat_file: lines = dat_file.readlines() atom_names, atom_types, atom_numbs = load_atom(lines) - with open(mdname) as md_file: + with open_file(mdname) as md_file: lines = md_file.readlines() cells = load_cells(lines) return atom_names, atom_numbs, atom_types, cells @@ -133,15 +140,15 @@ def load_coords(lines, atom_names, natoms): return coords -def load_data(mdname, atom_names, natoms): - with open(mdname) as md_file: +def load_data(mdname: FileType, atom_names, natoms): + with open_file(mdname) as md_file: lines = md_file.readlines() coords = load_coords(lines, atom_names, natoms) steps = [str(i) for i in range(1, coords.shape[0] + 1)] return coords, steps -def to_system_data(fname, mdname): +def to_system_data(fname: FileType, mdname: FileType): data = {} ( data["atom_names"], @@ -194,7 +201,7 @@ def load_force(lines, atom_names, atom_numbs): # load energy, force def to_system_label(fname, mdname): atom_names, atom_numbs, atom_types, cells = load_param_file(fname, mdname) - with open(mdname) as md_file: + with open_file(mdname) as md_file: lines = md_file.readlines() energy = load_energy(lines) force = load_force(lines, atom_names, atom_numbs) diff --git a/dpdata/orca/output.py b/dpdata/orca/output.py index a23013fda..a0915162b 100644 --- a/dpdata/orca/output.py +++ b/dpdata/orca/output.py @@ -1,9 +1,18 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType + -def read_orca_sp_output(fn: str) -> tuple[np.ndarray, np.ndarray, float, 
np.ndarray]: +def read_orca_sp_output( + fn: FileType, +) -> tuple[np.ndarray, np.ndarray, float, np.ndarray]: """Read from ORCA output. Note that both the energy and the gradient should be printed. @@ -28,7 +37,7 @@ def read_orca_sp_output(fn: str) -> tuple[np.ndarray, np.ndarray, float, np.ndar symbols = None forces = None energy = None - with open(fn) as f: + with open_file(fn) as f: flag = 0 for line in f: if flag in (1, 3, 4): diff --git a/dpdata/plugins/abacus.py b/dpdata/plugins/abacus.py index eb2d7786f..e3367b351 100644 --- a/dpdata/plugins/abacus.py +++ b/dpdata/plugins/abacus.py @@ -1,9 +1,15 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import dpdata.abacus.md import dpdata.abacus.relax import dpdata.abacus.scf from dpdata.format import Format +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType @Format.register("abacus/stru") @@ -12,7 +18,7 @@ class AbacusSTRUFormat(Format): def from_system(self, file_name, **kwargs): return dpdata.abacus.scf.get_frame_from_stru(file_name) - def to_system(self, data, file_name, frame_idx=0, **kwargs): + def to_system(self, data, file_name: FileType, frame_idx=0, **kwargs): """Dump the system into ABACUS STRU format file. 
Parameters @@ -46,7 +52,7 @@ def to_system(self, data, file_name, frame_idx=0, **kwargs): numerical_descriptor=numerical_descriptor, mass=mass, ) - with open(file_name, "w") as fp: + with open_file(file_name, "w") as fp: fp.write(stru_string) diff --git a/dpdata/plugins/amber.py b/dpdata/plugins/amber.py index 42fce5528..361e0d8a7 100644 --- a/dpdata/plugins/amber.py +++ b/dpdata/plugins/amber.py @@ -8,6 +8,7 @@ import dpdata.amber.sqm from dpdata.driver import Driver, Minimizer from dpdata.format import Format +from dpdata.utils import open_file @Format.register("amber/md") @@ -143,7 +144,7 @@ def label(self, data: dict) -> dict: [*self.sqm_exec.split(), "-O", "-i", inp_fn, "-o", out_fn] ) except sp.CalledProcessError as e: - with open(out_fn) as f: + with open_file(out_fn) as f: raise RuntimeError( "Run sqm failed! Output:\n" + f.read() ) from e diff --git a/dpdata/plugins/gaussian.py b/dpdata/plugins/gaussian.py index b55447b91..55bee5a4c 100644 --- a/dpdata/plugins/gaussian.py +++ b/dpdata/plugins/gaussian.py @@ -3,16 +3,21 @@ import os import subprocess as sp import tempfile +from typing import TYPE_CHECKING import dpdata.gaussian.gjf import dpdata.gaussian.log from dpdata.driver import Driver from dpdata.format import Format +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType @Format.register("gaussian/log") class GaussianLogFormat(Format): - def from_labeled_system(self, file_name, md=False, **kwargs): + def from_labeled_system(self, file_name: FileType, md=False, **kwargs): try: return dpdata.gaussian.log.to_system_data(file_name, md=md) except AssertionError: @@ -21,7 +26,7 @@ def from_labeled_system(self, file_name, md=False, **kwargs): @Format.register("gaussian/md") class GaussianMDFormat(Format): - def from_labeled_system(self, file_name, **kwargs): + def from_labeled_system(self, file_name: FileType, **kwargs): return GaussianLogFormat().from_labeled_system(file_name, md=True) @@ -29,7 +34,7 @@ def 
from_labeled_system(self, file_name, **kwargs): class GaussiaGJFFormat(Format): """Gaussian input file.""" - def from_system(self, file_name: str, **kwargs): + def from_system(self, file_name: FileType, **kwargs): """Read Gaussian input file. Parameters @@ -39,11 +44,11 @@ def from_system(self, file_name: str, **kwargs): **kwargs : dict keyword arguments """ - with open(file_name) as fp: + with open_file(file_name) as fp: text = fp.read() return dpdata.gaussian.gjf.read_gaussian_input(text) - def to_system(self, data: dict, file_name: str, **kwargs): + def to_system(self, data: dict, file_name: FileType, **kwargs): """Generate Gaussian input file. Parameters @@ -56,7 +61,7 @@ def to_system(self, data: dict, file_name: str, **kwargs): Other parameters to make input files. See :meth:`dpdata.gaussian.gjf.make_gaussian_input` """ text = dpdata.gaussian.gjf.make_gaussian_input(data, **kwargs) - with open(file_name, "w") as fp: + with open_file(file_name, "w") as fp: fp.write(text) @@ -110,7 +115,7 @@ def label(self, data: dict) -> dict: try: sp.check_output([*self.gaussian_exec.split(), inp_fn]) except sp.CalledProcessError as e: - with open(out_fn) as f: + with open_file(out_fn) as f: out = f.read() raise RuntimeError("Run gaussian failed! 
Output:\n" + out) from e labeled_system.append(dpdata.LabeledSystem(out_fn, fmt="gaussian/log")) diff --git a/dpdata/plugins/gromacs.py b/dpdata/plugins/gromacs.py index 12dece718..a7066bbcc 100644 --- a/dpdata/plugins/gromacs.py +++ b/dpdata/plugins/gromacs.py @@ -1,7 +1,13 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import dpdata.gromacs.gro from dpdata.format import Format +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType @Format.register("gro") @@ -23,7 +29,9 @@ def from_system(self, file_name, format_atom_name=True, **kwargs): file_name, format_atom_name=format_atom_name, **kwargs ) - def to_system(self, data, file_name=None, frame_idx=-1, **kwargs): + def to_system( + self, data, file_name: FileType | None = None, frame_idx=-1, **kwargs + ): """Dump the system in gromacs .gro format. Parameters @@ -52,5 +60,5 @@ def to_system(self, data, file_name=None, frame_idx=-1, **kwargs): if file_name is None: return gro_str else: - with open(file_name, "w+") as fp: + with open_file(file_name, "w+") as fp: fp.write(gro_str) diff --git a/dpdata/plugins/lammps.py b/dpdata/plugins/lammps.py index 65e7f5701..0327d444b 100644 --- a/dpdata/plugins/lammps.py +++ b/dpdata/plugins/lammps.py @@ -1,20 +1,26 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import dpdata.lammps.dump import dpdata.lammps.lmp from dpdata.format import Format +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType @Format.register("lmp") @Format.register("lammps/lmp") class LAMMPSLmpFormat(Format): @Format.post("shift_orig_zero") - def from_system(self, file_name, type_map=None, **kwargs): - with open(file_name) as fp: + def from_system(self, file_name: FileType, type_map=None, **kwargs): + with open_file(file_name) as fp: lines = [line.rstrip("\n") for line in fp] return dpdata.lammps.lmp.to_system_data(lines, type_map) - def to_system(self, data, file_name, 
frame_idx=0, **kwargs): + def to_system(self, data, file_name: FileType, frame_idx=0, **kwargs): """Dump the system in lammps data format. Parameters @@ -30,7 +36,7 @@ def to_system(self, data, file_name, frame_idx=0, **kwargs): """ assert frame_idx < len(data["coords"]) w_str = dpdata.lammps.lmp.from_system_data(data, frame_idx) - with open(file_name, "w") as fp: + with open_file(file_name, "w") as fp: fp.write(w_str) diff --git a/dpdata/plugins/n2p2.py b/dpdata/plugins/n2p2.py index b70d6e6fb..28942ff5f 100644 --- a/dpdata/plugins/n2p2.py +++ b/dpdata/plugins/n2p2.py @@ -1,8 +1,14 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np from dpdata.format import Format +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType from ..unit import EnergyConversion, ForceConversion, LengthConversion @@ -44,7 +50,7 @@ class N2P2Format(Format): For more information about the n2p2 format, please refer to https://compphysvienna.github.io/n2p2/topics/cfg_file.html """ - def from_labeled_system(self, file_name, **kwargs): + def from_labeled_system(self, file_name: FileType, **kwargs): """Read from n2p2 format. Parameters @@ -67,7 +73,7 @@ def from_labeled_system(self, file_name, **kwargs): natom0 = None natoms0 = None atom_types0 = None - with open(file_name) as file: + with open_file(file_name) as file: for line in file: line = line.strip() # Remove leading/trailing whitespace if line.lower() == "begin": @@ -155,7 +161,7 @@ def from_labeled_system(self, file_name, **kwargs): "forces": forces, } - def to_labeled_system(self, data, file_name, **kwargs): + def to_labeled_system(self, data, file_name: FileType, **kwargs): """Write n2p2 format. By default, LabeledSystem.to will fallback to System.to. 
@@ -193,5 +199,5 @@ def to_labeled_system(self, data, file_name, **kwargs): buff.append(f"energy {energy:15.6f}") buff.append(f"charge {0:15.6f}") buff.append("end") - with open(file_name, "w") as fp: + with open_file(file_name, "w") as fp: fp.write("\n".join(buff)) diff --git a/dpdata/plugins/orca.py b/dpdata/plugins/orca.py index 9dc32c32c..7a0b806c9 100644 --- a/dpdata/plugins/orca.py +++ b/dpdata/plugins/orca.py @@ -1,11 +1,16 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np from dpdata.format import Format from dpdata.orca.output import read_orca_sp_output from dpdata.unit import EnergyConversion, ForceConversion +if TYPE_CHECKING: + from dpdata.utils import FileType + energy_convert = EnergyConversion("hartree", "eV").value() force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value() @@ -18,12 +23,12 @@ class ORCASPOutFormat(Format): printed into the output file. """ - def from_labeled_system(self, file_name: str, **kwargs) -> dict: + def from_labeled_system(self, file_name: FileType, **kwargs) -> dict: """Read from ORCA single point energy output. Parameters ---------- - file_name : str + file_name : FileType file name **kwargs keyword arguments diff --git a/dpdata/plugins/psi4.py b/dpdata/plugins/psi4.py index a0cf00e46..2bbfc2321 100644 --- a/dpdata/plugins/psi4.py +++ b/dpdata/plugins/psi4.py @@ -1,11 +1,17 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np from dpdata.format import Format from dpdata.psi4.input import write_psi4_input from dpdata.psi4.output import read_psi4_output from dpdata.unit import EnergyConversion, ForceConversion +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType energy_convert = EnergyConversion("hartree", "eV").value() force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value() @@ -19,12 +25,12 @@ class PSI4OutFormat(Format): printed into the output file. 
""" - def from_labeled_system(self, file_name: str, **kwargs) -> dict: + def from_labeled_system(self, file_name: FileType, **kwargs) -> dict: """Read from Psi4 output. Parameters ---------- - file_name : str + file_name : FileType file name **kwargs keyword arguments @@ -61,7 +67,7 @@ class PSI4InputFormat(Format): def to_system( self, data: dict, - file_name: str, + file_name: FileType, method: str, basis: str, charge: int = 0, @@ -91,7 +97,7 @@ def to_system( keyword arguments """ types = np.array(data["atom_names"])[data["atom_types"]] - with open(file_name, "w") as fout: + with open_file(file_name, "w") as fout: fout.write( write_psi4_input( types=types, diff --git a/dpdata/plugins/pwmat.py b/dpdata/plugins/pwmat.py index 80f219b6c..ba3dab160 100644 --- a/dpdata/plugins/pwmat.py +++ b/dpdata/plugins/pwmat.py @@ -1,10 +1,16 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np import dpdata.pwmat.atomconfig import dpdata.pwmat.movement from dpdata.format import Format +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType @Format.register("movement") @@ -47,12 +53,12 @@ def from_labeled_system( @Format.register("pwmat/final.config") class PwmatAtomconfigFormat(Format): @Format.post("rot_lower_triangular") - def from_system(self, file_name, **kwargs): - with open(file_name) as fp: + def from_system(self, file_name: FileType, **kwargs): + with open_file(file_name) as fp: lines = [line.rstrip("\n") for line in fp] return dpdata.pwmat.atomconfig.to_system_data(lines) - def to_system(self, data, file_name, frame_idx=0, *args, **kwargs): + def to_system(self, data, file_name: FileType, frame_idx=0, *args, **kwargs): """Dump the system in pwmat atom.config format. 
Parameters @@ -70,5 +76,5 @@ def to_system(self, data, file_name, frame_idx=0, *args, **kwargs): """ assert frame_idx < len(data["coords"]) w_str = dpdata.pwmat.atomconfig.from_system_data(data, frame_idx) - with open(file_name, "w") as fp: + with open_file(file_name, "w") as fp: fp.write(w_str) diff --git a/dpdata/plugins/vasp.py b/dpdata/plugins/vasp.py index d0681cebf..0160bde29 100644 --- a/dpdata/plugins/vasp.py +++ b/dpdata/plugins/vasp.py @@ -1,12 +1,17 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np import dpdata.vasp.outcar import dpdata.vasp.poscar import dpdata.vasp.xml from dpdata.format import Format -from dpdata.utils import uniq_atom_names +from dpdata.utils import open_file, uniq_atom_names + +if TYPE_CHECKING: + from dpdata.utils import FileType @Format.register("poscar") @@ -15,14 +20,14 @@ @Format.register("vasp/contcar") class VASPPoscarFormat(Format): @Format.post("rot_lower_triangular") - def from_system(self, file_name, **kwargs): - with open(file_name) as fp: + def from_system(self, file_name: FileType, **kwargs): + with open_file(file_name) as fp: lines = [line.rstrip("\n") for line in fp] data = dpdata.vasp.poscar.to_system_data(lines) data = uniq_atom_names(data) return data - def to_system(self, data, file_name, frame_idx=0, **kwargs): + def to_system(self, data, file_name: FileType, frame_idx=0, **kwargs): """Dump the system in vasp POSCAR format. 
Parameters @@ -37,7 +42,7 @@ def to_system(self, data, file_name, frame_idx=0, **kwargs): other parameters """ w_str = VASPStringFormat().to_system(data, frame_idx=frame_idx) - with open(file_name, "w") as fp: + with open_file(file_name, "w") as fp: fp.write(w_str) diff --git a/dpdata/plugins/xyz.py b/dpdata/plugins/xyz.py index 322bf77cb..d56a8618c 100644 --- a/dpdata/plugins/xyz.py +++ b/dpdata/plugins/xyz.py @@ -1,8 +1,14 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np from dpdata.format import Format +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType from dpdata.xyz.quip_gap_xyz import QuipGapxyzSystems from dpdata.xyz.xyz import coord_to_xyz, xyz_to_coord @@ -16,16 +22,16 @@ class XYZFormat(Format): >>> s.to("xyz", "a.xyz") """ - def to_system(self, data, file_name, **kwargs): + def to_system(self, data, file_name: FileType, **kwargs): buff = [] types = np.array(data["atom_names"])[data["atom_types"]] for cc in data["coords"]: buff.append(coord_to_xyz(cc, types)) - with open(file_name, "w") as fp: + with open_file(file_name, "w") as fp: fp.write("\n".join(buff)) - def from_system(self, file_name, **kwargs): - with open(file_name) as fp: + def from_system(self, file_name: FileType, **kwargs): + with open_file(file_name) as fp: coords, types = xyz_to_coord(fp.read()) atom_names, atom_types, atom_numbs = np.unique( types, return_inverse=True, return_counts=True diff --git a/dpdata/psi4/output.py b/dpdata/psi4/output.py index c06eb1829..c3594ffb4 100644 --- a/dpdata/psi4/output.py +++ b/dpdata/psi4/output.py @@ -1,11 +1,17 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np from dpdata.unit import LengthConversion +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType -def read_psi4_output(fn: str) -> tuple[str, np.ndarray, float, np.ndarray]: +def read_psi4_output(fn: FileType) -> tuple[str, 
np.ndarray, float, np.ndarray]: """Read from Psi4 output. Note that both the energy and the gradient should be printed. @@ -31,7 +37,7 @@ def read_psi4_output(fn: str) -> tuple[str, np.ndarray, float, np.ndarray]: forces = None energy = None length_unit = None - with open(fn) as f: + with open_file(fn) as f: flag = 0 for line in f: if flag in (1, 3, 4, 5, 6): diff --git a/dpdata/qe/scf.py b/dpdata/qe/scf.py index 37e5fbab6..f86708605 100755 --- a/dpdata/qe/scf.py +++ b/dpdata/qe/scf.py @@ -5,6 +5,8 @@ import numpy as np +from dpdata.utils import open_file + ry2ev = 13.605693009 bohr2ang = 0.52917721067 kbar2evperang3 = 1e3 / 1.602176621e6 @@ -142,9 +144,9 @@ def get_frame(fname): path_out = fname[1] else: raise RuntimeError("invalid input") - with open(path_out) as fp: + with open_file(path_out) as fp: outlines = fp.read().split("\n") - with open(path_in) as fp: + with open_file(path_in) as fp: inlines = fp.read().split("\n") cell = get_cell(inlines) atom_names, natoms, types, coords = get_coords(inlines, cell) diff --git a/dpdata/qe/traj.py b/dpdata/qe/traj.py index 1fbf0f71c..b4be303ad 100644 --- a/dpdata/qe/traj.py +++ b/dpdata/qe/traj.py @@ -2,9 +2,15 @@ from __future__ import annotations import warnings +from typing import TYPE_CHECKING import numpy as np +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType + from ..unit import ( EnergyConversion, ForceConversion, @@ -87,8 +93,8 @@ def load_atom_types(lines, natoms, atom_names): return np.array(ret, dtype=int) -def load_param_file(fname): - with open(fname) as fp: +def load_param_file(fname: FileType): + with open_file(fname) as fp: lines = fp.read().split("\n") natoms = int(load_key(lines, "nat")) ntypes = int(load_key(lines, "ntyp")) @@ -127,11 +133,11 @@ def _load_pos_block(fp, natoms): return blk, ss -def load_data(fname, natoms, begin=0, step=1, convert=1.0): +def load_data(fname: FileType, natoms, begin=0, step=1, convert=1.0): coords = [] steps = [] cc = 0 - with 
open(fname) as fp: + with open_file(fname) as fp: while True: blk, ss = _load_pos_block(fp, natoms) if blk is None: @@ -147,7 +153,7 @@ def load_data(fname, natoms, begin=0, step=1, convert=1.0): # def load_pos(fname, natoms) : # coords = [] -# with open(fname) as fp: +# with open_file(fname) as fp: # while True: # blk = _load_pos_block(fp, natoms) # # print(blk) @@ -164,7 +170,7 @@ def load_energy(fname, begin=0, step=1): steps = [] for ii in data[begin::step, 0]: steps.append("%d" % ii) - with open(fname) as fp: + with open_file(fname) as fp: while True: line = fp.readline() if not line: @@ -178,7 +184,7 @@ def load_energy(fname, begin=0, step=1): # def load_force(fname, natoms) : # coords = [] -# with open(fname) as fp: +# with open_file(fname) as fp: # while True: # blk = _load_pos_block(fp, natoms) # # print(blk) diff --git a/dpdata/utils.py b/dpdata/utils.py index e008120ea..8942bd54d 100644 --- a/dpdata/utils.py +++ b/dpdata/utils.py @@ -1,7 +1,10 @@ from __future__ import annotations +import io +import os import sys -from typing import overload +from contextlib import contextmanager +from typing import TYPE_CHECKING, Generator, overload if sys.version_info >= (3, 8): from typing import Literal @@ -129,3 +132,44 @@ def uniq_atom_names(data): def utf8len(s: str) -> int: """Return the byte length of a string.""" return len(s.encode("utf-8")) + + +if TYPE_CHECKING: + FileType = io.IOBase | str | os.PathLike + + +@contextmanager +def open_file(file: FileType, *args, **kwargs) -> Generator[io.IOBase, None, None]: + """A context manager that yields a file object. + + Parameters + ---------- + file : file object or file path + A file object or a file path. + + Yields + ------ + file : io.IOBase + A file object. + *args + parameters to open + **kwargs + other parameters + + Raises + ------ + ValueError + If file is not a file object or a file + + Examples + -------- + >>> with open_file("file.txt") as file: + ... 
print(file.read()) + """ + if isinstance(file, io.IOBase): + yield file + elif isinstance(file, (str, os.PathLike)): + with open(file, *args, **kwargs) as f: + yield f + else: + raise ValueError("file must be a file object or a file path.") diff --git a/tests/test_read_file.py b/tests/test_read_file.py new file mode 100644 index 000000000..e7dca54e6 --- /dev/null +++ b/tests/test_read_file.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +import io +import unittest +from pathlib import Path + +from dpdata.utils import open_file + + +class TestReadFile(unittest.TestCase): + def test_open_file_from_string_io(self): + string_io = io.StringIO("Hello, world!") + with open_file(string_io) as file: + self.assertEqual(file.read(), "Hello, world!") + + def test_open_file_from_file_str(self): + with open_file("/dev/null") as file: + self.assertEqual(file.read(), Path("/dev/null").read_text()) + + def test_open_file_from_file_path(self): + with open_file(Path("/dev/null")) as file: + self.assertEqual(file.read(), Path("/dev/null").read_text()) From c311d16ff9db600815acdf23b005fa4ad388607c Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 2 Sep 2024 22:59:20 -0400 Subject: [PATCH 23/24] fix: only apply comp_prec for floating dtypes (#711) Fix #703. ## Summary by CodeRabbit - **New Features** - Improved handling of data types during the reshaping process, ensuring type conversion only occurs for floating-point data. - **Bug Fixes** - Enhanced robustness of data processing by preventing unnecessary type casting for non-floating-point data types. 
Signed-off-by: Jinzhe Zeng --- dpdata/deepmd/comp.py | 4 +++- dpdata/deepmd/hdf5.py | 7 ++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/dpdata/deepmd/comp.py b/dpdata/deepmd/comp.py index 5ba3914e7..3d656b709 100644 --- a/dpdata/deepmd/comp.py +++ b/dpdata/deepmd/comp.py @@ -204,7 +204,9 @@ def dump(folder, data, set_size=5000, comp_prec=np.float32, remove_sets=True): f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted to deepmd/npy format." ) continue - ddata = np.reshape(data[dtype.name], [nframes, -1]).astype(comp_prec) + ddata = np.reshape(data[dtype.name], [nframes, -1]) + if np.issubdtype(ddata.dtype, np.floating): + ddata = ddata.astype(comp_prec) for ii in range(nsets): set_stt = ii * set_size set_end = (ii + 1) * set_size diff --git a/dpdata/deepmd/hdf5.py b/dpdata/deepmd/hdf5.py index 78f4e33fe..6b5878932 100644 --- a/dpdata/deepmd/hdf5.py +++ b/dpdata/deepmd/hdf5.py @@ -252,9 +252,10 @@ def dump( for dt, prop in data_types.items(): if dt in data: if prop["dump"]: - reshaped_data[dt] = np.reshape(data[dt], prop["shape"]).astype( - comp_prec - ) + ddata = np.reshape(data[dt], prop["shape"]) + if np.issubdtype(ddata.dtype, np.floating): + ddata = ddata.astype(comp_prec) + reshaped_data[dt] = ddata # dump frame properties: cell, coord, energy, force and virial nsets = nframes // set_size From bad02850c7cb982f937ce89e296809948081f9c7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 3 Sep 2024 11:00:47 +0800 Subject: [PATCH 24/24] [pre-commit.ci] pre-commit autoupdate (#712) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.6.2 → v0.6.3](https://github.com/astral-sh/ruff-pre-commit/compare/v0.6.2...v0.6.3) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f4abb50c3..3f43b6975 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: # Python - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.6.2 + rev: v0.6.3 hooks: - id: ruff args: ["--fix"]