From e19e34e6bff4de658ca8b5dffff428c400b126a1 Mon Sep 17 00:00:00 2001 From: Duo <50307526+iProzd@users.noreply.github.com> Date: Fri, 7 Jun 2024 04:27:00 +0800 Subject: [PATCH 01/24] fix: use deepcopy when dumping mixed types (#671) Use deepcopy when dumping mixed types in system level. Fix #670 . Typically, users use the following command to dump list of systems to mixed_type: ``` dpdata.MultiSystems(*systems).to_deepmd_npy_mixed("mixed_dir") ``` This command will rebuild a new system with a new data dict, thus it **does not change the original `system.data`**. In #670 , when users use ``` system.to('deepmd/npy/mixed', 'dir') ``` on a system in standard format, it would directly convert the original `system.data` into mixed types, thus the original `system.data` would be changed. Use deepcopy to prevent this. ## Summary by CodeRabbit - **Bug Fixes** - Enhanced data integrity by preserving the original content before processing. - **Tests** - Added new tests to ensure proper functionality of the data dumping process. 
--------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- dpdata/deepmd/mixed.py | 4 ++++ tests/test_deepmd_mixed.py | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/dpdata/deepmd/mixed.py b/dpdata/deepmd/mixed.py index b25107dbc..ccf9b2f58 100644 --- a/dpdata/deepmd/mixed.py +++ b/dpdata/deepmd/mixed.py @@ -1,5 +1,6 @@ from __future__ import annotations +import copy import glob import os import shutil @@ -160,6 +161,9 @@ def dump(folder, data, set_size=2000, comp_prec=np.float32, remove_sets=True): if "real_atom_types" not in data: from dpdata import LabeledSystem, System + # not change the original content + data = copy.deepcopy(data) + if "energies" in data: temp_sys = LabeledSystem(data=data) else: diff --git a/tests/test_deepmd_mixed.py b/tests/test_deepmd_mixed.py index 02044932e..70a09dbe8 100644 --- a/tests/test_deepmd_mixed.py +++ b/tests/test_deepmd_mixed.py @@ -314,5 +314,27 @@ def test_str(self): ) +class TestMixedSingleSystemsDump(unittest.TestCase, CompLabeledSys, IsNoPBC): + def setUp(self): + self.places = 6 + self.e_places = 6 + self.f_places = 6 + self.v_places = 6 + + # C1H4 + self.system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + self.system_2 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + # test dump + self.system_1.to("deepmd/npy/mixed", "tmp.deepmd.mixed.single") + + def tearDown(self): + if os.path.exists("tmp.deepmd.mixed.single"): + shutil.rmtree("tmp.deepmd.mixed.single") + + if __name__ == "__main__": unittest.main() From a9a8b480410f2757d846ac986da270b024976bee Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 11 Jun 2024 09:41:56 +0800 Subject: [PATCH 02/24] [pre-commit.ci] pre-commit autoupdate (#674) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - 
[github.com/astral-sh/ruff-pre-commit: v0.4.7 → v0.4.8](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.7...v0.4.8) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 23bea3ebd..f4451dedb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: # Python - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.4.7 + rev: v0.4.8 hooks: - id: ruff args: ["--fix"] From fb85312d5a9da4f4e10e63df538d1343afd7db02 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 1 Jul 2024 17:40:02 -0400 Subject: [PATCH 03/24] [pre-commit.ci] pre-commit autoupdate (#675) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.4.8 → v0.4.10](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.8...v0.4.10) --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Jinzhe Zeng --- .github/workflows/benchmark.yml | 6 +++--- .github/workflows/test.yml | 2 +- .pre-commit-config.yaml | 2 +- pyproject.toml | 5 +++++ 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 650ff59ef..25650184b 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -1,11 +1,11 @@ -name: Python package +name: Benchmark on: - push - pull_request jobs: - build: + benchmark: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -15,7 +15,7 @@ jobs: python-version: 3.12 - run: curl -LsSf https://astral.sh/uv/install.sh | sh - name: Install dependencies - run: uv pip install --system .[amber,ase,pymatgen,benchmark] rdkit openbabel-wheel + run: 
uv pip install --system .[test,amber,ase,pymatgen,benchmark] rdkit openbabel-wheel - name: Run benchmarks uses: CodSpeedHQ/action@v2 with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4a17fcde1..a6d23270c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,7 +20,7 @@ jobs: python-version: ${{ matrix.python-version }} - run: curl -LsSf https://astral.sh/uv/install.sh | sh - name: Install dependencies - run: uv pip install --system .[amber,ase,pymatgen] coverage ./tests/plugin rdkit openbabel-wheel + run: uv pip install --system .[test,amber,ase,pymatgen] coverage ./tests/plugin rdkit openbabel-wheel - name: Test run: cd tests && coverage run --source=../dpdata -m unittest && cd .. && coverage combine tests/.coverage && coverage report - name: Run codecov diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f4451dedb..d9e92b7b3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: # Python - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. 
- rev: v0.4.8 + rev: v0.4.10 hooks: - id: ruff args: ["--fix"] diff --git a/pyproject.toml b/pyproject.toml index 6efe08188..35b30decf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,11 @@ repository = "https://github.com/deepmodeling/dpdata" dpdata = "dpdata.cli:dpdata_cli" [project.optional-dependencies] +test = [ + # https://github.com/materialsproject/pymatgen/issues/3882 + # https://github.com/kuelumbus/rdkit-pypi/issues/102 + "numpy<2", +] ase = ['ase'] amber = [ 'parmed; python_version >= "3.8"', From e429d9a54ef6d2cbb2d55eefa68bb9b96045fa25 Mon Sep 17 00:00:00 2001 From: Peng Xingliang <91927439+pxlxingliang@users.noreply.github.com> Date: Tue, 2 Jul 2024 13:18:56 +0800 Subject: [PATCH 04/24] abacus: fix bug in collecting force (#669) ## Summary by CodeRabbit - **Bug Fixes** - Modified force data handling in computational simulations to return `None` instead of an empty list when no forces are present. - **Tests** - Added new test scenarios to validate force data handling in labeled system objects for computational simulations. 
--------- Signed-off-by: Jinzhe Zeng Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Jinzhe Zeng --- dpdata/abacus/scf.py | 4 +- tests/abacus.scf/INPUT.ch4-noforcestress | 17 + .../OUT.ch4-noForceStress/running_scf.log | 559 ++++++++++++++++++ tests/test_abacus_pw_scf.py | 18 + 4 files changed, 596 insertions(+), 2 deletions(-) create mode 100644 tests/abacus.scf/INPUT.ch4-noforcestress create mode 100644 tests/abacus.scf/OUT.ch4-noForceStress/running_scf.log diff --git a/dpdata/abacus/scf.py b/dpdata/abacus/scf.py index 193e4d4b5..c06f8cd3d 100644 --- a/dpdata/abacus/scf.py +++ b/dpdata/abacus/scf.py @@ -190,7 +190,7 @@ def collect_force(outlines): def get_force(outlines, natoms): force = collect_force(outlines) if len(force) == 0: - return [[]] + return None else: return np.array(force[-1]) # only return the last force @@ -285,7 +285,7 @@ def get_frame(fname): data["cells"] = cell[np.newaxis, :, :] data["coords"] = coords[np.newaxis, :, :] data["energies"] = np.array(energy)[np.newaxis] - data["forces"] = force[np.newaxis, :, :] + data["forces"] = np.empty((0,)) if force is None else force[np.newaxis, :, :] if stress is not None: data["virials"] = stress[np.newaxis, :, :] data["orig"] = np.zeros(3) diff --git a/tests/abacus.scf/INPUT.ch4-noforcestress b/tests/abacus.scf/INPUT.ch4-noforcestress new file mode 100644 index 000000000..66d4a4e4d --- /dev/null +++ b/tests/abacus.scf/INPUT.ch4-noforcestress @@ -0,0 +1,17 @@ +INPUT_PARAMETERS +#Parameters (General) +suffix ch4-noForceStress +stru_file STRU.ch4 #the filename of file containing atom positions +kpoint_file KPT.ch4 #the name of file containing k points +pseudo_dir ./ +nbands 8 +#Parameters (Accuracy) +ecutwfc 100 +symmetry 1 +scf_nmax 50 +smearing_method gauss #type of smearing: gauss; fd; fixed; mp; mp2; mv +smearing_sigma 0.01 +mixing_type plain +mixing_beta 0.5 +cal_force 1 +cal_stress 1 diff --git 
a/tests/abacus.scf/OUT.ch4-noForceStress/running_scf.log b/tests/abacus.scf/OUT.ch4-noForceStress/running_scf.log new file mode 100644 index 000000000..c6183689f --- /dev/null +++ b/tests/abacus.scf/OUT.ch4-noForceStress/running_scf.log @@ -0,0 +1,559 @@ + + WELCOME TO ABACUS + + 'Atomic-orbital Based Ab-initio Computation at UStc' + + Website: http://abacus.ustc.edu.cn/ + + Version: Parallel, v2.1.0 + Processor Number is 1 + Start Time is Fri May 7 16:18:50 2021 + + ------------------------------------------------------------------------------------ + + READING GENERAL INFORMATION + global_out_dir = OUT.ch4/ + global_in_card = INPUT + pseudo_dir = ./ + pseudo_type = auto + DRANK = 1 + DSIZE = 1 + DCOLOR = 1 + GRANK = 1 + GSIZE = 1 + + + + + >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + | | + | Reading atom information in unitcell: | + | From the input file and the structure file we know the number of | + | different elments in this unitcell, then we list the detail | + | information for each element, especially the zeta and polar atomic | + | orbital number for each element. The total atom number is counted. | + | We calculate the nearest atom distance for each atom and show the | + | Cartesian and Direct coordinates for each atom. We list the file | + | address for atomic orbitals. The volume and the lattice vectors | + | in real and reciprocal space is also shown. 
| + | | + <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + + + + READING UNITCELL INFORMATION + ntype = 2 + atom label for species 1 = C + atom label for species 2 = H + lattice constant (Bohr) = 10 + lattice constant (Angstrom) = 5.29177 + + READING ATOM TYPE 1 + atom label = C + start magnetization = FALSE + L=0, number of zeta = 1 + L=1, number of zeta = 1 + L=2, number of zeta = 1 + number of atom for this type = 1 + + READING ATOM TYPE 2 + atom label = H + start magnetization = FALSE + L=0, number of zeta = 1 + L=1, number of zeta = 1 + L=2, number of zeta = 1 + number of atom for this type = 4 + + TOTAL ATOM NUMBER = 5 + + CARTESIAN COORDINATES ( UNIT = 10 Bohr ). + atom x y z mag + tauc_C1 0.981274803 0.861285385001 0.838442496 0 + tauc_H1 0.0235572019992 0.758025625 0.663513359999 0 + tauc_H2 0.78075702 0.889445934999 0.837363467999 0 + tauc_H3 0.0640916129996 0.0434389050006 0.840995502 0 + tauc_H4 0.0393212140007 0.756530859 0.00960920699981 0 + + + Volume (Bohr^3) = 1000 + Volume (A^3) = 148.184534296 + + Lattice vectors: (Cartesian coordinate: in unit of a_0) + +1 +0 +0 + +0 +1 +0 + +0 +0 +1 + Reciprocal vectors: (Cartesian coordinate: in unit of 2 pi/a_0) + +1 +0 +0 + +0 +1 +0 + +0 -0 +1 + + + + + >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + | | + | Reading pseudopotentials files: | + | The pseudopotential file is in UPF format. The 'NC' indicates that | + | the type of pseudopotential is 'norm conserving'. Functional of | + | exchange and correlation is decided by 4 given parameters in UPF | + | file. We also read in the 'core correction' if there exists. | + | Also we can read the valence electrons number and the maximal | + | angular momentum used in this pseudopotential. We also read in the | + | trail wave function, trail atomic density and local-pseudopotential| + | on logrithmic grid. The non-local pseudopotential projector is also| + | read in if there is any. 
| + | | + <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + + + + PAO radial cut off (Bohr) = 15 + + Read in pseudopotential file is C_ONCV_PBE-1.0.upf + pseudopotential type = NC + functional Ex = PBE + functional Ec = + functional GCEx = + functional GCEc = + nonlocal core correction = 0 + valence electrons = 4 + lmax = 1 + number of zeta = 0 + number of projectors = 4 + L of projector = 0 + L of projector = 0 + L of projector = 1 + L of projector = 1 + PAO radial cut off (Bohr) = 15 + + Read in pseudopotential file is H_ONCV_PBE-1.0.upf + pseudopotential type = NC + functional Ex = PBE + functional Ec = + functional GCEx = + functional GCEc = + nonlocal core correction = 0 + valence electrons = 1 + lmax = 0 + number of zeta = 0 + number of projectors = 2 + L of projector = 0 + L of projector = 0 + initial pseudo atomic orbital number = 0 + NLOCAL = 45 + + SETUP THE ELECTRONS NUMBER + electron number of element C = 4 + total electron number of element C = 4 + electron number of element H = 1 + total electron number of element H = 4 + occupied bands = 4 + NBANDS = 8 + DONE : SETUP UNITCELL Time : 0.0506949424744 (SEC) + + + + + + >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + | | + | Doing symmetry analysis: | + | We calculate the norm of 3 vectors and the angles between them, | + | the type of Bravais lattice is given. We can judge if the unticell | + | is a primitive cell. Finally we give the point group operation for | + | this unitcell. We we use the point group operations to do symmetry | + | analysis on given k-point mesh and the charge density. | + | | + <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + + + + LATTICE VECTORS: (CARTESIAN COORDINATE: IN UNIT OF A0) + +1 +0 +0 + +0 +1 +0 + +0 +0 +1 + right hand lattice = 1 + NORM_A = 1 + NORM_B = 1 + NORM_C = 1 + ALPHA (DEGREE) = 90 + BETA (DEGREE) = 90 + GAMMA (DEGREE) = 90 + BRAVAIS TYPE = 1 + BRAVAIS LATTICE NAME = 01. 
Cubic P (simple) + STANDARD LATTICE VECTORS: (CARTESIAN COORDINATE: IN UNIT OF A0) + +1 +0 +0 + +0 +1 +0 + +0 +0 +1 + IBRAV = 1 + BRAVAIS = SIMPLE CUBIC + LATTICE CONSTANT A = 4.35889894354 + ibrav = 1 + ROTATION MATRICES = 48 + PURE POINT GROUP OPERATIONS = 1 + SPACE GROUP OPERATIONS = 1 + POINT GROUP = C_1 + DONE : SYMMETRY Time : 0.103345155716 (SEC) + + + + + + >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + | | + | Setup K-points | + | We setup the k-points according to input parameters. | + | The reduced k-points are set according to symmetry operations. | + | We treat the spin as another set of k-points. | + | | + <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + + + + + SETUP K-POINTS + nspin = 1 + Input type of k points = Monkhorst-Pack(Gamma) + nkstot = 1 + nkstot_ibz = 1 + IBZ DirectX DirectY DirectZ Weight ibz2bz + 1 0 0 0 1 0 + nkstot now = 1 + + KPOINTS DIRECT_X DIRECT_Y DIRECT_Z WEIGHT + 1 0 0 0 1 + + k-point number in this process = 1 + minimum distributed K point number = 1 + + KPOINTS CARTESIAN_X CARTESIAN_Y CARTESIAN_Z WEIGHT + 1 0 0 0 2 + + KPOINTS DIRECT_X DIRECT_Y DIRECT_Z WEIGHT + 1 0 0 0 2 + DONE : INIT K-POINTS Time : 0.103604078293 (SEC) + + + + + + >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + | | + | Setup plane waves: | + | Use the energy cutoff and the lattice vectors to generate the | + | dimensions of FFT grid. The number of FFT grid on each processor | + | is 'nrxx'. The number of plane wave basis in reciprocal space is | + | different for charege/potential and wave functions. We also set | + | the 'sticks' for the parallel of FFT. 
| + | | + <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + + + + + SETUP THE PLANE WAVE BASIS + energy cutoff for wavefunc (unit:Ry) = 100 + [fft grid for wave functions] = 64, 64, 64 + [fft grid for charge/potential] = 64, 64, 64 + [fft grid division] = 1, 1, 1 + [big fft grid for charge/potential] = 64, 64, 64 + nbxx = 262144 + nrxx = 262144 + + SETUP PLANE WAVES FOR CHARGE/POTENTIAL + number of plane waves = 135043 + number of sticks = 3181 + + SETUP PLANE WAVES FOR WAVE FUNCTIONS + number of plane waves = 16879 + number of sticks = 793 + + PARALLEL PW FOR CHARGE/POTENTIAL + PROC COLUMNS(POT) PW + 1 3181 135043 + --------------- sum ------------------- + 1 3181 135043 + + PARALLEL PW FOR WAVE FUNCTIONS + PROC COLUMNS(W) PW + 1 793 16879 + --------------- sum ------------------- + 1 793 16879 + + SETUP COORDINATES OF PLANE WAVES + number of total plane waves = 135043 + + SETUP COORDINATES OF PLANE WAVES + number of |g| = 847 + max |g| = 1013 + min |g| = 0 + DONE : INIT PLANEWAVE Time : 0.294428110123 (SEC) + + npwx = 16879 + + SETUP NONLOCAL PSEUDOPOTENTIALS IN PLANE WAVE BASIS + C non-local projectors: + projector 1 L=0 + projector 2 L=0 + projector 3 L=1 + projector 4 L=1 + H non-local projectors: + projector 1 L=0 + projector 2 L=0 + TOTAL NUMBER OF NONLOCAL PROJECTORS = 16 + DONE : LOCAL POTENTIAL Time : 0.339339256287 (SEC) + + + Init Non-Local PseudoPotential table : + Init Non-Local-Pseudopotential done. + DONE : NON-LOCAL POTENTIAL Time : 0.352857112885 (SEC) + + start_pot = atomic + DONE : INIT POTENTIAL Time : 0.642299 (SEC) + + + Make real space PAO into reciprocal space. 
+ max mesh points in Pseudopotential = 601 + dq(describe PAO in reciprocal space) = 0.01 + max q = 1206 + + number of pseudo atomic orbitals for C is 0 + + number of pseudo atomic orbitals for H is 0 + DONE : INIT BASIS Time : 0.859031 (SEC) + + ------------------------------------------- + ------------------------------------------- + + PW ALGORITHM --------------- ION= 1 ELEC= 1-------------------------------- + K-point CG iter num Time(Sec) + 1 8.000000 1.417983 + + Density error is 0.705428908048 + Error Threshold = 0.010000000000 + + Energy Rydberg eV + E_KohnSham -16.017544057872 -217.929867153100 + E_Harris -16.408058532460 -223.243089158968 + E_Fermi -0.626082258799 -8.518286136372 + + PW ALGORITHM --------------- ION= 1 ELEC= 2-------------------------------- + K-point CG iter num Time(Sec) + 1 3.000000 0.623305 + + Density error is 0.037193130746 + Error Threshold = 0.008817861351 + + Energy Rydberg eV + E_KohnSham -16.144996950340 -219.663952717248 + E_Harris -16.158845333973 -219.852369642740 + E_Fermi -0.425221736464 -5.785438529359 + + PW ALGORITHM --------------- ION= 1 ELEC= 3-------------------------------- + K-point CG iter num Time(Sec) + 1 3.750000 0.765803 + + Density error is 0.008021287795 + Error Threshold = 0.000464914134 + + Energy Rydberg eV + E_KohnSham -16.143766641525 -219.647213507066 + E_Harris -16.146720471833 -219.687402430176 + E_Fermi -0.423547802407 -5.762663488108 + + PW ALGORITHM --------------- ION= 1 ELEC= 4-------------------------------- + K-point CG iter num Time(Sec) + 1 3.250000 0.691389 + + Density error is 0.001345350159 + Error Threshold = 0.000100266097 + + Energy Rydberg eV + E_KohnSham -16.143852570867 -219.648382635743 + E_Harris -16.144348491782 -219.655129985945 + E_Fermi -0.417940644498 -5.686374190966 + + PW ALGORITHM --------------- ION= 1 ELEC= 5-------------------------------- + K-point CG iter num Time(Sec) + 1 3.500000 0.724421 + + Density error is 0.000193438710 + Error Threshold = 0.000016816877 + + 
Energy Rydberg eV + E_KohnSham -16.143964486009 -219.649905319365 + E_Harris -16.144043239940 -219.650976821572 + E_Fermi -0.412189586104 -5.608127027270 + + PW ALGORITHM --------------- ION= 1 ELEC= 6-------------------------------- + K-point CG iter num Time(Sec) + 1 4.750000 0.929892 + + Density error is 0.000033144966 + Error Threshold = 0.000002417984 + + Energy Rydberg eV + E_KohnSham -16.143962062113 -219.649872340569 + E_Harris -16.143973121330 -219.650022808940 + E_Fermi -0.408955040431 -5.564118775681 + + PW ALGORITHM --------------- ION= 1 ELEC= 7-------------------------------- + K-point CG iter num Time(Sec) + 1 2.625000 0.571903 + + Density error is 0.000005013879 + Error Threshold = 0.000000414312 + + Energy Rydberg eV + E_KohnSham -16.143964909086 -219.649911075624 + E_Harris -16.143965842245 -219.649923771901 + E_Fermi -0.407628831587 -5.546074778663 + + PW ALGORITHM --------------- ION= 1 ELEC= 8-------------------------------- + K-point CG iter num Time(Sec) + 1 3.625000 0.729712 + + Density error is 0.000001011179 + Error Threshold = 0.000000062673 + + Energy Rydberg eV + E_KohnSham -16.143964999058 -219.649912299760 + E_Harris -16.143965129574 -219.649914075510 + E_Fermi -0.407392472263 -5.542858945081 + + PW ALGORITHM --------------- ION= 1 ELEC= 9-------------------------------- + K-point CG iter num Time(Sec) + 1 3.500000 0.698617 + + Density error is 0.000000165426 + Error Threshold = 0.000000012640 + + Energy Rydberg eV + E_KohnSham -16.143965114417 -219.649913869298 + E_Harris -16.143965110616 -219.649913817582 + E_Fermi -0.407267643914 -5.541160568271 + + PW ALGORITHM --------------- ION= 1 ELEC= 10-------------------------------- + K-point CG iter num Time(Sec) + 1 3.000000 0.629669 + + Density error is 0.000000034854 + Error Threshold = 0.000000002068 + + Energy Rydberg eV + E_KohnSham -16.143965129640 -219.649914076412 + E_Harris -16.143965128874 -219.649914065985 + E_Fermi -0.407195695883 -5.540181665084 + + PW ALGORITHM 
--------------- ION= 1 ELEC= 11-------------------------------- + K-point CG iter num Time(Sec) + 1 4.000000 0.813535 + + Density error is 0.000000008107 + Error Threshold = 0.000000000436 + + Energy Rydberg eV + E_KohnSham -16.143965128688 -219.649914063463 + E_Harris -16.143965130414 -219.649914086943 + E_Fermi -0.407157690601 -5.539664576698 + + PW ALGORITHM --------------- ION= 1 ELEC= 12-------------------------------- + K-point CG iter num Time(Sec) + 1 3.125000 0.674851 + + Density error is 0.000000001913 + Error Threshold = 0.000000000101 + + Energy Rydberg eV + E_KohnSham -16.143965127817 -219.649914051607 + E_Harris -16.143965128941 -219.649914066906 + E_Fermi -0.407141275858 -5.539441242659 + + PW ALGORITHM --------------- ION= 1 ELEC= 13-------------------------------- + K-point CG iter num Time(Sec) + 1 4.000000 0.804644 + + Density error is 0.000000000482 + Error Threshold = 0.000000000024 + + Energy Rydberg eV + E_KohnSham -16.143965127167 -219.649914042766 + E_Harris -16.143965127871 -219.649914052349 + E_band -5.963675568058 -81.139968748982 + E_one_elec -25.076858425354 -341.188162524121 + E_Hartree +13.701412040584 +186.417274397756 + E_xc -6.404563484424 -87.138556590897 + E_Ewald +1.636044742026 +22.259530674496 + E_demet -0.000000000000 -0.000000000000 + E_descf +0.000000000000 +0.000000000000 + E_efield +0.000000000000 +0.000000000000 + E_Fermi -0.407135332076 -5.539360373357 + charge density convergence is achieved + final etot is -219.649914042766 eV + + STATE ENERGY(eV) AND OCCUPATIONS. 
1/1 kpoint (Cartesian) = 0.00000 0.00000 0.00000 (16879 pws) + [spin1_state] 1 -15.903159 2.000000 + [spin1_state] 2 -8.412500 2.000000 + [spin1_state] 3 -8.255893 2.000000 + [spin1_state] 4 -7.998432 2.000000 + [spin1_state] 5 -0.514947 0.000000 + [spin1_state] 6 2.727369 0.000000 + [spin1_state] 7 3.067094 0.000000 + [spin1_state] 8 4.824439 0.000000 + + + -------------------------------------------- + !FINAL_ETOT_IS -219.6499140427659142 eV + -------------------------------------------- + + + + + + + |CLASS_NAME---------|NAME---------------|TIME(Sec)-----|CALLS----|AVG------|PER%------- + A DC_Driv reading +0.104 1 +0.10 +0.59% + A DC_Driv divide_frag +0.19 1 +0.19 +1.09% + B PW_Basis gen_pw +0.19 1 +0.19 +1.09% + A DC_Driv solve_eachf +17.20 1 +17.20 +98.32% + B Run_Frag frag_pw_line +17.20 1 +17.20 +98.32% + X FFT FFT3D +9.69 1332 +0.01 +55.38% + E potential v_of_rho +3.20 14 +0.23 +18.27% + C wavefunc wfcinit +0.22 1 +0.22 +1.24% + G Hamilt_PW cinitcgg +2.31 14 +0.17 +13.20% + H Hamilt_PW h_psi +9.29 513 +0.02 +53.10% + I Hamilt_PW add_vuspsi +0.45 513 +0.00 +2.57% + C Ions opt_ions_pw +16.63 1 +16.63 +95.07% + D electrons self_consistent +14.90 1 +14.90 +85.15% + E electrons c_bands +10.25 13 +0.79 +58.60% + F Hamilt diago +10.08 13 +0.78 +57.59% + G Diago_CG diag +7.93 13 +0.61 +45.35% + E Charge mix_rho +0.35 13 +0.03 +1.98% + ---------------------------------------------------------------------------------------- + + CLASS_NAME---------|NAME---------------|MEMORY(MB)-------- + +29.4953 + PW_Basis struc_fac +4.1212 + Use_FFT porter +4.0000 + wavefunc evc +2.0604 + Charge rho +2.0000 + Charge rho_save +2.0000 + Charge rho_core +2.0000 + potential vltot +2.0000 + potential vr +2.0000 + potential vrs +2.0000 + potential vrs1 +2.0000 + potential vnew +2.0000 + Charge rhog +1.0303 + Charge rhog_save +1.0303 + Charge rhog_core +1.0303 + ---------------------------------------------------------- + + Start Time : Fri May 7 16:18:50 2021 + Finish Time : Fri May 7 
16:19:08 2021 + Total Time : +0 h +0 mins +18 secs diff --git a/tests/test_abacus_pw_scf.py b/tests/test_abacus_pw_scf.py index 8d13dddcf..20751f819 100644 --- a/tests/test_abacus_pw_scf.py +++ b/tests/test_abacus_pw_scf.py @@ -146,5 +146,23 @@ def test_return_zero(self): self.assertEqual(len(self.system_ch4), 0) +class TestABACUSLabeledOutputNoFS(unittest.TestCase): + def setUp(self): + shutil.copy("abacus.scf/INPUT.ch4-noforcestress", "abacus.scf/INPUT") + + def tearDown(self): + if os.path.isfile("abacus.scf/INPUT"): + os.remove("abacus.scf/INPUT") + + def test_noforcestress_job(self): + # check below will not throw error + system_ch4 = dpdata.LabeledSystem("abacus.scf", fmt="abacus/scf") + # check the returned force is empty + self.assertFalse(system_ch4.data["forces"]) + self.assertTrue("virials" not in system_ch4.data) + # test append self + system_ch4.append(system_ch4) + + if __name__ == "__main__": unittest.main() From 1c6223939738769aaafaa28963bb596090611016 Mon Sep 17 00:00:00 2001 From: "A bot of @njzjz" <48687836+njzjz-bot@users.noreply.github.com> Date: Mon, 8 Jul 2024 22:12:38 -0400 Subject: [PATCH 05/24] fix: remove ref-names from .git_archival.txt (#679) Removing ref-names is now suggested by setuptools-scm, per https://setuptools-scm.readthedocs.io/en/latest/usage/#git-archives, as it may lead to the git archive checksum changing. Generated by the task: https://github.com/njzjz-bot/njzjz-bot/issues/6. 
--- .git_archival.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/.git_archival.txt b/.git_archival.txt index 8fb235d70..7c5100942 100644 --- a/.git_archival.txt +++ b/.git_archival.txt @@ -1,4 +1,3 @@ node: $Format:%H$ node-date: $Format:%cI$ describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$ -ref-names: $Format:%D$ From 6227f49c1a50b60bb15ccafc40ca9a1abf862d32 Mon Sep 17 00:00:00 2001 From: "A bot of @njzjz" <48687836+njzjz-bot@users.noreply.github.com> Date: Tue, 16 Jul 2024 18:19:12 -0400 Subject: [PATCH 06/24] docs: pin sphinx-argparse to < 0.5.0 (#683) Pin sphinx-argparse to <0.5.0 due to sphinx-doc/sphinx-argparse#56. Generated by the task: https://github.com/njzjz-bot/njzjz-bot/issues/7. ## Summary by CodeRabbit - **Chores** - Updated `sphinx-argparse` dependency version to `<0.5.0` to ensure compatibility. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 35b30decf..05a6e243b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,7 +59,7 @@ docs = [ 'numpydoc', 'm2r2', 'deepmodeling-sphinx>=0.1.1', - 'sphinx-argparse', + 'sphinx-argparse<0.5.0', 'rdkit', 'jupyterlite-sphinx', 'jupyterlite-xeus', From c21c538fae0c17ee1fa5d555e3a5ca1d8c1c48f2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 18 Jul 2024 09:03:13 +0800 Subject: [PATCH 07/24] [pre-commit.ci] pre-commit autoupdate (#677) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.4.10 → v0.5.2](https://github.com/astral-sh/ruff-pre-commit/compare/v0.4.10...v0.5.2) - [github.com/asottile/blacken-docs: 1.16.0 → 1.18.0](https://github.com/asottile/blacken-docs/compare/1.16.0...1.18.0) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com> --- 
.pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d9e92b7b3..252fe57be 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: # Python - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.4.10 + rev: v0.5.2 hooks: - id: ruff args: ["--fix"] @@ -36,7 +36,7 @@ repos: args: ["--write"] # Python inside docs - repo: https://github.com/asottile/blacken-docs - rev: 1.16.0 + rev: 1.18.0 hooks: - id: blacken-docs ci: From cc4001a4cae68ad0e440996710638af58800d5d6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 11:19:56 +0800 Subject: [PATCH 08/24] [pre-commit.ci] pre-commit autoupdate (#685) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.5.2 → v0.5.4](https://github.com/astral-sh/ruff-pre-commit/compare/v0.5.2...v0.5.4) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 252fe57be..373e0d222 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: # Python - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.5.2 + rev: v0.5.4 hooks: - id: ruff args: ["--fix"] From 42dd136c050dc21f82ce840677813544e6c98db0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 31 Jul 2024 17:46:37 -0400 Subject: [PATCH 09/24] Bump CodSpeedHQ/action from 2 to 3 (#691) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [CodSpeedHQ/action](https://github.com/codspeedhq/action) from 2 to 3.
Release notes

Sourced from CodSpeedHQ/action's releases.

v3.0.0

What's Changed

Check the runner release notes for more details.

Full Changelog: https://github.com/CodSpeedHQ/action/compare/v2...v3.0.0

v2.4.4

What's Changed

Full Changelog: https://github.com/CodSpeedHQ/action/compare/v2.4.3...v2.4.4

v2.4.3

What's Changed

Check the runner release notes for more details.

Full Changelog: https://github.com/CodSpeedHQ/action/compare/v2.4.2...v2.4.3

v2.4.2

What's Changed

Check the runner release notes for more details.

Full Changelog: https://github.com/CodSpeedHQ/action/compare/v2.4.1...v2.4.2

v2.4.1

What's Changed

Check the runner release notes for more details.

Full Changelog: https://github.com/CodSpeedHQ/action/compare/v2.4.0...v2.4.1

v2.4.0

What's Changed

Check the runner release notes for more details.

Full Changelog: https://github.com/CodSpeedHQ/action/compare/v2.3.1...v2.4.0

... (truncated)

Changelog

Sourced from CodSpeedHQ/action's changelog.

[3.0.0] - 2024-07-26

[2.4.4] - 2024-07-26

  • chore: report that a new version is available

[2.4.3] - 2024-07-12

[2.4.2] - 2024-06-14

[2.4.1] - 2024-04-29

[2.4.0] - 2024-04-26

[2.3.1] - 2024-04-24

[2.3.0] - 2024-02-31

[2.2.1] - 2024-02-22

[2.2.0] - 2024-02-22

[2.1.1] - 2024-02-02

[2.0.3] - 2024-01-04

... (truncated)

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=CodSpeedHQ/action&package-manager=github_actions&previous-version=2&new-version=3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) Dependabot will merge this PR once CI passes on it, as requested by @njzjz. [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/benchmark.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 25650184b..1dd682776 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -17,7 +17,7 @@ jobs: - name: Install dependencies run: uv pip install --system .[test,amber,ase,pymatgen,benchmark] rdkit openbabel-wheel - name: Run benchmarks - uses: CodSpeedHQ/action@v2 + uses: CodSpeedHQ/action@v3 with: token: ${{ secrets.CODSPEED_TOKEN }} run: pytest benchmark/ --codspeed From c565517191d4e26961664195406e1c407fc70c28 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 3 Aug 2024 16:29:13 +0800 Subject: [PATCH 10/24] [pre-commit.ci] pre-commit autoupdate (#690) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.5.4 → v0.5.5](https://github.com/astral-sh/ruff-pre-commit/compare/v0.5.4...v0.5.5) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 373e0d222..a82074bf8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: # Python - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. 
- rev: v0.5.4 + rev: v0.5.5 hooks: - id: ruff args: ["--fix"] From 5b54e3d8a0521542a0fb4c68d538e2a98670936a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 6 Aug 2024 16:45:53 +0800 Subject: [PATCH 11/24] [pre-commit.ci] pre-commit autoupdate (#694) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.5.5 → v0.5.6](https://github.com/astral-sh/ruff-pre-commit/compare/v0.5.5...v0.5.6) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a82074bf8..1bb2fcc02 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: # Python - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.5.5 + rev: v0.5.6 hooks: - id: ruff args: ["--fix"] From 3c19cedeba8e65cc93604cdf9900dcc58fa75100 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 Aug 2024 08:50:07 +0800 Subject: [PATCH 12/24] [pre-commit.ci] pre-commit autoupdate (#696) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.5.6 → v0.5.7](https://github.com/astral-sh/ruff-pre-commit/compare/v0.5.6...v0.5.7) ## Summary by CodeRabbit - **Chores** - Updated the Ruff pre-commit hook to the latest version (v0.5.7) for improved linting of Python code. 
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1bb2fcc02..4f16df290 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: # Python - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.5.6 + rev: v0.5.7 hooks: - id: ruff args: ["--fix"] From cbcbb92ce959ff08647c5e2587599a54c9cc3663 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 20 Aug 2024 14:15:49 +0800 Subject: [PATCH 13/24] [pre-commit.ci] pre-commit autoupdate (#697) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.5.7 → v0.6.1](https://github.com/astral-sh/ruff-pre-commit/compare/v0.5.7...v0.6.1) ## Summary by CodeRabbit - **Chores** - Updated the version of the Ruff pre-commit hook from v0.5.7 to v0.6.1, potentially enhancing linting and formatting during pre-commit checks. Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4f16df290..c255e2b1c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: # Python - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. 
- rev: v0.5.7 + rev: v0.6.1 hooks: - id: ruff args: ["--fix"] From 2fb0804116761aef072021e412b589be551c2f69 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 26 Aug 2024 20:53:38 -0400 Subject: [PATCH 14/24] docs: migrate from m2r2 to myst-parser (#701) ## Summary by CodeRabbit - **Documentation Updates** - Updated documentation generation tools to use `myst_parser`, enhancing Markdown compatibility and features. - Improved documentation processing and rendering capabilities. --- docs/conf.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index e3c0b3d4c..a21d19d56 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -49,7 +49,7 @@ "sphinx.ext.viewcode", "sphinx.ext.intersphinx", "numpydoc", - "m2r2", + "myst_parser", "sphinxarg.ext", "jupyterlite_sphinx", ] diff --git a/pyproject.toml b/pyproject.toml index 05a6e243b..4bf2e64b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,7 +57,7 @@ docs = [ 'recommonmark', 'sphinx_rtd_theme>=1.0.0rc1', 'numpydoc', - 'm2r2', + 'myst-parser', 'deepmodeling-sphinx>=0.1.1', 'sphinx-argparse<0.5.0', 'rdkit', From 0a50f4d5d62b070b92f708ace0c83b8281476d62 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 27 Aug 2024 01:25:03 +0000 Subject: [PATCH 15/24] [pre-commit.ci] pre-commit autoupdate (#700) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.6.1 → v0.6.2](https://github.com/astral-sh/ruff-pre-commit/compare/v0.6.1...v0.6.2) ## Summary by CodeRabbit - **Chores** - Updated the version of the Ruff pre-commit hook to enhance performance and reliability. 
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c255e2b1c..f4abb50c3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: # Python - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.6.1 + rev: v0.6.2 hooks: - id: ruff args: ["--fix"] From 676517aa64b6b2825a6551655c79cd93449a15be Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Wed, 28 Aug 2024 21:34:11 -0400 Subject: [PATCH 16/24] feat: customized dtypes for unlabeled deepmd (#702) ## Summary by CodeRabbit - **New Features** - Enhanced flexibility in data type handling for system data processing based on the presence of labels. - New data type registration for "foo" with updated shape and configuration. - **Bug Fixes** - Improved adaptability and performance in data processing functions by refining dtype selection. - **Refactor** - Streamlined test structure for loading and dumping data types, improving organization and maintainability of test cases. 
--------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- dpdata/deepmd/comp.py | 76 ++++++++++++--------- dpdata/deepmd/hdf5.py | 13 +++- dpdata/deepmd/raw.py | 76 +++++++++++---------- tests/plugin/dpdata_plugin_test/__init__.py | 4 ++ tests/test_custom_data_type.py | 38 ++++++++--- 5 files changed, 126 insertions(+), 81 deletions(-) diff --git a/dpdata/deepmd/comp.py b/dpdata/deepmd/comp.py index ab0044477..d6ccf3ffa 100644 --- a/dpdata/deepmd/comp.py +++ b/dpdata/deepmd/comp.py @@ -67,40 +67,43 @@ def to_system_data(folder, type_map=None, labels=True): data["virials"] = np.concatenate(all_virs, axis=0) # allow custom dtypes if labels: - for dtype in dpdata.system.LabeledSystem.DTYPES: - if dtype.name in ( - "atom_numbs", - "atom_names", - "atom_types", - "orig", - "cells", - "coords", - "real_atom_types", - "real_atom_names", - "nopbc", - "energies", - "forces", - "virials", - ): - # skip as these data contains specific rules - continue - if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES): - warnings.warn( - f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/npy format." 
- ) - continue - natoms = data["coords"].shape[1] - shape = [ - natoms if xx == dpdata.system.Axis.NATOMS else xx - for xx in dtype.shape[1:] - ] - all_data = [] - for ii in sets: - tmp = _cond_load_data(os.path.join(ii, dtype.name + ".npy")) - if tmp is not None: - all_data.append(np.reshape(tmp, [tmp.shape[0], *shape])) - if len(all_data) > 0: - data[dtype.name] = np.concatenate(all_data, axis=0) + dtypes = dpdata.system.LabeledSystem.DTYPES + else: + dtypes = dpdata.system.System.DTYPES + + for dtype in dtypes: + if dtype.name in ( + "atom_numbs", + "atom_names", + "atom_types", + "orig", + "cells", + "coords", + "real_atom_types", + "real_atom_names", + "nopbc", + "energies", + "forces", + "virials", + ): + # skip as these data contains specific rules + continue + if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES): + warnings.warn( + f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/npy format." + ) + continue + natoms = data["coords"].shape[1] + shape = [ + natoms if xx == dpdata.system.Axis.NATOMS else xx for xx in dtype.shape[1:] + ] + all_data = [] + for ii in sets: + tmp = _cond_load_data(os.path.join(ii, dtype.name + ".npy")) + if tmp is not None: + all_data.append(np.reshape(tmp, [tmp.shape[0], *shape])) + if len(all_data) > 0: + data[dtype.name] = np.concatenate(all_data, axis=0) return data @@ -173,6 +176,11 @@ def dump(folder, data, set_size=5000, comp_prec=np.float32, remove_sets=True): with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: pass # allow custom dtypes + labels = "energies" in data + if labels: + dtypes = dpdata.system.LabeledSystem.DTYPES + else: + dtypes = dpdata.system.System.DTYPES for dtype in dpdata.system.LabeledSystem.DTYPES: if dtype.name in ( "atom_numbs", diff --git a/dpdata/deepmd/hdf5.py b/dpdata/deepmd/hdf5.py index 34ae9dbef..78f4e33fe 100644 --- a/dpdata/deepmd/hdf5.py +++ b/dpdata/deepmd/hdf5.py @@ -102,7 +102,11 @@ def 
to_system_data( }, } # allow custom dtypes - for dtype in dpdata.system.LabeledSystem.DTYPES: + if labels: + dtypes = dpdata.system.LabeledSystem.DTYPES + else: + dtypes = dpdata.system.System.DTYPES + for dtype in dtypes: if dtype.name in ( "atom_numbs", "atom_names", @@ -210,8 +214,13 @@ def dump( "virials": {"fn": "virial", "shape": (nframes, 9), "dump": True}, } + labels = "energies" in data + if labels: + dtypes = dpdata.system.LabeledSystem.DTYPES + else: + dtypes = dpdata.system.System.DTYPES # allow custom dtypes - for dtype in dpdata.system.LabeledSystem.DTYPES: + for dtype in dtypes: if dtype.name in ( "atom_numbs", "atom_names", diff --git a/dpdata/deepmd/raw.py b/dpdata/deepmd/raw.py index e772714a1..11a9ff6ed 100644 --- a/dpdata/deepmd/raw.py +++ b/dpdata/deepmd/raw.py @@ -64,40 +64,41 @@ def to_system_data(folder, type_map=None, labels=True): data["nopbc"] = True # allow custom dtypes if labels: - for dtype in dpdata.system.LabeledSystem.DTYPES: - if dtype.name in ( - "atom_numbs", - "atom_names", - "atom_types", - "orig", - "cells", - "coords", - "real_atom_types", - "real_atom_names", - "nopbc", - "energies", - "forces", - "virials", - ): - # skip as these data contains specific rules - continue - if not ( - len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES - ): - warnings.warn( - f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/raw format." 
- ) - continue - natoms = data["coords"].shape[1] - shape = [ - natoms if xx == dpdata.system.Axis.NATOMS else xx - for xx in dtype.shape[1:] - ] - if os.path.exists(os.path.join(folder, f"{dtype.name}.raw")): - data[dtype.name] = np.reshape( - np.loadtxt(os.path.join(folder, f"{dtype.name}.raw")), - [nframes, *shape], - ) + dtypes = dpdata.system.LabeledSystem.DTYPES + else: + dtypes = dpdata.system.System.DTYPES + for dtype in dtypes: + if dtype.name in ( + "atom_numbs", + "atom_names", + "atom_types", + "orig", + "cells", + "coords", + "real_atom_types", + "real_atom_names", + "nopbc", + "energies", + "forces", + "virials", + ): + # skip as these data contains specific rules + continue + if not (len(dtype.shape) and dtype.shape[0] == dpdata.system.Axis.NFRAMES): + warnings.warn( + f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted from deepmd/raw format." + ) + continue + natoms = data["coords"].shape[1] + shape = [ + natoms if xx == dpdata.system.Axis.NATOMS else xx + for xx in dtype.shape[1:] + ] + if os.path.exists(os.path.join(folder, f"{dtype.name}.raw")): + data[dtype.name] = np.reshape( + np.loadtxt(os.path.join(folder, f"{dtype.name}.raw")), + [nframes, *shape], + ) return data else: raise RuntimeError("not dir " + folder) @@ -144,7 +145,12 @@ def dump(folder, data): with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: pass # allow custom dtypes - for dtype in dpdata.system.LabeledSystem.DTYPES: + labels = "energies" in data + if labels: + dtypes = dpdata.system.LabeledSystem.DTYPES + else: + dtypes = dpdata.system.System.DTYPES + for dtype in dtypes: if dtype.name in ( "atom_numbs", "atom_names", diff --git a/tests/plugin/dpdata_plugin_test/__init__.py b/tests/plugin/dpdata_plugin_test/__init__.py index ef26e7c1d..4e445eb2f 100644 --- a/tests/plugin/dpdata_plugin_test/__init__.py +++ b/tests/plugin/dpdata_plugin_test/__init__.py @@ -10,6 +10,10 @@ DataType("foo", np.ndarray, (Axis.NFRAMES, 2, 4), 
required=False), labeled=True ) +register_data_type( + DataType("foo", np.ndarray, (Axis.NFRAMES, 3, 3), required=False), labeled=False +) + register_data_type( DataType("bar", np.ndarray, (Axis.NFRAMES, Axis.NATOMS, -1), required=False), labeled=True, diff --git a/tests/test_custom_data_type.py b/tests/test_custom_data_type.py index e94ba5e0f..a319af35f 100644 --- a/tests/test_custom_data_type.py +++ b/tests/test_custom_data_type.py @@ -9,10 +9,12 @@ from dpdata.data_type import Axis, DataType -class TestDeepmdLoadDumpComp(unittest.TestCase): +class DeepmdLoadDumpCompTest: def setUp(self): - self.system = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") - self.foo = np.ones((len(self.system), 2, 4)) + self.system = self.cls( + data=dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar").data + ) + self.foo = np.ones((len(self.system), *self.shape)) self.system.data["foo"] = self.foo self.system.check_data() @@ -23,7 +25,7 @@ def test_to_deepmd_raw(self): def test_from_deepmd_raw(self): self.system.to_deepmd_raw("data_foo") - x = dpdata.LabeledSystem("data_foo", fmt="deepmd/raw") + x = self.cls("data_foo", fmt="deepmd/raw") np.testing.assert_allclose(x.data["foo"], self.foo) def test_to_deepmd_npy(self): @@ -33,7 +35,7 @@ def test_to_deepmd_npy(self): def test_from_deepmd_npy(self): self.system.to_deepmd_npy("data_foo") - x = dpdata.LabeledSystem("data_foo", fmt="deepmd/npy") + x = self.cls("data_foo", fmt="deepmd/npy") np.testing.assert_allclose(x.data["foo"], self.foo) def test_to_deepmd_hdf5(self): @@ -44,18 +46,34 @@ def test_to_deepmd_hdf5(self): def test_from_deepmd_hdf5(self): self.system.to_deepmd_hdf5("data_foo.h5") - x = dpdata.LabeledSystem("data_foo.h5", fmt="deepmd/hdf5") + x = self.cls("data_foo.h5", fmt="deepmd/hdf5") np.testing.assert_allclose(x.data["foo"], self.foo) def test_duplicated_data_type(self): - dt = DataType("foo", np.ndarray, (Axis.NFRAMES, 2, 4), required=False) - n_dtypes_old = len(dpdata.LabeledSystem.DTYPES) 
+ dt = DataType("foo", np.ndarray, (Axis.NFRAMES, *self.shape), required=False) + n_dtypes_old = len(self.cls.DTYPES) with self.assertWarns(UserWarning): - dpdata.LabeledSystem.register_data_type(dt) - n_dtypes_new = len(dpdata.LabeledSystem.DTYPES) + self.cls.register_data_type(dt) + n_dtypes_new = len(self.cls.DTYPES) self.assertEqual(n_dtypes_old, n_dtypes_new) +class TestDeepmdLoadDumpCompUnlabeled(unittest.TestCase, DeepmdLoadDumpCompTest): + cls = dpdata.System + shape = (3, 3) + + def setUp(self): + DeepmdLoadDumpCompTest.setUp(self) + + +class TestDeepmdLoadDumpCompLabeled(unittest.TestCase, DeepmdLoadDumpCompTest): + cls = dpdata.LabeledSystem + shape = (2, 4) + + def setUp(self): + DeepmdLoadDumpCompTest.setUp(self) + + class TestDeepmdLoadDumpCompAny(unittest.TestCase): def setUp(self): self.system = dpdata.LabeledSystem("poscars/OUTCAR.h2o.md", fmt="vasp/outcar") From f8a1b6b20eb9ac7f1aa6d20ee806a59e6fb00c11 Mon Sep 17 00:00:00 2001 From: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com> Date: Sat, 31 Aug 2024 18:33:46 +0800 Subject: [PATCH 17/24] test: support comparison between two multi systems (#705) ## Summary by CodeRabbit - **New Features** - Introduced functions and classes to enhance testing capabilities for multi-system comparisons. - Added validation classes for periodic boundary conditions across multi-system objects. - **Bug Fixes** - Updated test classes to utilize new multi-system handling, improving clarity and functionality. - **Documentation** - Enhanced clarity in variable naming for better alignment with multi-system concepts. 
--------- Co-authored-by: Han Wang --- tests/comp_sys.py | 84 ++++++++++++++++++++++++++++++++++++++ tests/test_deepmd_mixed.py | 26 +++++++----- 2 files changed, 100 insertions(+), 10 deletions(-) diff --git a/tests/comp_sys.py b/tests/comp_sys.py index 99879af61..a3a916a09 100644 --- a/tests/comp_sys.py +++ b/tests/comp_sys.py @@ -105,6 +105,72 @@ def test_virial(self): ) +def _make_comp_ms_test_func(comp_sys_test_func): + """ + Dynamically generates a test function for multi-system comparisons. + + Args: + comp_sys_test_func (Callable): The original test function for single systems. + + Returns + ------- + Callable: A new test function that can handle comparisons between multi-systems. + """ + + def comp_ms_test_func(iobj): + assert hasattr(iobj, "ms_1") and hasattr( + iobj, "ms_2" + ), "Multi-system objects must be present" + iobj.assertEqual(len(iobj.ms_1), len(iobj.ms_2)) + keys = [ii.formula for ii in iobj.ms_1] + keys_2 = [ii.formula for ii in iobj.ms_2] + assert sorted(keys) == sorted( + keys_2 + ), f"Keys of two MS are not equal: {keys} != {keys_2}" + for kk in keys: + iobj.system_1 = iobj.ms_1[kk] + iobj.system_2 = iobj.ms_2[kk] + comp_sys_test_func(iobj) + del iobj.system_1 + del iobj.system_2 + + return comp_ms_test_func + + +def _make_comp_ms_class(comp_class): + """ + Dynamically generates a test class for multi-system comparisons. + + Args: + comp_class (type): The original test class for single systems. + + Returns + ------- + type: A new test class that can handle comparisons between multi-systems. 
+ """ + + class CompMS: + pass + + test_methods = [ + func + for func in dir(comp_class) + if callable(getattr(comp_class, func)) and func.startswith("test_") + ] + + for func in test_methods: + setattr(CompMS, func, _make_comp_ms_test_func(getattr(comp_class, func))) + + return CompMS + + +# MultiSystems comparison from single System comparison +CompMultiSys = _make_comp_ms_class(CompSys) + +# LabeledMultiSystems comparison from single LabeledSystem comparison +CompLabeledMultiSys = _make_comp_ms_class(CompLabeledSys) + + class MultiSystems: def test_systems_name(self): self.assertEqual(set(self.systems.systems), set(self.system_names)) @@ -127,3 +193,21 @@ class IsNoPBC: def test_is_nopbc(self): self.assertTrue(self.system_1.nopbc) self.assertTrue(self.system_2.nopbc) + + +class MSAllIsPBC: + def test_is_pbc(self): + assert hasattr(self, "ms_1") and hasattr( + self, "ms_2" + ), "Multi-system objects must be present and iterable" + self.assertTrue(all([not ss.nopbc for ss in self.ms_1])) + self.assertTrue(all([not ss.nopbc for ss in self.ms_2])) + + +class MSAllIsNoPBC: + def test_is_nopbc(self): + assert hasattr(self, "ms_1") and hasattr( + self, "ms_2" + ), "Multi-system objects must be present and iterable" + self.assertTrue(all([ss.nopbc for ss in self.ms_1])) + self.assertTrue(all([ss.nopbc for ss in self.ms_2])) diff --git a/tests/test_deepmd_mixed.py b/tests/test_deepmd_mixed.py index 70a09dbe8..3c28d1b10 100644 --- a/tests/test_deepmd_mixed.py +++ b/tests/test_deepmd_mixed.py @@ -6,12 +6,18 @@ from glob import glob import numpy as np -from comp_sys import CompLabeledSys, IsNoPBC, MultiSystems +from comp_sys import ( + CompLabeledMultiSys, + CompLabeledSys, + IsNoPBC, + MSAllIsNoPBC, + MultiSystems, +) from context import dpdata class TestMixedMultiSystemsDumpLoad( - unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC + unittest.TestCase, CompLabeledMultiSys, MultiSystems, MSAllIsNoPBC ): def setUp(self): self.places = 6 @@ -62,8 +68,8 @@ def 
setUp(self): self.place_holder_ms.from_deepmd_npy("tmp.deepmd.mixed", fmt="deepmd/npy") self.systems = dpdata.MultiSystems() self.systems.from_deepmd_npy_mixed("tmp.deepmd.mixed", fmt="deepmd/npy/mixed") - self.system_1 = self.ms["C1H4A0B0D0"] - self.system_2 = self.systems["C1H4A0B0D0"] + self.ms_1 = self.ms + self.ms_2 = self.systems mixed_sets = glob("tmp.deepmd.mixed/*/set.*") self.assertEqual(len(mixed_sets), 2) for i in mixed_sets: @@ -112,7 +118,7 @@ def test_str(self): class TestMixedMultiSystemsDumpLoadSetSize( - unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC + unittest.TestCase, CompLabeledMultiSys, MultiSystems, MSAllIsNoPBC ): def setUp(self): self.places = 6 @@ -163,8 +169,8 @@ def setUp(self): self.place_holder_ms.from_deepmd_npy("tmp.deepmd.mixed", fmt="deepmd/npy") self.systems = dpdata.MultiSystems() self.systems.from_deepmd_npy_mixed("tmp.deepmd.mixed", fmt="deepmd/npy/mixed") - self.system_1 = self.ms["C1H4A0B0D0"] - self.system_2 = self.systems["C1H4A0B0D0"] + self.ms_1 = self.ms + self.ms_2 = self.systems mixed_sets = glob("tmp.deepmd.mixed/*/set.*") self.assertEqual(len(mixed_sets), 5) for i in mixed_sets: @@ -213,7 +219,7 @@ def test_str(self): class TestMixedMultiSystemsTypeChange( - unittest.TestCase, CompLabeledSys, MultiSystems, IsNoPBC + unittest.TestCase, CompLabeledMultiSys, MultiSystems, MSAllIsNoPBC ): def setUp(self): self.places = 6 @@ -265,8 +271,8 @@ def setUp(self): self.place_holder_ms.from_deepmd_npy("tmp.deepmd.mixed", fmt="deepmd/npy") self.systems = dpdata.MultiSystems(type_map=["TOKEN"]) self.systems.from_deepmd_npy_mixed("tmp.deepmd.mixed", fmt="deepmd/npy/mixed") - self.system_1 = self.ms["TOKEN0C1H4A0B0D0"] - self.system_2 = self.systems["TOKEN0C1H4A0B0D0"] + self.ms_1 = self.ms + self.ms_2 = self.systems mixed_sets = glob("tmp.deepmd.mixed/*/set.*") self.assertEqual(len(mixed_sets), 2) for i in mixed_sets: From 6d082f162dc05c0980567a7544a8024b7c024a78 Mon Sep 17 00:00:00 2001 From: Han Wang 
<92130845+wanghan-iapcm@users.noreply.github.com> Date: Sat, 31 Aug 2024 18:39:10 +0800 Subject: [PATCH 18/24] test: mixed data format: test if the index_map (when type_map is provided) works (#706) consider the PR after #705 ## Summary by CodeRabbit - **New Features** - Enhanced testing framework for multi-system comparisons, allowing for dynamic generation of test functions and classes. - Introduced new test classes to validate properties of multi-system objects regarding periodic boundary conditions. - Added a new test class for handling type mapping in labeled systems. - **Bug Fixes** - Updated existing test classes to improve clarity and consistency in naming conventions for multi-system variables. --------- Co-authored-by: Han Wang --- tests/test_deepmd_mixed.py | 113 +++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/tests/test_deepmd_mixed.py b/tests/test_deepmd_mixed.py index 3c28d1b10..9f0c5ed46 100644 --- a/tests/test_deepmd_mixed.py +++ b/tests/test_deepmd_mixed.py @@ -117,6 +117,119 @@ def test_str(self): ) +class TestMixedMultiSystemsDumpLoadTypeMap( + unittest.TestCase, CompLabeledMultiSys, MultiSystems, MSAllIsNoPBC +): + def setUp(self): + self.places = 6 + self.e_places = 6 + self.f_places = 6 + self.v_places = 6 + + # C1H4 + system_1 = dpdata.LabeledSystem( + "gaussian/methane.gaussianlog", fmt="gaussian/log" + ) + + # C1H3 + system_2 = dpdata.LabeledSystem( + "gaussian/methane_sub.gaussianlog", fmt="gaussian/log" + ) + + tmp_data = system_1.data.copy() + tmp_data["atom_numbs"] = [1, 1, 1, 2] + tmp_data["atom_names"] = ["C", "H", "A", "B"] + tmp_data["atom_types"] = np.array([0, 1, 2, 3, 3]) + # C1H1A1B2 + system_1_modified_type_1 = dpdata.LabeledSystem(data=tmp_data) + + tmp_data = system_1.data.copy() + tmp_data["atom_numbs"] = [1, 1, 2, 1] + tmp_data["atom_names"] = ["C", "H", "A", "B"] + tmp_data["atom_types"] = np.array([0, 1, 2, 2, 3]) + # C1H1A2B1 + system_1_modified_type_2 = 
dpdata.LabeledSystem(data=tmp_data) + + tmp_data = system_1.data.copy() + tmp_data["atom_numbs"] = [1, 1, 1, 2] + tmp_data["atom_names"] = ["C", "H", "A", "D"] + tmp_data["atom_types"] = np.array([0, 1, 2, 3, 3]) + # C1H1A1C2 + system_1_modified_type_3 = dpdata.LabeledSystem(data=tmp_data) + + self.ms = dpdata.MultiSystems( + system_1, + system_2, + system_1_modified_type_1, + system_1_modified_type_2, + system_1_modified_type_3, + ) + + self.ms.to_deepmd_npy_mixed("tmp.deepmd.mixed") + self.place_holder_ms = dpdata.MultiSystems() + self.place_holder_ms.from_deepmd_npy("tmp.deepmd.mixed", fmt="deepmd/npy") + + new_type_map = ["H", "C", "D", "A", "B"] + self.systems = dpdata.MultiSystems() + self.systems.from_deepmd_npy_mixed( + "tmp.deepmd.mixed", fmt="deepmd/npy/mixed", type_map=new_type_map + ) + for kk in [ii.formula for ii in self.ms]: + # apply type_map to each system + self.ms[kk].apply_type_map(new_type_map) + # revise keys in dict according because the type_map is updated. + tmp_ss = self.ms.systems.pop(kk) + self.ms.systems[tmp_ss.formula] = tmp_ss + + self.ms_1 = self.ms + self.ms_2 = self.systems + mixed_sets = glob("tmp.deepmd.mixed/*/set.*") + self.assertEqual(len(mixed_sets), 2) + for i in mixed_sets: + self.assertEqual( + os.path.exists(os.path.join(i, "real_atom_types.npy")), True + ) + + self.system_names = [ + "H4C1D0A0B0", + "H3C1D0A0B0", + "H1C1D0A1B2", + "H1C1D0A2B1", + "H1C1D2A1B0", + ] + self.system_sizes = { + "H4C1D0A0B0": 1, + "H3C1D0A0B0": 1, + "H1C1D0A1B2": 1, + "H1C1D0A2B1": 1, + "H1C1D2A1B0": 1, + } + self.atom_names = ["H", "C", "D", "A", "B"] + + def tearDown(self): + if os.path.exists("tmp.deepmd.mixed"): + shutil.rmtree("tmp.deepmd.mixed") + + def test_len(self): + self.assertEqual(len(self.ms), 5) + self.assertEqual(len(self.place_holder_ms), 2) + self.assertEqual(len(self.systems), 5) + + def test_get_nframes(self): + self.assertEqual(self.ms.get_nframes(), 5) + self.assertEqual(self.place_holder_ms.get_nframes(), 5) + 
self.assertEqual(self.systems.get_nframes(), 5) + + def test_str(self): + self.assertEqual(str(self.ms), "MultiSystems (5 systems containing 5 frames)") + self.assertEqual( + str(self.place_holder_ms), "MultiSystems (2 systems containing 5 frames)" + ) + self.assertEqual( + str(self.systems), "MultiSystems (5 systems containing 5 frames)" + ) + + class TestMixedMultiSystemsDumpLoadSetSize( unittest.TestCase, CompLabeledMultiSys, MultiSystems, MSAllIsNoPBC ): From 35fdbb8df25e79fd50c072f17ec2ddf903bcb9d9 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 31 Aug 2024 07:59:26 -0400 Subject: [PATCH 19/24] fix: reuse regular methods for deepmd/mixed (#704) ## Summary by CodeRabbit - **New Features** - Streamlined data loading process with improved handling of atom types. - Enhanced modularization in data dumping, reducing complexity and improving maintainability. - Added a new test for verifying data conversion and loading processes for mixed format systems. - **Bug Fixes** - Improved clarity and efficiency in processing atom names and counts. 
--------- Signed-off-by: Jinzhe Zeng Co-authored-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com> --- dpdata/deepmd/comp.py | 2 - dpdata/deepmd/mixed.py | 179 ++++----------------------------- dpdata/deepmd/raw.py | 10 +- tests/test_custom_data_type.py | 10 ++ 4 files changed, 31 insertions(+), 170 deletions(-) diff --git a/dpdata/deepmd/comp.py b/dpdata/deepmd/comp.py index d6ccf3ffa..661110162 100644 --- a/dpdata/deepmd/comp.py +++ b/dpdata/deepmd/comp.py @@ -79,7 +79,6 @@ def to_system_data(folder, type_map=None, labels=True): "orig", "cells", "coords", - "real_atom_types", "real_atom_names", "nopbc", "energies", @@ -189,7 +188,6 @@ def dump(folder, data, set_size=5000, comp_prec=np.float32, remove_sets=True): "orig", "cells", "coords", - "real_atom_types", "real_atom_names", "nopbc", "energies", diff --git a/dpdata/deepmd/mixed.py b/dpdata/deepmd/mixed.py index ccf9b2f58..2aff3a28a 100644 --- a/dpdata/deepmd/mixed.py +++ b/dpdata/deepmd/mixed.py @@ -1,54 +1,16 @@ from __future__ import annotations import copy -import glob -import os -import shutil import numpy as np - -def load_type(folder): - data = {} - data["atom_names"] = [] - # if find type_map.raw, use it - assert os.path.isfile( - os.path.join(folder, "type_map.raw") - ), "Mixed type system must have type_map.raw!" 
- with open(os.path.join(folder, "type_map.raw")) as fp: - data["atom_names"] = fp.read().split() - - return data - - -def formula(atom_names, atom_numbs): - """Return the formula of this system, like C3H5O2.""" - return "".join([f"{symbol}{numb}" for symbol, numb in zip(atom_names, atom_numbs)]) - - -def _cond_load_data(fname): - tmp = None - if os.path.isfile(fname): - tmp = np.load(fname) - return tmp - - -def _load_set(folder, nopbc: bool): - coords = np.load(os.path.join(folder, "coord.npy")) - if nopbc: - cells = np.zeros((coords.shape[0], 3, 3)) - else: - cells = np.load(os.path.join(folder, "box.npy")) - eners = _cond_load_data(os.path.join(folder, "energy.npy")) - forces = _cond_load_data(os.path.join(folder, "force.npy")) - virs = _cond_load_data(os.path.join(folder, "virial.npy")) - real_atom_types = np.load(os.path.join(folder, "real_atom_types.npy")) - return cells, coords, eners, forces, virs, real_atom_types +from .comp import dump as comp_dump +from .comp import to_system_data as comp_to_system_data def to_system_data(folder, type_map=None, labels=True): + data = comp_to_system_data(folder, type_map, labels) # data is empty - data = load_type(folder) old_type_map = data["atom_names"].copy() if type_map is not None: assert isinstance(type_map, list) @@ -60,50 +22,16 @@ def to_system_data(folder, type_map=None, labels=True): data["atom_names"] = type_map.copy() else: index_map = None - data["orig"] = np.zeros([3]) - if os.path.isfile(os.path.join(folder, "nopbc")): - data["nopbc"] = True - sets = sorted(glob.glob(os.path.join(folder, "set.*"))) - all_cells = [] - all_coords = [] - all_eners = [] - all_forces = [] - all_virs = [] - all_real_atom_types = [] - for ii in sets: - cells, coords, eners, forces, virs, real_atom_types = _load_set( - ii, data.get("nopbc", False) - ) - nframes = np.reshape(cells, [-1, 3, 3]).shape[0] - all_cells.append(np.reshape(cells, [nframes, 3, 3])) - all_coords.append(np.reshape(coords, [nframes, -1, 3])) - if index_map is 
None: - all_real_atom_types.append(np.reshape(real_atom_types, [nframes, -1])) - else: - all_real_atom_types.append( - np.reshape(index_map[real_atom_types], [nframes, -1]) - ) - if eners is not None: - eners = np.reshape(eners, [nframes]) - if labels: - if eners is not None and eners.size > 0: - all_eners.append(np.reshape(eners, [nframes])) - if forces is not None and forces.size > 0: - all_forces.append(np.reshape(forces, [nframes, -1, 3])) - if virs is not None and virs.size > 0: - all_virs.append(np.reshape(virs, [nframes, 3, 3])) - all_cells_concat = np.concatenate(all_cells, axis=0) - all_coords_concat = np.concatenate(all_coords, axis=0) - all_real_atom_types_concat = np.concatenate(all_real_atom_types, axis=0) - all_eners_concat = None - all_forces_concat = None - all_virs_concat = None - if len(all_eners) > 0: - all_eners_concat = np.concatenate(all_eners, axis=0) - if len(all_forces) > 0: - all_forces_concat = np.concatenate(all_forces, axis=0) - if len(all_virs) > 0: - all_virs_concat = np.concatenate(all_virs, axis=0) + all_real_atom_types_concat = data.pop("real_atom_types").astype(int) + if index_map is not None: + all_real_atom_types_concat = index_map[all_real_atom_types_concat] + all_cells_concat = data["cells"] + all_coords_concat = data["coords"] + if labels: + all_eners_concat = data.get("energies") + all_forces_concat = data.get("forces") + all_virs_concat = data.get("virials") + data_list = [] while True: if all_real_atom_types_concat.size == 0: @@ -143,20 +71,6 @@ def to_system_data(folder, type_map=None, labels=True): def dump(folder, data, set_size=2000, comp_prec=np.float32, remove_sets=True): - os.makedirs(folder, exist_ok=True) - sets = sorted(glob.glob(os.path.join(folder, "set.*"))) - if len(sets) > 0: - if remove_sets: - for ii in sets: - shutil.rmtree(ii) - else: - raise RuntimeError( - "found " - + str(sets) - + " in " - + folder - + "not a clean deepmd raw dir. 
please firstly clean set.* then try compress" - ) # if not converted to mixed if "real_atom_types" not in data: from dpdata import LabeledSystem, System @@ -169,69 +83,10 @@ def dump(folder, data, set_size=2000, comp_prec=np.float32, remove_sets=True): else: temp_sys = System(data=data) temp_sys.convert_to_mixed_type() - # dump raw - np.savetxt(os.path.join(folder, "type.raw"), data["atom_types"], fmt="%d") - np.savetxt(os.path.join(folder, "type_map.raw"), data["real_atom_names"], fmt="%s") - # BondOrder System - if "bonds" in data: - np.savetxt( - os.path.join(folder, "bonds.raw"), - data["bonds"], - header="begin_atom, end_atom, bond_order", - ) - if "formal_charges" in data: - np.savetxt(os.path.join(folder, "formal_charges.raw"), data["formal_charges"]) - # reshape frame properties and convert prec - nframes = data["cells"].shape[0] - cells = np.reshape(data["cells"], [nframes, 9]).astype(comp_prec) - coords = np.reshape(data["coords"], [nframes, -1]).astype(comp_prec) - eners = None - forces = None - virials = None - real_atom_types = None - if "energies" in data: - eners = np.reshape(data["energies"], [nframes]).astype(comp_prec) - if "forces" in data: - forces = np.reshape(data["forces"], [nframes, -1]).astype(comp_prec) - if "virials" in data: - virials = np.reshape(data["virials"], [nframes, 9]).astype(comp_prec) - if "atom_pref" in data: - atom_pref = np.reshape(data["atom_pref"], [nframes, -1]).astype(comp_prec) - if "real_atom_types" in data: - real_atom_types = np.reshape(data["real_atom_types"], [nframes, -1]).astype( - np.int64 - ) - # dump frame properties: cell, coord, energy, force and virial - nsets = nframes // set_size - if set_size * nsets < nframes: - nsets += 1 - for ii in range(nsets): - set_stt = ii * set_size - set_end = (ii + 1) * set_size - set_folder = os.path.join(folder, "set.%06d" % ii) - os.makedirs(set_folder) - np.save(os.path.join(set_folder, "box"), cells[set_stt:set_end]) - np.save(os.path.join(set_folder, "coord"), 
coords[set_stt:set_end]) - if eners is not None: - np.save(os.path.join(set_folder, "energy"), eners[set_stt:set_end]) - if forces is not None: - np.save(os.path.join(set_folder, "force"), forces[set_stt:set_end]) - if virials is not None: - np.save(os.path.join(set_folder, "virial"), virials[set_stt:set_end]) - if real_atom_types is not None: - np.save( - os.path.join(set_folder, "real_atom_types"), - real_atom_types[set_stt:set_end], - ) - if "atom_pref" in data: - np.save(os.path.join(set_folder, "atom_pref"), atom_pref[set_stt:set_end]) - try: - os.remove(os.path.join(folder, "nopbc")) - except OSError: - pass - if data.get("nopbc", False): - with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: - pass + + data = data.copy() + data["atom_names"] = data.pop("real_atom_names") + comp_dump(folder, data, set_size, comp_prec, remove_sets) def mix_system(*system, type_map, **kwargs): diff --git a/dpdata/deepmd/raw.py b/dpdata/deepmd/raw.py index 11a9ff6ed..717c73f96 100644 --- a/dpdata/deepmd/raw.py +++ b/dpdata/deepmd/raw.py @@ -14,9 +14,6 @@ def load_type(folder, type_map=None): int ) ntypes = np.max(data["atom_types"]) + 1 - data["atom_numbs"] = [] - for ii in range(ntypes): - data["atom_numbs"].append(np.count_nonzero(data["atom_types"] == ii)) data["atom_names"] = [] # if find type_map.raw, use it if os.path.isfile(os.path.join(folder, "type_map.raw")): @@ -30,9 +27,10 @@ def load_type(folder, type_map=None): my_type_map = [] for ii in range(ntypes): my_type_map.append("Type_%d" % ii) - assert len(my_type_map) >= len(data["atom_numbs"]) - for ii in range(len(data["atom_numbs"])): - data["atom_names"].append(my_type_map[ii]) + data["atom_names"] = my_type_map + data["atom_numbs"] = [] + for ii, _ in enumerate(data["atom_names"]): + data["atom_numbs"].append(np.count_nonzero(data["atom_types"] == ii)) return data diff --git a/tests/test_custom_data_type.py b/tests/test_custom_data_type.py index a319af35f..230475ab3 100644 --- a/tests/test_custom_data_type.py 
+++ b/tests/test_custom_data_type.py @@ -57,6 +57,16 @@ def test_duplicated_data_type(self): n_dtypes_new = len(self.cls.DTYPES) self.assertEqual(n_dtypes_old, n_dtypes_new) + def test_to_deepmd_npy_mixed(self): + ms = dpdata.MultiSystems(self.system) + ms.to_deepmd_npy_mixed("data_foo_mixed") + x = dpdata.MultiSystems().load_systems_from_file( + "data_foo_mixed", + fmt="deepmd/npy/mixed", + labeled=issubclass(self.cls, dpdata.LabeledSystem), + ) + np.testing.assert_allclose(list(x.systems.values())[0].data["foo"], self.foo) + class TestDeepmdLoadDumpCompUnlabeled(unittest.TestCase, DeepmdLoadDumpCompTest): cls = dpdata.System From f4d0adf8ca3aceab813f0bab0fdaeb9b55bf6ed6 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 31 Aug 2024 08:45:45 -0400 Subject: [PATCH 20/24] docs: reformat README and documentation (#707) Move most of the contents from `README.md` to `docs`. ## Summary by CodeRabbit - **New Features** - Introduced `BondOrderSystem` class for managing chemical bonds and formal charges. - Added `MultiSystems` class for handling multiple system data from files or directories. - Implemented new `deepmd/npy/mixed` format for combining systems with different formulas. - Launched plugin documentation to enhance extensibility for user-created plugins. - **Documentation** - Updated README for clearer installation instructions and streamlined content. - Created dedicated installation guide for user-friendly setup. - Enhanced documentation structure with new entries and improved navigation. 
--------- Signed-off-by: Jinzhe Zeng --- README.md | 323 +++--------------------------- docs/index.rst | 9 +- docs/installation.md | 13 ++ docs/plugin.md | 9 + docs/systems/bond_order_system.md | 51 +++++ docs/systems/index.rst | 11 + docs/systems/mixed.md | 24 +++ docs/systems/multi.md | 62 ++++++ docs/systems/system.md | 112 +++++++++++ 9 files changed, 312 insertions(+), 302 deletions(-) create mode 100644 docs/installation.md create mode 100644 docs/plugin.md create mode 100644 docs/systems/bond_order_system.md create mode 100644 docs/systems/index.rst create mode 100644 docs/systems/mixed.md create mode 100644 docs/systems/multi.md create mode 100644 docs/systems/system.md diff --git a/README.md b/README.md index ab489a75e..6f9246b1c 100644 --- a/README.md +++ b/README.md @@ -4,320 +4,47 @@ [![pip install](https://img.shields.io/pypi/dm/dpdata?label=pip%20install&logo=pypi)](https://pypi.org/project/dpdata) [![Documentation Status](https://readthedocs.org/projects/dpdata/badge/)](https://dpdata.readthedocs.io/) -**dpdata** is a python package for manipulating data formats of software in computational science, including DeePMD-kit, VASP, LAMMPS, GROMACS, Gaussian. -dpdata only works with python 3.7 or above. - +**dpdata** is a Python package for manipulating atomistic data of software in computational science. ## Installation -One can download the source code of dpdata by -```bash -git clone https://github.com/deepmodeling/dpdata.git dpdata -``` -then use `pip` to install the module from source -```bash -cd dpdata -pip install . -``` - -`dpdata` can also by install via pip without source -```bash -pip install dpdata -``` - -## Quick start - -This section gives some examples on how dpdata works. Firstly one needs to import the module in a python 3.x compatible code. -```python -import dpdata -``` -The typicall workflow of `dpdata` is - -1. Load data from vasp or lammps or deepmd-kit data files. -2. Manipulate data -3. 
Dump data to in a desired format - - -### Load data -```python -d_poscar = dpdata.System("POSCAR", fmt="vasp/poscar") -``` -or let dpdata infer the format (`vasp/poscar`) of the file from the file name extension -```python -d_poscar = dpdata.System("my.POSCAR") -``` -The number of atoms, atom types, coordinates are loaded from the `POSCAR` and stored to a data `System` called `d_poscar`. -A data `System` (a concept used by [deepmd-kit](https://github.com/deepmodeling/deepmd-kit)) contains frames that has the same number of atoms of the same type. The order of the atoms should be consistent among the frames in one `System`. -It is noted that `POSCAR` only contains one frame. -If the multiple frames stored in, for example, a `OUTCAR` is wanted, -```python -d_outcar = dpdata.LabeledSystem("OUTCAR") -``` -The labels provided in the `OUTCAR`, i.e. energies, forces and virials (if any), are loaded by `LabeledSystem`. It is noted that the forces of atoms are always assumed to exist. `LabeledSystem` is a derived class of `System`. +DP-GEN only supports Python 3.7 and above. 
You can [setup a conda/pip environment](https://docs.deepmodeling.com/faq/conda.html), and then use one of the following methods to install DP-GEN: -The `System` or `LabeledSystem` can be constructed from the following file formats with the `format key` in the table passed to argument `fmt`: +- Install via pip: `pip install dpdata` +- Install via conda: `conda install -c conda-forge dpdata` +- Install from source code: `git clone https://github.com/deepmodeling/dpdata && pip install ./dpdata` -| Software| format | multi frames | labeled | class | format key | -| ------- | :--- | :---: | :---: | :--- | :--- | -| vasp | poscar | False | False | System | 'vasp/poscar' | -| vasp | outcar | True | True | LabeledSystem | 'vasp/outcar' | -| vasp | xml | True | True | LabeledSystem | 'vasp/xml' | -| lammps | lmp | False | False | System | 'lammps/lmp' | -| lammps | dump | True | False | System | 'lammps/dump' | -| deepmd | raw | True | False | System | 'deepmd/raw' | -| deepmd | npy | True | False | System | 'deepmd/npy' | -| deepmd | raw | True | True | LabeledSystem | 'deepmd/raw' | -| deepmd | npy | True | True | LabeledSystem | 'deepmd/npy' | -| deepmd | npy | True | True | MultiSystems | 'deepmd/npy/mixed' | -| deepmd | npy | True | False | MultiSystems | 'deepmd/npy/mixed' | -| gaussian| log | False | True | LabeledSystem | 'gaussian/log'| -| gaussian| log | True | True | LabeledSystem | 'gaussian/md' | -| siesta | output | False | True | LabeledSystem | 'siesta/output'| -| siesta | aimd_output | True | True | LabeledSystem | 'siesta/aimd_output' | -| cp2k(deprecated in future) | output | False | True | LabeledSystem | 'cp2k/output' | -| cp2k(deprecated in future) | aimd_output | True | True | LabeledSystem | 'cp2k/aimd_output' | -| cp2k([plug-in](https://github.com/robinzyb/cp2kdata#plug-in-for-dpdata)) | stdout | False | True | LabeledSystem | 'cp2kdata/e_f' | -| cp2k([plug-in](https://github.com/robinzyb/cp2kdata#plug-in-for-dpdata)) | stdout | True | True | 
LabeledSystem | 'cp2kdata/md' | -| QE | log | False | True | LabeledSystem | 'qe/pw/scf' | -| QE | log | True | False | System | 'qe/cp/traj' | -| QE | log | True | True | LabeledSystem | 'qe/cp/traj' | -| Fhi-aims| output | True | True | LabeledSystem | 'fhi_aims/md' | -| Fhi-aims| output | False | True | LabeledSystem | 'fhi_aims/scf' | -|quip/gap|xyz|True|True|MultiSystems|'quip/gap/xyz'| -| PWmat | atom.config | False | False | System | 'pwmat/atom.config' | -| PWmat | movement | True | True | LabeledSystem | 'pwmat/movement' | -| PWmat | OUT.MLMD | True | True | LabeledSystem | 'pwmat/out.mlmd' | -| Amber | multi | True | True | LabeledSystem | 'amber/md' | -| Amber/sqm | sqm.out | False | False | System | 'sqm/out' | -| Gromacs | gro | True | False | System | 'gromacs/gro' | -| ABACUS | STRU | False | False | System | 'abacus/stru' | -| ABACUS | STRU | False | True | LabeledSystem | 'abacus/scf' | -| ABACUS | cif | True | True | LabeledSystem | 'abacus/md' | -| ABACUS | STRU | True | True | LabeledSystem | 'abacus/relax' | -| ase | structure | True | True | MultiSystems | 'ase/structure' | -| DFTB+ | dftbplus | False | True | LabeledSystem | 'dftbplus' | -| n2p2 | n2p2 | True | True | LabeledSystem | 'n2p2' | - - -The Class `dpdata.MultiSystems` can read data from a dir which may contains many files of different systems, or from single xyz file which contains different systems. - -Use `dpdata.MultiSystems.from_dir` to read from a directory, `dpdata.MultiSystems` will walk in the directory -Recursively and find all file with specific file_name. Supports all the file formats that `dpdata.LabeledSystem` supports. - -Use `dpdata.MultiSystems.from_file` to read from single file. Single-file support is available for the `quip/gap/xyz` and `ase/structure` formats. - -For example, for `quip/gap xyz` files, single .xyz file may contain many different configurations with different atom numbers and atom type. 
- -The following commands relating to `Class dpdata.MultiSystems` may be useful. -```python -# load data - -xyz_multi_systems = dpdata.MultiSystems.from_file( - file_name="tests/xyz/xyz_unittest.xyz", fmt="quip/gap/xyz" -) -vasp_multi_systems = dpdata.MultiSystems.from_dir( - dir_name="./mgal_outcar", file_name="OUTCAR", fmt="vasp/outcar" -) - -# use wildcard -vasp_multi_systems = dpdata.MultiSystems.from_dir( - dir_name="./mgal_outcar", file_name="*OUTCAR", fmt="vasp/outcar" -) - -# print the multi_system infomation -print(xyz_multi_systems) -print(xyz_multi_systems.systems) # return a dictionaries - -# print the system infomation -print(xyz_multi_systems.systems["B1C9"].data) - -# dump a system's data to ./my_work_dir/B1C9_raw folder -xyz_multi_systems.systems["B1C9"].to_deepmd_raw("./my_work_dir/B1C9_raw") - -# dump all systems -xyz_multi_systems.to_deepmd_raw("./my_deepmd_data/") -``` - -You may also use the following code to parse muti-system: -```python -from dpdata import LabeledSystem, MultiSystems -from glob import glob - -""" -process multi systems -""" -fs = glob("./*/OUTCAR") # remeber to change here !!! 
-ms = MultiSystems() -for f in fs: - try: - ls = LabeledSystem(f) - except: - print(f) - if len(ls) > 0: - ms.append(ls) - -ms.to_deepmd_raw("deepmd") -ms.to_deepmd_npy("deepmd") -``` +To test if the installation is successful, you may execute -### Access data -These properties stored in `System` and `LabeledSystem` can be accessed by operator `[]` with the key of the property supplied, for example -```python -coords = d_outcar["coords"] -``` -Available properties are (nframe: number of frames in the system, natoms: total number of atoms in the system) - -| key | type | dimension | are labels | description -| --- | --- | --- | --- | --- -| 'atom_names' | list of str | ntypes | False | The name of each atom type -| 'atom_numbs' | list of int | ntypes | False | The number of atoms of each atom type -| 'atom_types' | np.ndarray | natoms | False | Array assigning type to each atom -| 'cells' | np.ndarray | nframes x 3 x 3 | False | The cell tensor of each frame -| 'coords' | np.ndarray | nframes x natoms x 3 | False | The atom coordinates -| 'energies' | np.ndarray | nframes | True | The frame energies -| 'forces' | np.ndarray | nframes x natoms x 3 | True | The atom forces -| 'virials' | np.ndarray | nframes x 3 x 3 | True | The virial tensor of each frame - - -### Dump data -The data stored in `System` or `LabeledSystem` can be dumped in 'lammps/lmp' or 'vasp/poscar' format, for example: -```python -d_outcar.to("lammps/lmp", "conf.lmp", frame_idx=0) -``` -The first frames of `d_outcar` will be dumped to 'conf.lmp' -```python -d_outcar.to("vasp/poscar", "POSCAR", frame_idx=-1) -``` -The last frames of `d_outcar` will be dumped to 'POSCAR'. 
- -The data stored in `LabeledSystem` can be dumped to deepmd-kit raw format, for example -```python -d_outcar.to("deepmd/raw", "dpmd_raw") -``` -Or a simpler command: -```python -dpdata.LabeledSystem("OUTCAR").to("deepmd/raw", "dpmd_raw") -``` -Frame selection can be implemented by -```python -dpdata.LabeledSystem("OUTCAR").sub_system([0, -1]).to("deepmd/raw", "dpmd_raw") -``` -by which only the first and last frames are dumped to `dpmd_raw`. - - -### replicate -dpdata will create a super cell of the current atom configuration. -```python -dpdata.System("./POSCAR").replicate( - ( - 1, - 2, - 3, - ) -) -``` -tuple(1,2,3) means don't copy atom configuration in x direction, make 2 copys in y direction, make 3 copys in z direction. - - -### perturb -By the following example, each frame of the original system (`dpdata.System('./POSCAR')`) is perturbed to generate three new frames. For each frame, the cell is perturbed by 5% and the atom positions are perturbed by 0.6 Angstrom. `atom_pert_style` indicates that the perturbation to the atom positions is subject to normal distribution. Other available options to `atom_pert_style` are`uniform` (uniform in a ball), and `const` (uniform on a sphere). -```python -perturbed_system = dpdata.System("./POSCAR").perturb( - pert_num=3, - cell_pert_fraction=0.05, - atom_pert_distance=0.6, - atom_pert_style="normal", -) -print(perturbed_system.data) -``` - -### replace -By the following example, Random 8 Hf atoms in the system will be replaced by Zr atoms with the atom postion unchanged. -```python -s = dpdata.System("tests/poscars/POSCAR.P42nmc", fmt="vasp/poscar") -s.replace("Hf", "Zr", 8) -s.to_vasp_poscar("POSCAR.P42nmc.replace") -``` - -## BondOrderSystem -A new class `BondOrderSystem` which inherits from class `System` is introduced in dpdata. This new class contains information of chemical bonds and formal charges (stored in `BondOrderSystem.data['bonds']`, `BondOrderSystem.data['formal_charges']`). 
Now BondOrderSystem can only read from .mol/.sdf formats, because of its dependency on rdkit (which means rdkit must be installed if you want to use this function). Other formats, such as pdb, must be converted to .mol/.sdf format (maybe with software like open babel). -```python -import dpdata - -system_1 = dpdata.BondOrderSystem( - "tests/bond_order/CH3OH.mol", fmt="mol" -) # read from .mol file -system_2 = dpdata.BondOrderSystem( - "tests/bond_order/methane.sdf", fmt="sdf" -) # read from .sdf file -``` -In sdf file, all molecules must be of the same topology (i.e. conformers of the same molecular configuration). -`BondOrderSystem` also supports initialize from a `rdkit.Chem.rdchem.Mol` object directly. -```python -from rdkit import Chem -from rdkit.Chem import AllChem -import dpdata - -mol = Chem.MolFromSmiles("CC") -mol = Chem.AddHs(mol) -AllChem.EmbedMultipleConfs(mol, 10) -system = dpdata.BondOrderSystem(rdkit_mol=mol) -``` - -### Bond Order Assignment -The `BondOrderSystem` implements a more robust sanitize procedure for rdkit Mol, as defined in `dpdata.rdkit.santizie.Sanitizer`. This class defines 3 level of sanitization process by: low, medium and high. (default is medium). -+ low: use `rdkit.Chem.SanitizeMol()` function to sanitize molecule. -+ medium: before using rdkit, the programm will first assign formal charge of each atom to avoid inappropriate valence exceptions. However, this mode requires the rightness of the bond order information in the given molecule. -+ high: the program will try to fix inappropriate bond orders in aromatic hetreocycles, phosphate, sulfate, carboxyl, nitro, nitrine, guanidine groups. If this procedure fails to sanitize the given molecule, the program will then try to call `obabel` to pre-process the mol and repeat the sanitization procedure. 
**That is to say, if you wan't to use this level of sanitization, please ensure `obabel` is installed in the environment.** -According to our test, our sanitization procedure can successfully read 4852 small molecules in the PDBBind-refined-set. It is necessary to point out that the in the molecule file (mol/sdf), the number of explicit hydrogens has to be correct. Thus, we recommend to use - `obabel xxx -O xxx -h` to pre-process the file. The reason why we do not implement this hydrogen-adding procedure in dpdata is that we can not ensure its correctness. - -```python -import dpdata - -for sdf_file in glob.glob("bond_order/refined-set-ligands/obabel/*sdf"): - syst = dpdata.BondOrderSystem(sdf_file, sanitize_level="high", verbose=False) -``` -### Formal Charge Assignment -BondOrderSystem implement a method to assign formal charge for each atom based on the 8-electron rule (see below). Note that it only supports common elements in bio-system: B,C,N,O,P,S,As -```python -import dpdata - -syst = dpdata.BondOrderSystem("tests/bond_order/CH3NH3+.mol", fmt="mol") -print(syst.get_formal_charges()) # return the formal charge on each atom -print(syst.get_charge()) # return the total charge of the system +```bash +dpdata --version ``` -If a valence of 3 is detected on carbon, the formal charge will be assigned to -1. Because for most cases (in alkynyl anion, isonitrile, cyclopentadienyl anion), the formal charge on 3-valence carbon is -1, and this is also consisent with the 8-electron rule. +## Supported packages -## Mixed Type Format -The format `deepmd/npy/mixed` is the mixed type numpy format for DeePMD-kit, and can be loaded or dumped through class `dpdata.MultiSystems`. +`dpdata` is aimmed to support different kinds of atomistic packages: -Under this format, systems with the same number of atoms but different formula can be put together -for a larger system, especially when the frame numbers in systems are sparse. 
+- Atomistic machine learning packages, such as [DeePMD-kit](https://github.com/deepmodeling/deepmd-kit); +- Molecular dynamics packages, such as [LAMMPS](https://github.com/lammps/lammps) and [GROMACS](https://gitlab.com/gromacs/gromacs); +- Quantum chemistry packages, such as [VASP](https://www.vasp.at/), [Gaussian](https://gaussian.com), and [ABACUS](https://github.com/deepmodeling/abacus-develop); +- Atomistic visualization packages, such as [3Dmol.js](https://3dmol.csb.pitt.edu/). +- Other atomistic tools, such as [ASE](https://gitlab.com/ase/ase). +- Common formats such as `xyz`. -This also helps to mixture the type information together for model training with type embedding network. +All supported formats are listed [here](https://docs.deepmodeling.com/projects/dpdata/en/master/formats.html). -Here are examples using `deepmd/npy/mixed` format: +## Quick start -- Dump a MultiSystems into a mixed type numpy directory: -```python -import dpdata +The quickest way to convert a simple file from one format to another one is to use the [command line](https://docs.deepmodeling.com/projects/dpdata/en/master/cli.html). -dpdata.MultiSystems(*systems).to_deepmd_npy_mixed("mixed_dir") +```sh +dpdata OUTCAR -i vasp/outcar -o deepmd/npy -O deepmd_data ``` -- Load a mixed type data into a MultiSystems: -```python -import dpdata - -dpdata.MultiSystems().load_systems_from_file("mixed_dir", fmt="deepmd/npy/mixed") -``` +For advanced usage with Python APIs, [read dpdata documentation](https://docs.deepmodeling.com/projects/dpdata/). ## Plugins -One can follow [a simple example](plugin_example/) to add their own format by creating and installing plugins. 
It's critical to add the [Format](dpdata/format.py) class to `entry_points['dpdata.plugins']` in [`pyproject.toml`](plugin_example/pyproject.toml): -```toml -[project.entry-points.'dpdata.plugins'] -random = "dpdata_random:RandomFormat" -``` +- [cp2kdata](https://github.com/robinzyb/cp2kdata) adds the latest CP2K support for dpdata. + +For how to create your own plugin packages, [read dpdata documentation](https://docs.deepmodeling.com/projects/dpdata/). diff --git a/docs/index.rst b/docs/index.rst index 07e381de7..3d98bd566 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,22 +6,23 @@ Welcome to dpdata's documentation! ================================== +dpdata is a Python package for manipulating atomistic data of software in computational science. + .. toctree:: :maxdepth: 2 :caption: Contents: - Overview + installation + systems/index try_dpdata cli formats drivers minimizers + plugin api/api credits -.. mdinclude:: ../README.md - - Indices and tables ================== diff --git a/docs/installation.md b/docs/installation.md new file mode 100644 index 000000000..064f91331 --- /dev/null +++ b/docs/installation.md @@ -0,0 +1,13 @@ +# Installation + +DP-GEN only supports Python 3.7 and above. You can [setup a conda/pip environment](https://docs.deepmodeling.com/faq/conda.html), and then use one of the following methods to install DP-GEN: + +- Install via pip: `pip install dpdata` +- Install via conda: `conda install -c conda-forge dpdata` +- Install from source code: `git clone https://github.com/deepmodeling/dpdata && pip install ./dpdata` + +To test if the installation is successful, you may execute + +```bash +dpdata --version +``` diff --git a/docs/plugin.md b/docs/plugin.md new file mode 100644 index 000000000..44e8eabf7 --- /dev/null +++ b/docs/plugin.md @@ -0,0 +1,9 @@ +# Plugins + +One can follow a simple example under `plugin_example/` directory to add their own format by creating and installing plugins. 
+It's critical to add the :class:`Format` class to `entry_points['dpdata.plugins']` in `pyproject.toml`: + +```toml +[project.entry-points.'dpdata.plugins'] +random = "dpdata_random:RandomFormat" +``` diff --git a/docs/systems/bond_order_system.md b/docs/systems/bond_order_system.md new file mode 100644 index 000000000..443120a76 --- /dev/null +++ b/docs/systems/bond_order_system.md @@ -0,0 +1,51 @@ + +## BondOrderSystem +A new class :class:`BondOrderSystem` which inherits from class :class:`System` is introduced in dpdata. This new class contains information of chemical bonds and formal charges (stored in `BondOrderSystem.data['bonds']`, `BondOrderSystem.data['formal_charges']`). Now BondOrderSystem can only read from .mol/.sdf formats, because of its dependency on rdkit (which means rdkit must be installed if you want to use this function). Other formats, such as pdb, must be converted to .mol/.sdf format (maybe with software like open babel). +```python +import dpdata + +system_1 = dpdata.BondOrderSystem( + "tests/bond_order/CH3OH.mol", fmt="mol" +) # read from .mol file +system_2 = dpdata.BondOrderSystem( + "tests/bond_order/methane.sdf", fmt="sdf" +) # read from .sdf file +``` +In sdf file, all molecules must be of the same topology (i.e. conformers of the same molecular configuration). +`BondOrderSystem` also supports initialize from a :class:`rdkit.Chem.rdchem.Mol` object directly. +```python +from rdkit import Chem +from rdkit.Chem import AllChem +import dpdata + +mol = Chem.MolFromSmiles("CC") +mol = Chem.AddHs(mol) +AllChem.EmbedMultipleConfs(mol, 10) +system = dpdata.BondOrderSystem(rdkit_mol=mol) +``` + +### Bond Order Assignment +The :class:`BondOrderSystem` implements a more robust sanitize procedure for rdkit Mol, as defined in :class:`dpdata.rdkit.santizie.Sanitizer`. This class defines 3 level of sanitization process by: low, medium and high. (default is medium). ++ low: use `rdkit.Chem.SanitizeMol()` function to sanitize molecule. 
++ medium: before using rdkit, the programm will first assign formal charge of each atom to avoid inappropriate valence exceptions. However, this mode requires the rightness of the bond order information in the given molecule. ++ high: the program will try to fix inappropriate bond orders in aromatic hetreocycles, phosphate, sulfate, carboxyl, nitro, nitrine, guanidine groups. If this procedure fails to sanitize the given molecule, the program will then try to call `obabel` to pre-process the mol and repeat the sanitization procedure. **That is to say, if you wan't to use this level of sanitization, please ensure `obabel` is installed in the environment.** +According to our test, our sanitization procedure can successfully read 4852 small molecules in the PDBBind-refined-set. It is necessary to point out that the in the molecule file (mol/sdf), the number of explicit hydrogens has to be correct. Thus, we recommend to use + `obabel xxx -O xxx -h` to pre-process the file. The reason why we do not implement this hydrogen-adding procedure in dpdata is that we can not ensure its correctness. + +```python +import dpdata + +for sdf_file in glob.glob("bond_order/refined-set-ligands/obabel/*sdf"): + syst = dpdata.BondOrderSystem(sdf_file, sanitize_level="high", verbose=False) +``` +### Formal Charge Assignment +BondOrderSystem implement a method to assign formal charge for each atom based on the 8-electron rule (see below). Note that it only supports common elements in bio-system: B,C,N,O,P,S,As +```python +import dpdata + +syst = dpdata.BondOrderSystem("tests/bond_order/CH3NH3+.mol", fmt="mol") +print(syst.get_formal_charges()) # return the formal charge on each atom +print(syst.get_charge()) # return the total charge of the system +``` + +If a valence of 3 is detected on carbon, the formal charge will be assigned to -1. 
Because for most cases (in alkynyl anion, isonitrile, cyclopentadienyl anion), the formal charge on 3-valence carbon is -1, and this is also consisent with the 8-electron rule. diff --git a/docs/systems/index.rst b/docs/systems/index.rst new file mode 100644 index 000000000..dfcbe698e --- /dev/null +++ b/docs/systems/index.rst @@ -0,0 +1,11 @@ +Systems +======= + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + system + multi + bond_order_system + mixed diff --git a/docs/systems/mixed.md b/docs/systems/mixed.md new file mode 100644 index 000000000..69bf80ea8 --- /dev/null +++ b/docs/systems/mixed.md @@ -0,0 +1,24 @@ +# Mixed Type Format + +The format `deepmd/npy/mixed` is the mixed type numpy format for DeePMD-kit, and can be loaded or dumped through class :class:`dpdata.MultiSystems`. + +Under this format, systems with the same number of atoms but different formula can be put together +for a larger system, especially when the frame numbers in systems are sparse. + +This also helps to mixture the type information together for model training with type embedding network. + +Here are examples using `deepmd/npy/mixed` format: + +- Dump a MultiSystems into a mixed type numpy directory: +```python +import dpdata + +dpdata.MultiSystems(*systems).to_deepmd_npy_mixed("mixed_dir") +``` + +- Load a mixed type data into a MultiSystems: +```python +import dpdata + +dpdata.MultiSystems().load_systems_from_file("mixed_dir", fmt="deepmd/npy/mixed") +``` diff --git a/docs/systems/multi.md b/docs/systems/multi.md new file mode 100644 index 000000000..8e47acf70 --- /dev/null +++ b/docs/systems/multi.md @@ -0,0 +1,62 @@ +# `MultiSystems` + +The Class :class:`dpdata.MultiSystems` can read data from a dir which may contains many files of different systems, or from single xyz file which contains different systems. 
+
+Use :meth:`dpdata.MultiSystems.from_dir` to read from a directory; :class:`dpdata.MultiSystems` will walk through the directory
+recursively and find all files with the specified file_name. Supports all the file formats that :class:`dpdata.LabeledSystem` supports.
+
+Use :meth:`dpdata.MultiSystems.from_file` to read from a single file. Single-file support is available for the `quip/gap/xyz` and `ase/structure` formats.
+
+For example, for `quip/gap xyz` files, a single .xyz file may contain many different configurations with different atom numbers and atom types.
+
+The following commands relating to :class:`dpdata.MultiSystems` may be useful.
+```python
+# load data
+
+xyz_multi_systems = dpdata.MultiSystems.from_file(
+    file_name="tests/xyz/xyz_unittest.xyz", fmt="quip/gap/xyz"
+)
+vasp_multi_systems = dpdata.MultiSystems.from_dir(
+    dir_name="./mgal_outcar", file_name="OUTCAR", fmt="vasp/outcar"
+)
+
+# use wildcard
+vasp_multi_systems = dpdata.MultiSystems.from_dir(
+    dir_name="./mgal_outcar", file_name="*OUTCAR", fmt="vasp/outcar"
+)
+
+# print the multi_system information
+print(xyz_multi_systems)
+print(xyz_multi_systems.systems) # return a dictionary
+
+# print the system information
+print(xyz_multi_systems.systems["B1C9"].data)
+
+# dump a system's data to ./my_work_dir/B1C9_raw folder
+xyz_multi_systems.systems["B1C9"].to_deepmd_raw("./my_work_dir/B1C9_raw")
+
+# dump all systems
+xyz_multi_systems.to_deepmd_raw("./my_deepmd_data/")
+```
+
+You may also use the following code to parse multi-systems:
+```python
+from dpdata import LabeledSystem, MultiSystems
+from glob import glob
+
+"""
+process multi systems
+"""
+fs = glob("./*/OUTCAR") # remember to change here !!!
+ms = MultiSystems()
+for f in fs:
+    try:
+        ls = LabeledSystem(f)
+    except:
+        print(f)
+    if len(ls) > 0:
+        ms.append(ls)
+
+ms.to_deepmd_raw("deepmd")
+ms.to_deepmd_npy("deepmd")
+```
diff --git a/docs/systems/system.md b/docs/systems/system.md
new file mode 100644
index 000000000..0401a3110
--- /dev/null
+++ b/docs/systems/system.md
@@ -0,0 +1,112 @@
+# `System` and `LabeledSystem`
+
+This section gives some examples of how dpdata works. Firstly one needs to import the module in python 3.x compatible code.
+```python
+import dpdata
+```
+The typical workflow of `dpdata` is
+
+1. Load data from vasp or lammps or deepmd-kit data files.
+2. Manipulate data
+3. Dump data in a desired format
+
+
+### Load data
+```python
+d_poscar = dpdata.System("POSCAR", fmt="vasp/poscar")
+```
+or let dpdata infer the format (`vasp/poscar`) of the file from the file name extension
+```python
+d_poscar = dpdata.System("my.POSCAR")
+```
+The number of atoms, atom types, coordinates are loaded from the `POSCAR` and stored to a data :class:`System` called `d_poscar`.
+A data :class:`System` (a concept used by [deepmd-kit](https://github.com/deepmodeling/deepmd-kit)) contains frames that have the same number of atoms of the same type. The order of the atoms should be consistent among the frames in one :class:`System`.
+It is noted that `POSCAR` only contains one frame.
+If the multiple frames stored in, for example, an `OUTCAR` are wanted,
+```python
+d_outcar = dpdata.LabeledSystem("OUTCAR")
+```
+The labels provided in the `OUTCAR`, i.e. energies, forces and virials (if any), are loaded by :class:`LabeledSystem`. It is noted that the forces of atoms are always assumed to exist. :class:`LabeledSystem` is a derived class of :class:`System`.
+ +The :class:`System` or :class:`LabeledSystem` can be constructed from the following file formats with the `format key` in the table passed to argument `fmt`: + + + +### Access data +These properties stored in :class:`System` and :class:`LabeledSystem` can be accessed by operator `[]` with the key of the property supplied, for example +```python +coords = d_outcar["coords"] +``` +Available properties are (nframe: number of frames in the system, natoms: total number of atoms in the system) + +| key | type | dimension | are labels | description +| --- | --- | --- | --- | --- +| 'atom_names' | list of str | ntypes | False | The name of each atom type +| 'atom_numbs' | list of int | ntypes | False | The number of atoms of each atom type +| 'atom_types' | np.ndarray | natoms | False | Array assigning type to each atom +| 'cells' | np.ndarray | nframes x 3 x 3 | False | The cell tensor of each frame +| 'coords' | np.ndarray | nframes x natoms x 3 | False | The atom coordinates +| 'energies' | np.ndarray | nframes | True | The frame energies +| 'forces' | np.ndarray | nframes x natoms x 3 | True | The atom forces +| 'virials' | np.ndarray | nframes x 3 x 3 | True | The virial tensor of each frame + + +### Dump data +The data stored in :class:`System` or :class:`LabeledSystem` can be dumped in 'lammps/lmp' or 'vasp/poscar' format, for example: +```python +d_outcar.to("lammps/lmp", "conf.lmp", frame_idx=0) +``` +The first frames of `d_outcar` will be dumped to 'conf.lmp' +```python +d_outcar.to("vasp/poscar", "POSCAR", frame_idx=-1) +``` +The last frames of `d_outcar` will be dumped to 'POSCAR'. 
+
+The data stored in `LabeledSystem` can be dumped to deepmd-kit raw format, for example
+```python
+d_outcar.to("deepmd/raw", "dpmd_raw")
+```
+Or a simpler command:
+```python
+dpdata.LabeledSystem("OUTCAR").to("deepmd/raw", "dpmd_raw")
+```
+Frame selection can be implemented by
+```python
+dpdata.LabeledSystem("OUTCAR").sub_system([0, -1]).to("deepmd/raw", "dpmd_raw")
+```
+by which only the first and last frames are dumped to `dpmd_raw`.
+
+
+### replicate
+dpdata will create a super cell of the current atom configuration.
+```python
+dpdata.System("./POSCAR").replicate(
+    (
+        1,
+        2,
+        3,
+    )
+)
+```
+tuple(1,2,3) means don't copy the atom configuration in the x direction, make 2 copies in the y direction, and make 3 copies in the z direction.
+
+
+### perturb
+By the following example, each frame of the original system (`dpdata.System('./POSCAR')`) is perturbed to generate three new frames. For each frame, the cell is perturbed by 5% and the atom positions are perturbed by 0.6 Angstrom. `atom_pert_style` indicates that the perturbation to the atom positions is subject to a normal distribution. Other available options to `atom_pert_style` are `uniform` (uniform in a ball), and `const` (uniform on a sphere).
+```python
+perturbed_system = dpdata.System("./POSCAR").perturb(
+    pert_num=3,
+    cell_pert_fraction=0.05,
+    atom_pert_distance=0.6,
+    atom_pert_style="normal",
+)
+print(perturbed_system.data)
+```
+
+### replace
+By the following example, 8 random Hf atoms in the system will be replaced by Zr atoms with the atom positions unchanged.
+```python +s = dpdata.System("tests/poscars/POSCAR.P42nmc", fmt="vasp/poscar") +s.replace("Hf", "Zr", 8) +s.to_vasp_poscar("POSCAR.P42nmc.replace") +``` From 5df6acd266589b131c961390414337472e0880d9 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 2 Sep 2024 22:18:50 -0400 Subject: [PATCH 21/24] fix: fix a typo in #702 (#710) ## Summary by CodeRabbit - **Improvements** - Enhanced the flexibility of data handling by modifying how data types are processed, allowing for better adaptability based on context. Signed-off-by: Jinzhe Zeng --- dpdata/deepmd/comp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpdata/deepmd/comp.py b/dpdata/deepmd/comp.py index 661110162..ad6384088 100644 --- a/dpdata/deepmd/comp.py +++ b/dpdata/deepmd/comp.py @@ -180,7 +180,7 @@ def dump(folder, data, set_size=5000, comp_prec=np.float32, remove_sets=True): dtypes = dpdata.system.LabeledSystem.DTYPES else: dtypes = dpdata.system.System.DTYPES - for dtype in dpdata.system.LabeledSystem.DTYPES: + for dtype in dtypes: if dtype.name in ( "atom_numbs", "atom_names", From 6bf41e31b4952e54e7cec6f8b6c9cc3aabdc05cb Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 2 Sep 2024 22:58:22 -0400 Subject: [PATCH 22/24] feat: file object passed to open (#709) ## Summary by CodeRabbit - **New Features** - Introduced a new utility function `open_file` for improved file handling across various modules. - Enhanced type annotations for multiple functions to specify `FileType`, improving code clarity and type safety. - **Bug Fixes** - Improved file handling robustness by replacing the built-in `open` function with the custom `open_file` function in several modules. - **Tests** - Added unit tests for the new `open_file` utility function to ensure reliable functionality. 
--------- Signed-off-by: Jinzhe Zeng Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- dpdata/abacus/md.py | 10 +++++---- dpdata/abacus/relax.py | 8 ++++--- dpdata/abacus/scf.py | 10 +++++---- dpdata/amber/md.py | 7 +++--- dpdata/amber/sqm.py | 14 ++++++++---- dpdata/deepmd/comp.py | 3 ++- dpdata/deepmd/raw.py | 5 +++-- dpdata/dftbplus/output.py | 15 ++++++++++--- dpdata/gaussian/log.py | 11 +++++++-- dpdata/gromacs/gro.py | 10 +++++++-- dpdata/lammps/dump.py | 10 +++++++-- dpdata/openmx/omx.py | 21 +++++++++++------ dpdata/orca/output.py | 13 +++++++++-- dpdata/plugins/abacus.py | 10 +++++++-- dpdata/plugins/amber.py | 3 ++- dpdata/plugins/gaussian.py | 19 ++++++++++------ dpdata/plugins/gromacs.py | 12 ++++++++-- dpdata/plugins/lammps.py | 14 ++++++++---- dpdata/plugins/n2p2.py | 14 ++++++++---- dpdata/plugins/orca.py | 9 ++++++-- dpdata/plugins/psi4.py | 14 ++++++++---- dpdata/plugins/pwmat.py | 14 ++++++++---- dpdata/plugins/vasp.py | 15 ++++++++----- dpdata/plugins/xyz.py | 14 ++++++++---- dpdata/psi4/output.py | 10 +++++++-- dpdata/qe/scf.py | 6 +++-- dpdata/qe/traj.py | 20 +++++++++++------ dpdata/utils.py | 46 +++++++++++++++++++++++++++++++++++++- tests/test_read_file.py | 22 ++++++++++++++++++ 29 files changed, 289 insertions(+), 90 deletions(-) create mode 100644 tests/test_read_file.py diff --git a/dpdata/abacus/md.py b/dpdata/abacus/md.py index fa1841777..9e0a416a0 100644 --- a/dpdata/abacus/md.py +++ b/dpdata/abacus/md.py @@ -5,6 +5,8 @@ import numpy as np +from dpdata.utils import open_file + from .scf import ( bohr2ang, get_cell, @@ -156,12 +158,12 @@ def get_frame(fname): path_in = os.path.join(fname, "INPUT") else: raise RuntimeError("invalid input") - with open(path_in) as fp: + with open_file(path_in) as fp: inlines = fp.read().split("\n") geometry_path_in = get_geometry_in(fname, inlines) # base dir of STRU path_out = get_path_out(fname, inlines) - with open(geometry_path_in) as fp: + with 
open_file(geometry_path_in) as fp: geometry_inlines = fp.read().split("\n") celldm, cell = get_cell(geometry_inlines) atom_names, natoms, types, coords = get_coords( @@ -172,11 +174,11 @@ def get_frame(fname): # ndump = int(os.popen("ls -l %s | grep 'md_pos_' | wc -l" %path_out).readlines()[0]) # number of dumped geometry files # coords = get_coords_from_cif(ndump, dump_freq, atom_names, natoms, types, path_out, cell) - with open(os.path.join(path_out, "MD_dump")) as fp: + with open_file(os.path.join(path_out, "MD_dump")) as fp: dumplines = fp.read().split("\n") coords, cells, force, stress = get_coords_from_dump(dumplines, natoms) ndump = np.shape(coords)[0] - with open(os.path.join(path_out, "running_md.log")) as fp: + with open_file(os.path.join(path_out, "running_md.log")) as fp: outlines = fp.read().split("\n") energy = get_energy(outlines, ndump, dump_freq) diff --git a/dpdata/abacus/relax.py b/dpdata/abacus/relax.py index 976243b82..7a4abf357 100644 --- a/dpdata/abacus/relax.py +++ b/dpdata/abacus/relax.py @@ -4,6 +4,8 @@ import numpy as np +from dpdata.utils import open_file + from .scf import ( bohr2ang, collect_force, @@ -174,10 +176,10 @@ def get_frame(fname): path_in = os.path.join(fname, "INPUT") else: raise RuntimeError("invalid input") - with open(path_in) as fp: + with open_file(path_in) as fp: inlines = fp.read().split("\n") geometry_path_in = get_geometry_in(fname, inlines) # base dir of STRU - with open(geometry_path_in) as fp: + with open_file(geometry_path_in) as fp: geometry_inlines = fp.read().split("\n") celldm, cell = get_cell(geometry_inlines) atom_names, natoms, types, coord_tmp = get_coords( @@ -186,7 +188,7 @@ def get_frame(fname): logf = get_log_file(fname, inlines) assert os.path.isfile(logf), f"Error: can not find {logf}" - with open(logf) as f1: + with open_file(logf) as f1: lines = f1.readlines() atomnumber = 0 diff --git a/dpdata/abacus/scf.py b/dpdata/abacus/scf.py index c06f8cd3d..93e3d6e15 100644 --- a/dpdata/abacus/scf.py +++ 
b/dpdata/abacus/scf.py @@ -6,6 +6,8 @@ import numpy as np +from dpdata.utils import open_file + from ..unit import EnergyConversion, LengthConversion, PressureConversion bohr2ang = LengthConversion("bohr", "angstrom").value() @@ -253,7 +255,7 @@ def get_frame(fname): if not CheckFile(path_in): return data - with open(path_in) as fp: + with open_file(path_in) as fp: inlines = fp.read().split("\n") geometry_path_in = get_geometry_in(fname, inlines) @@ -261,9 +263,9 @@ def get_frame(fname): if not (CheckFile(geometry_path_in) and CheckFile(path_out)): return data - with open(geometry_path_in) as fp: + with open_file(geometry_path_in) as fp: geometry_inlines = fp.read().split("\n") - with open(path_out) as fp: + with open_file(path_out) as fp: outlines = fp.read().split("\n") celldm, cell = get_cell(geometry_inlines) @@ -338,7 +340,7 @@ def get_nele_from_stru(geometry_inlines): def get_frame_from_stru(fname): assert isinstance(fname, str) - with open(fname) as fp: + with open_file(fname) as fp: geometry_inlines = fp.read().split("\n") nele = get_nele_from_stru(geometry_inlines) inlines = ["ntype %d" % nele] diff --git a/dpdata/amber/md.py b/dpdata/amber/md.py index f3217fbd9..cb4f2d25e 100644 --- a/dpdata/amber/md.py +++ b/dpdata/amber/md.py @@ -7,6 +7,7 @@ from dpdata.amber.mask import pick_by_amber_mask from dpdata.unit import EnergyConversion +from dpdata.utils import open_file from ..periodic_table import ELEMENTS @@ -51,7 +52,7 @@ def read_amber_traj( flag_atom_numb = False amber_types = [] atomic_number = [] - with open(parm7_file) as f: + with open_file(parm7_file) as f: for line in f: if line.startswith("%FLAG"): flag_atom_type = line.startswith("%FLAG AMBER_ATOM_TYPE") @@ -101,14 +102,14 @@ def read_amber_traj( # load energy from mden_file or mdout_file energies = [] if mden_file is not None and os.path.isfile(mden_file): - with open(mden_file) as f: + with open_file(mden_file) as f: for line in f: if line.startswith("L6"): s = line.split() if s[2] != "E_pot": 
energies.append(float(s[2])) elif mdout_file is not None and os.path.isfile(mdout_file): - with open(mdout_file) as f: + with open_file(mdout_file) as f: for line in f: if "EPtot" in line: s = line.split() diff --git a/dpdata/amber/sqm.py b/dpdata/amber/sqm.py index 1be3802a2..93e41f9aa 100644 --- a/dpdata/amber/sqm.py +++ b/dpdata/amber/sqm.py @@ -1,9 +1,15 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np from dpdata.periodic_table import ELEMENTS from dpdata.unit import EnergyConversion +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType kcal2ev = EnergyConversion("kcal_mol", "eV").value() @@ -14,7 +20,7 @@ READ_FORCES = 7 -def parse_sqm_out(fname): +def parse_sqm_out(fname: FileType): """Read atom symbols, charges and coordinates from ambertools sqm.out file.""" atom_symbols = [] coords = [] @@ -22,7 +28,7 @@ def parse_sqm_out(fname): forces = [] energies = [] - with open(fname) as f: + with open_file(fname) as f: flag = START for line in f: if line.startswith(" Total SCF energy"): @@ -81,7 +87,7 @@ def parse_sqm_out(fname): return data -def make_sqm_in(data, fname=None, frame_idx=0, **kwargs): +def make_sqm_in(data, fname: FileType | None = None, frame_idx=0, **kwargs): symbols = [data["atom_names"][ii] for ii in data["atom_types"]] atomic_numbers = [ELEMENTS.index(ss) + 1 for ss in symbols] charge = kwargs.get("charge", 0) @@ -109,6 +115,6 @@ def make_sqm_in(data, fname=None, frame_idx=0, **kwargs): f"{data['coords'][frame_idx][ii, 2]:.6f}", ) if fname is not None: - with open(fname, "w") as fp: + with open_file(fname, "w") as fp: fp.write(ret) return ret diff --git a/dpdata/deepmd/comp.py b/dpdata/deepmd/comp.py index ad6384088..5ba3914e7 100644 --- a/dpdata/deepmd/comp.py +++ b/dpdata/deepmd/comp.py @@ -8,6 +8,7 @@ import numpy as np import dpdata +from dpdata.utils import open_file from .raw import load_type @@ -172,7 +173,7 @@ def dump(folder, data, set_size=5000, 
comp_prec=np.float32, remove_sets=True): except OSError: pass if data.get("nopbc", False): - with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: + with open_file(os.path.join(folder, "nopbc"), "w") as fw_nopbc: pass # allow custom dtypes labels = "energies" in data diff --git a/dpdata/deepmd/raw.py b/dpdata/deepmd/raw.py index 717c73f96..f8ddcaf35 100644 --- a/dpdata/deepmd/raw.py +++ b/dpdata/deepmd/raw.py @@ -6,6 +6,7 @@ import numpy as np import dpdata +from dpdata.utils import open_file def load_type(folder, type_map=None): @@ -17,7 +18,7 @@ def load_type(folder, type_map=None): data["atom_names"] = [] # if find type_map.raw, use it if os.path.isfile(os.path.join(folder, "type_map.raw")): - with open(os.path.join(folder, "type_map.raw")) as fp: + with open_file(os.path.join(folder, "type_map.raw")) as fp: my_type_map = fp.read().split() # else try to use arg type_map elif type_map is not None: @@ -140,7 +141,7 @@ def dump(folder, data): except OSError: pass if data.get("nopbc", False): - with open(os.path.join(folder, "nopbc"), "w") as fw_nopbc: + with open_file(os.path.join(folder, "nopbc"), "w") as fw_nopbc: pass # allow custom dtypes labels = "energies" in data diff --git a/dpdata/dftbplus/output.py b/dpdata/dftbplus/output.py index 0f10c3ac9..49fdd2b1b 100644 --- a/dpdata/dftbplus/output.py +++ b/dpdata/dftbplus/output.py @@ -1,9 +1,18 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType + -def read_dftb_plus(fn_1: str, fn_2: str) -> tuple[str, np.ndarray, float, np.ndarray]: +def read_dftb_plus( + fn_1: FileType, fn_2: FileType +) -> tuple[str, np.ndarray, float, np.ndarray]: """Read from DFTB+ input and output. 
Parameters @@ -29,7 +38,7 @@ def read_dftb_plus(fn_1: str, fn_2: str) -> tuple[str, np.ndarray, float, np.nda symbols = None forces = None energy = None - with open(fn_1) as f: + with open_file(fn_1) as f: flag = 0 for line in f: if flag == 1: @@ -49,7 +58,7 @@ def read_dftb_plus(fn_1: str, fn_2: str) -> tuple[str, np.ndarray, float, np.nda flag += 1 if flag == 7: flag = 0 - with open(fn_2) as f: + with open_file(fn_2) as f: flag = 0 for line in f: if line.startswith("Total Forces"): diff --git a/dpdata/gaussian/log.py b/dpdata/gaussian/log.py index 204cf464c..08a65b9dc 100644 --- a/dpdata/gaussian/log.py +++ b/dpdata/gaussian/log.py @@ -1,7 +1,14 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType + from ..periodic_table import ELEMENTS from ..unit import EnergyConversion, ForceConversion, LengthConversion @@ -12,7 +19,7 @@ symbols = ["X"] + ELEMENTS -def to_system_data(file_name, md=False): +def to_system_data(file_name: FileType, md=False): """Read Gaussian log file. 
Parameters @@ -43,7 +50,7 @@ def to_system_data(file_name, md=False): nopbc = True coords = None - with open(file_name) as fp: + with open_file(file_name) as fp: for line in fp: if line.startswith(" SCF Done"): # energies diff --git a/dpdata/gromacs/gro.py b/dpdata/gromacs/gro.py index aca2443b8..fe83e0c5c 100644 --- a/dpdata/gromacs/gro.py +++ b/dpdata/gromacs/gro.py @@ -2,9 +2,15 @@ from __future__ import annotations import re +from typing import TYPE_CHECKING import numpy as np +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType + from ..unit import LengthConversion nm2ang = LengthConversion("nm", "angstrom").value() @@ -48,9 +54,9 @@ def _get_cell(line): return cell -def file_to_system_data(fname, format_atom_name=True, **kwargs): +def file_to_system_data(fname: FileType, format_atom_name=True, **kwargs): system = {"coords": [], "cells": []} - with open(fname) as fp: + with open_file(fname) as fp: frame = 0 while True: flag = fp.readline() diff --git a/dpdata/lammps/dump.py b/dpdata/lammps/dump.py index f0ade2b03..72fee27df 100644 --- a/dpdata/lammps/dump.py +++ b/dpdata/lammps/dump.py @@ -3,9 +3,15 @@ import os import sys +from typing import TYPE_CHECKING import numpy as np +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType + lib_path = os.path.dirname(os.path.realpath(__file__)) sys.path.append(lib_path) import warnings @@ -169,11 +175,11 @@ def box2dumpbox(orig, box): return bounds, tilt -def load_file(fname, begin=0, step=1): +def load_file(fname: FileType, begin=0, step=1): lines = [] buff = [] cc = -1 - with open(fname) as fp: + with open_file(fname) as fp: while True: line = fp.readline().rstrip("\n") if not line: diff --git a/dpdata/openmx/omx.py b/dpdata/openmx/omx.py index d3afff00f..aae9b5786 100644 --- a/dpdata/openmx/omx.py +++ b/dpdata/openmx/omx.py @@ -1,8 +1,15 @@ #!/usr/bin/python3 from __future__ import annotations +from typing import TYPE_CHECKING + import 
numpy as np +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType + from ..unit import ( EnergyConversion, ForceConversion, @@ -98,12 +105,12 @@ def load_cells(lines): # load atom_names, atom_numbs, atom_types, cells -def load_param_file(fname, mdname): - with open(fname) as dat_file: +def load_param_file(fname: FileType, mdname: FileType): + with open_file(fname) as dat_file: lines = dat_file.readlines() atom_names, atom_types, atom_numbs = load_atom(lines) - with open(mdname) as md_file: + with open_file(mdname) as md_file: lines = md_file.readlines() cells = load_cells(lines) return atom_names, atom_numbs, atom_types, cells @@ -133,15 +140,15 @@ def load_coords(lines, atom_names, natoms): return coords -def load_data(mdname, atom_names, natoms): - with open(mdname) as md_file: +def load_data(mdname: FileType, atom_names, natoms): + with open_file(mdname) as md_file: lines = md_file.readlines() coords = load_coords(lines, atom_names, natoms) steps = [str(i) for i in range(1, coords.shape[0] + 1)] return coords, steps -def to_system_data(fname, mdname): +def to_system_data(fname: FileType, mdname: FileType): data = {} ( data["atom_names"], @@ -194,7 +201,7 @@ def load_force(lines, atom_names, atom_numbs): # load energy, force def to_system_label(fname, mdname): atom_names, atom_numbs, atom_types, cells = load_param_file(fname, mdname) - with open(mdname) as md_file: + with open_file(mdname) as md_file: lines = md_file.readlines() energy = load_energy(lines) force = load_force(lines, atom_names, atom_numbs) diff --git a/dpdata/orca/output.py b/dpdata/orca/output.py index a23013fda..a0915162b 100644 --- a/dpdata/orca/output.py +++ b/dpdata/orca/output.py @@ -1,9 +1,18 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType + -def read_orca_sp_output(fn: str) -> tuple[np.ndarray, np.ndarray, float, 
np.ndarray]: +def read_orca_sp_output( + fn: FileType, +) -> tuple[np.ndarray, np.ndarray, float, np.ndarray]: """Read from ORCA output. Note that both the energy and the gradient should be printed. @@ -28,7 +37,7 @@ def read_orca_sp_output(fn: str) -> tuple[np.ndarray, np.ndarray, float, np.ndar symbols = None forces = None energy = None - with open(fn) as f: + with open_file(fn) as f: flag = 0 for line in f: if flag in (1, 3, 4): diff --git a/dpdata/plugins/abacus.py b/dpdata/plugins/abacus.py index eb2d7786f..e3367b351 100644 --- a/dpdata/plugins/abacus.py +++ b/dpdata/plugins/abacus.py @@ -1,9 +1,15 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import dpdata.abacus.md import dpdata.abacus.relax import dpdata.abacus.scf from dpdata.format import Format +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType @Format.register("abacus/stru") @@ -12,7 +18,7 @@ class AbacusSTRUFormat(Format): def from_system(self, file_name, **kwargs): return dpdata.abacus.scf.get_frame_from_stru(file_name) - def to_system(self, data, file_name, frame_idx=0, **kwargs): + def to_system(self, data, file_name: FileType, frame_idx=0, **kwargs): """Dump the system into ABACUS STRU format file. 
Parameters @@ -46,7 +52,7 @@ def to_system(self, data, file_name, frame_idx=0, **kwargs): numerical_descriptor=numerical_descriptor, mass=mass, ) - with open(file_name, "w") as fp: + with open_file(file_name, "w") as fp: fp.write(stru_string) diff --git a/dpdata/plugins/amber.py b/dpdata/plugins/amber.py index 42fce5528..361e0d8a7 100644 --- a/dpdata/plugins/amber.py +++ b/dpdata/plugins/amber.py @@ -8,6 +8,7 @@ import dpdata.amber.sqm from dpdata.driver import Driver, Minimizer from dpdata.format import Format +from dpdata.utils import open_file @Format.register("amber/md") @@ -143,7 +144,7 @@ def label(self, data: dict) -> dict: [*self.sqm_exec.split(), "-O", "-i", inp_fn, "-o", out_fn] ) except sp.CalledProcessError as e: - with open(out_fn) as f: + with open_file(out_fn) as f: raise RuntimeError( "Run sqm failed! Output:\n" + f.read() ) from e diff --git a/dpdata/plugins/gaussian.py b/dpdata/plugins/gaussian.py index b55447b91..55bee5a4c 100644 --- a/dpdata/plugins/gaussian.py +++ b/dpdata/plugins/gaussian.py @@ -3,16 +3,21 @@ import os import subprocess as sp import tempfile +from typing import TYPE_CHECKING import dpdata.gaussian.gjf import dpdata.gaussian.log from dpdata.driver import Driver from dpdata.format import Format +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType @Format.register("gaussian/log") class GaussianLogFormat(Format): - def from_labeled_system(self, file_name, md=False, **kwargs): + def from_labeled_system(self, file_name: FileType, md=False, **kwargs): try: return dpdata.gaussian.log.to_system_data(file_name, md=md) except AssertionError: @@ -21,7 +26,7 @@ def from_labeled_system(self, file_name, md=False, **kwargs): @Format.register("gaussian/md") class GaussianMDFormat(Format): - def from_labeled_system(self, file_name, **kwargs): + def from_labeled_system(self, file_name: FileType, **kwargs): return GaussianLogFormat().from_labeled_system(file_name, md=True) @@ -29,7 +34,7 @@ def 
from_labeled_system(self, file_name, **kwargs): class GaussiaGJFFormat(Format): """Gaussian input file.""" - def from_system(self, file_name: str, **kwargs): + def from_system(self, file_name: FileType, **kwargs): """Read Gaussian input file. Parameters @@ -39,11 +44,11 @@ def from_system(self, file_name: str, **kwargs): **kwargs : dict keyword arguments """ - with open(file_name) as fp: + with open_file(file_name) as fp: text = fp.read() return dpdata.gaussian.gjf.read_gaussian_input(text) - def to_system(self, data: dict, file_name: str, **kwargs): + def to_system(self, data: dict, file_name: FileType, **kwargs): """Generate Gaussian input file. Parameters @@ -56,7 +61,7 @@ def to_system(self, data: dict, file_name: str, **kwargs): Other parameters to make input files. See :meth:`dpdata.gaussian.gjf.make_gaussian_input` """ text = dpdata.gaussian.gjf.make_gaussian_input(data, **kwargs) - with open(file_name, "w") as fp: + with open_file(file_name, "w") as fp: fp.write(text) @@ -110,7 +115,7 @@ def label(self, data: dict) -> dict: try: sp.check_output([*self.gaussian_exec.split(), inp_fn]) except sp.CalledProcessError as e: - with open(out_fn) as f: + with open_file(out_fn) as f: out = f.read() raise RuntimeError("Run gaussian failed! 
Output:\n" + out) from e labeled_system.append(dpdata.LabeledSystem(out_fn, fmt="gaussian/log")) diff --git a/dpdata/plugins/gromacs.py b/dpdata/plugins/gromacs.py index 12dece718..a7066bbcc 100644 --- a/dpdata/plugins/gromacs.py +++ b/dpdata/plugins/gromacs.py @@ -1,7 +1,13 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import dpdata.gromacs.gro from dpdata.format import Format +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType @Format.register("gro") @@ -23,7 +29,9 @@ def from_system(self, file_name, format_atom_name=True, **kwargs): file_name, format_atom_name=format_atom_name, **kwargs ) - def to_system(self, data, file_name=None, frame_idx=-1, **kwargs): + def to_system( + self, data, file_name: FileType | None = None, frame_idx=-1, **kwargs + ): """Dump the system in gromacs .gro format. Parameters @@ -52,5 +60,5 @@ def to_system(self, data, file_name=None, frame_idx=-1, **kwargs): if file_name is None: return gro_str else: - with open(file_name, "w+") as fp: + with open_file(file_name, "w+") as fp: fp.write(gro_str) diff --git a/dpdata/plugins/lammps.py b/dpdata/plugins/lammps.py index 65e7f5701..0327d444b 100644 --- a/dpdata/plugins/lammps.py +++ b/dpdata/plugins/lammps.py @@ -1,20 +1,26 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import dpdata.lammps.dump import dpdata.lammps.lmp from dpdata.format import Format +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType @Format.register("lmp") @Format.register("lammps/lmp") class LAMMPSLmpFormat(Format): @Format.post("shift_orig_zero") - def from_system(self, file_name, type_map=None, **kwargs): - with open(file_name) as fp: + def from_system(self, file_name: FileType, type_map=None, **kwargs): + with open_file(file_name) as fp: lines = [line.rstrip("\n") for line in fp] return dpdata.lammps.lmp.to_system_data(lines, type_map) - def to_system(self, data, file_name, 
frame_idx=0, **kwargs): + def to_system(self, data, file_name: FileType, frame_idx=0, **kwargs): """Dump the system in lammps data format. Parameters @@ -30,7 +36,7 @@ def to_system(self, data, file_name, frame_idx=0, **kwargs): """ assert frame_idx < len(data["coords"]) w_str = dpdata.lammps.lmp.from_system_data(data, frame_idx) - with open(file_name, "w") as fp: + with open_file(file_name, "w") as fp: fp.write(w_str) diff --git a/dpdata/plugins/n2p2.py b/dpdata/plugins/n2p2.py index b70d6e6fb..28942ff5f 100644 --- a/dpdata/plugins/n2p2.py +++ b/dpdata/plugins/n2p2.py @@ -1,8 +1,14 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np from dpdata.format import Format +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType from ..unit import EnergyConversion, ForceConversion, LengthConversion @@ -44,7 +50,7 @@ class N2P2Format(Format): For more information about the n2p2 format, please refer to https://compphysvienna.github.io/n2p2/topics/cfg_file.html """ - def from_labeled_system(self, file_name, **kwargs): + def from_labeled_system(self, file_name: FileType, **kwargs): """Read from n2p2 format. Parameters @@ -67,7 +73,7 @@ def from_labeled_system(self, file_name, **kwargs): natom0 = None natoms0 = None atom_types0 = None - with open(file_name) as file: + with open_file(file_name) as file: for line in file: line = line.strip() # Remove leading/trailing whitespace if line.lower() == "begin": @@ -155,7 +161,7 @@ def from_labeled_system(self, file_name, **kwargs): "forces": forces, } - def to_labeled_system(self, data, file_name, **kwargs): + def to_labeled_system(self, data, file_name: FileType, **kwargs): """Write n2p2 format. By default, LabeledSystem.to will fallback to System.to. 
@@ -193,5 +199,5 @@ def to_labeled_system(self, data, file_name, **kwargs): buff.append(f"energy {energy:15.6f}") buff.append(f"charge {0:15.6f}") buff.append("end") - with open(file_name, "w") as fp: + with open_file(file_name, "w") as fp: fp.write("\n".join(buff)) diff --git a/dpdata/plugins/orca.py b/dpdata/plugins/orca.py index 9dc32c32c..7a0b806c9 100644 --- a/dpdata/plugins/orca.py +++ b/dpdata/plugins/orca.py @@ -1,11 +1,16 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np from dpdata.format import Format from dpdata.orca.output import read_orca_sp_output from dpdata.unit import EnergyConversion, ForceConversion +if TYPE_CHECKING: + from dpdata.utils import FileType + energy_convert = EnergyConversion("hartree", "eV").value() force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value() @@ -18,12 +23,12 @@ class ORCASPOutFormat(Format): printed into the output file. """ - def from_labeled_system(self, file_name: str, **kwargs) -> dict: + def from_labeled_system(self, file_name: FileType, **kwargs) -> dict: """Read from ORCA single point energy output. Parameters ---------- - file_name : str + file_name : FileType file name **kwargs keyword arguments diff --git a/dpdata/plugins/psi4.py b/dpdata/plugins/psi4.py index a0cf00e46..2bbfc2321 100644 --- a/dpdata/plugins/psi4.py +++ b/dpdata/plugins/psi4.py @@ -1,11 +1,17 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np from dpdata.format import Format from dpdata.psi4.input import write_psi4_input from dpdata.psi4.output import read_psi4_output from dpdata.unit import EnergyConversion, ForceConversion +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType energy_convert = EnergyConversion("hartree", "eV").value() force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value() @@ -19,12 +25,12 @@ class PSI4OutFormat(Format): printed into the output file. 
""" - def from_labeled_system(self, file_name: str, **kwargs) -> dict: + def from_labeled_system(self, file_name: FileType, **kwargs) -> dict: """Read from Psi4 output. Parameters ---------- - file_name : str + file_name : FileType file name **kwargs keyword arguments @@ -61,7 +67,7 @@ class PSI4InputFormat(Format): def to_system( self, data: dict, - file_name: str, + file_name: FileType, method: str, basis: str, charge: int = 0, @@ -91,7 +97,7 @@ def to_system( keyword arguments """ types = np.array(data["atom_names"])[data["atom_types"]] - with open(file_name, "w") as fout: + with open_file(file_name, "w") as fout: fout.write( write_psi4_input( types=types, diff --git a/dpdata/plugins/pwmat.py b/dpdata/plugins/pwmat.py index 80f219b6c..ba3dab160 100644 --- a/dpdata/plugins/pwmat.py +++ b/dpdata/plugins/pwmat.py @@ -1,10 +1,16 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np import dpdata.pwmat.atomconfig import dpdata.pwmat.movement from dpdata.format import Format +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType @Format.register("movement") @@ -47,12 +53,12 @@ def from_labeled_system( @Format.register("pwmat/final.config") class PwmatAtomconfigFormat(Format): @Format.post("rot_lower_triangular") - def from_system(self, file_name, **kwargs): - with open(file_name) as fp: + def from_system(self, file_name: FileType, **kwargs): + with open_file(file_name) as fp: lines = [line.rstrip("\n") for line in fp] return dpdata.pwmat.atomconfig.to_system_data(lines) - def to_system(self, data, file_name, frame_idx=0, *args, **kwargs): + def to_system(self, data, file_name: FileType, frame_idx=0, *args, **kwargs): """Dump the system in pwmat atom.config format. 
Parameters @@ -70,5 +76,5 @@ def to_system(self, data, file_name, frame_idx=0, *args, **kwargs): """ assert frame_idx < len(data["coords"]) w_str = dpdata.pwmat.atomconfig.from_system_data(data, frame_idx) - with open(file_name, "w") as fp: + with open_file(file_name, "w") as fp: fp.write(w_str) diff --git a/dpdata/plugins/vasp.py b/dpdata/plugins/vasp.py index d0681cebf..0160bde29 100644 --- a/dpdata/plugins/vasp.py +++ b/dpdata/plugins/vasp.py @@ -1,12 +1,17 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np import dpdata.vasp.outcar import dpdata.vasp.poscar import dpdata.vasp.xml from dpdata.format import Format -from dpdata.utils import uniq_atom_names +from dpdata.utils import open_file, uniq_atom_names + +if TYPE_CHECKING: + from dpdata.utils import FileType @Format.register("poscar") @@ -15,14 +20,14 @@ @Format.register("vasp/contcar") class VASPPoscarFormat(Format): @Format.post("rot_lower_triangular") - def from_system(self, file_name, **kwargs): - with open(file_name) as fp: + def from_system(self, file_name: FileType, **kwargs): + with open_file(file_name) as fp: lines = [line.rstrip("\n") for line in fp] data = dpdata.vasp.poscar.to_system_data(lines) data = uniq_atom_names(data) return data - def to_system(self, data, file_name, frame_idx=0, **kwargs): + def to_system(self, data, file_name: FileType, frame_idx=0, **kwargs): """Dump the system in vasp POSCAR format. 
Parameters @@ -37,7 +42,7 @@ def to_system(self, data, file_name, frame_idx=0, **kwargs): other parameters """ w_str = VASPStringFormat().to_system(data, frame_idx=frame_idx) - with open(file_name, "w") as fp: + with open_file(file_name, "w") as fp: fp.write(w_str) diff --git a/dpdata/plugins/xyz.py b/dpdata/plugins/xyz.py index 322bf77cb..d56a8618c 100644 --- a/dpdata/plugins/xyz.py +++ b/dpdata/plugins/xyz.py @@ -1,8 +1,14 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np from dpdata.format import Format +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType from dpdata.xyz.quip_gap_xyz import QuipGapxyzSystems from dpdata.xyz.xyz import coord_to_xyz, xyz_to_coord @@ -16,16 +22,16 @@ class XYZFormat(Format): >>> s.to("xyz", "a.xyz") """ - def to_system(self, data, file_name, **kwargs): + def to_system(self, data, file_name: FileType, **kwargs): buff = [] types = np.array(data["atom_names"])[data["atom_types"]] for cc in data["coords"]: buff.append(coord_to_xyz(cc, types)) - with open(file_name, "w") as fp: + with open_file(file_name, "w") as fp: fp.write("\n".join(buff)) - def from_system(self, file_name, **kwargs): - with open(file_name) as fp: + def from_system(self, file_name: FileType, **kwargs): + with open_file(file_name) as fp: coords, types = xyz_to_coord(fp.read()) atom_names, atom_types, atom_numbs = np.unique( types, return_inverse=True, return_counts=True diff --git a/dpdata/psi4/output.py b/dpdata/psi4/output.py index c06eb1829..c3594ffb4 100644 --- a/dpdata/psi4/output.py +++ b/dpdata/psi4/output.py @@ -1,11 +1,17 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import numpy as np from dpdata.unit import LengthConversion +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType -def read_psi4_output(fn: str) -> tuple[str, np.ndarray, float, np.ndarray]: +def read_psi4_output(fn: FileType) -> tuple[str, 
np.ndarray, float, np.ndarray]: """Read from Psi4 output. Note that both the energy and the gradient should be printed. @@ -31,7 +37,7 @@ def read_psi4_output(fn: str) -> tuple[str, np.ndarray, float, np.ndarray]: forces = None energy = None length_unit = None - with open(fn) as f: + with open_file(fn) as f: flag = 0 for line in f: if flag in (1, 3, 4, 5, 6): diff --git a/dpdata/qe/scf.py b/dpdata/qe/scf.py index 37e5fbab6..f86708605 100755 --- a/dpdata/qe/scf.py +++ b/dpdata/qe/scf.py @@ -5,6 +5,8 @@ import numpy as np +from dpdata.utils import open_file + ry2ev = 13.605693009 bohr2ang = 0.52917721067 kbar2evperang3 = 1e3 / 1.602176621e6 @@ -142,9 +144,9 @@ def get_frame(fname): path_out = fname[1] else: raise RuntimeError("invalid input") - with open(path_out) as fp: + with open_file(path_out) as fp: outlines = fp.read().split("\n") - with open(path_in) as fp: + with open_file(path_in) as fp: inlines = fp.read().split("\n") cell = get_cell(inlines) atom_names, natoms, types, coords = get_coords(inlines, cell) diff --git a/dpdata/qe/traj.py b/dpdata/qe/traj.py index 1fbf0f71c..b4be303ad 100644 --- a/dpdata/qe/traj.py +++ b/dpdata/qe/traj.py @@ -2,9 +2,15 @@ from __future__ import annotations import warnings +from typing import TYPE_CHECKING import numpy as np +from dpdata.utils import open_file + +if TYPE_CHECKING: + from dpdata.utils import FileType + from ..unit import ( EnergyConversion, ForceConversion, @@ -87,8 +93,8 @@ def load_atom_types(lines, natoms, atom_names): return np.array(ret, dtype=int) -def load_param_file(fname): - with open(fname) as fp: +def load_param_file(fname: FileType): + with open_file(fname) as fp: lines = fp.read().split("\n") natoms = int(load_key(lines, "nat")) ntypes = int(load_key(lines, "ntyp")) @@ -127,11 +133,11 @@ def _load_pos_block(fp, natoms): return blk, ss -def load_data(fname, natoms, begin=0, step=1, convert=1.0): +def load_data(fname: FileType, natoms, begin=0, step=1, convert=1.0): coords = [] steps = [] cc = 0 - with 
open(fname) as fp: + with open_file(fname) as fp: while True: blk, ss = _load_pos_block(fp, natoms) if blk is None: @@ -147,7 +153,7 @@ def load_data(fname, natoms, begin=0, step=1, convert=1.0): # def load_pos(fname, natoms) : # coords = [] -# with open(fname) as fp: +# with open_file(fname) as fp: # while True: # blk = _load_pos_block(fp, natoms) # # print(blk) @@ -164,7 +170,7 @@ def load_energy(fname, begin=0, step=1): steps = [] for ii in data[begin::step, 0]: steps.append("%d" % ii) - with open(fname) as fp: + with open_file(fname) as fp: while True: line = fp.readline() if not line: @@ -178,7 +184,7 @@ def load_energy(fname, begin=0, step=1): # def load_force(fname, natoms) : # coords = [] -# with open(fname) as fp: +# with open_file(fname) as fp: # while True: # blk = _load_pos_block(fp, natoms) # # print(blk) diff --git a/dpdata/utils.py b/dpdata/utils.py index e008120ea..8942bd54d 100644 --- a/dpdata/utils.py +++ b/dpdata/utils.py @@ -1,7 +1,10 @@ from __future__ import annotations +import io +import os import sys -from typing import overload +from contextlib import contextmanager +from typing import TYPE_CHECKING, Generator, overload if sys.version_info >= (3, 8): from typing import Literal @@ -129,3 +132,44 @@ def uniq_atom_names(data): def utf8len(s: str) -> int: """Return the byte length of a string.""" return len(s.encode("utf-8")) + + +if TYPE_CHECKING: + FileType = io.IOBase | str | os.PathLike + + +@contextmanager +def open_file(file: FileType, *args, **kwargs) -> Generator[io.IOBase, None, None]: + """A context manager that yields a file object. + + Parameters + ---------- + file : file object or file path + A file object or a file path. + + Yields + ------ + file : io.IOBase + A file object. + *args + parameters to open + **kwargs + other parameters + + Raises + ------ + ValueError + If file is not a file object or a file + + Examples + -------- + >>> with open_file("file.txt") as file: + ... 
print(file.read()) + """ + if isinstance(file, io.IOBase): + yield file + elif isinstance(file, (str, os.PathLike)): + with open(file, *args, **kwargs) as f: + yield f + else: + raise ValueError("file must be a file object or a file path.") diff --git a/tests/test_read_file.py b/tests/test_read_file.py new file mode 100644 index 000000000..e7dca54e6 --- /dev/null +++ b/tests/test_read_file.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +import io +import unittest +from pathlib import Path + +from dpdata.utils import open_file + + +class TestReadFile(unittest.TestCase): + def test_open_file_from_string_io(self): + string_io = io.StringIO("Hello, world!") + with open_file(string_io) as file: + self.assertEqual(file.read(), "Hello, world!") + + def test_open_file_from_file_str(self): + with open_file("/dev/null") as file: + self.assertEqual(file.read(), Path("/dev/null").read_text()) + + def test_open_file_from_file_path(self): + with open_file(Path("/dev/null")) as file: + self.assertEqual(file.read(), Path("/dev/null").read_text()) From c311d16ff9db600815acdf23b005fa4ad388607c Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 2 Sep 2024 22:59:20 -0400 Subject: [PATCH 23/24] fix: only apply comp_prec for floating dtypes (#711) Fix #703. ## Summary by CodeRabbit - **New Features** - Improved handling of data types during the reshaping process, ensuring type conversion only occurs for floating-point data. - **Bug Fixes** - Enhanced robustness of data processing by preventing unnecessary type casting for non-floating-point data types. 
Signed-off-by: Jinzhe Zeng --- dpdata/deepmd/comp.py | 4 +++- dpdata/deepmd/hdf5.py | 7 ++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/dpdata/deepmd/comp.py b/dpdata/deepmd/comp.py index 5ba3914e7..3d656b709 100644 --- a/dpdata/deepmd/comp.py +++ b/dpdata/deepmd/comp.py @@ -204,7 +204,9 @@ def dump(folder, data, set_size=5000, comp_prec=np.float32, remove_sets=True): f"Shape of {dtype.name} is not (nframes, ...), but {dtype.shape}. This type of data will not converted to deepmd/npy format." ) continue - ddata = np.reshape(data[dtype.name], [nframes, -1]).astype(comp_prec) + ddata = np.reshape(data[dtype.name], [nframes, -1]) + if np.issubdtype(ddata.dtype, np.floating): + ddata = ddata.astype(comp_prec) for ii in range(nsets): set_stt = ii * set_size set_end = (ii + 1) * set_size diff --git a/dpdata/deepmd/hdf5.py b/dpdata/deepmd/hdf5.py index 78f4e33fe..6b5878932 100644 --- a/dpdata/deepmd/hdf5.py +++ b/dpdata/deepmd/hdf5.py @@ -252,9 +252,10 @@ def dump( for dt, prop in data_types.items(): if dt in data: if prop["dump"]: - reshaped_data[dt] = np.reshape(data[dt], prop["shape"]).astype( - comp_prec - ) + ddata = np.reshape(data[dt], prop["shape"]) + if np.issubdtype(ddata.dtype, np.floating): + ddata = ddata.astype(comp_prec) + reshaped_data[dt] = ddata # dump frame properties: cell, coord, energy, force and virial nsets = nframes // set_size From bad02850c7cb982f937ce89e296809948081f9c7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 3 Sep 2024 11:00:47 +0800 Subject: [PATCH 24/24] [pre-commit.ci] pre-commit autoupdate (#712) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.6.2 → v0.6.3](https://github.com/astral-sh/ruff-pre-commit/compare/v0.6.2...v0.6.3) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f4abb50c3..3f43b6975 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: # Python - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.6.2 + rev: v0.6.3 hooks: - id: ruff args: ["--fix"]