From 4d39a945fdcf666aebf38f1b53f0cd79320a778f Mon Sep 17 00:00:00 2001
From: Yifan Li <yifanl0716@gmail.com>
Date: Tue, 2 Apr 2024 16:49:15 -0400
Subject: [PATCH 01/12] add support for n2p2 data format

---
 dpdata/plugins/np2p.py | 174 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 174 insertions(+)
 create mode 100644 dpdata/plugins/np2p.py

diff --git a/dpdata/plugins/np2p.py b/dpdata/plugins/np2p.py
new file mode 100644
index 000000000..8d326d040
--- /dev/null
+++ b/dpdata/plugins/np2p.py
@@ -0,0 +1,174 @@
+import numpy as np
+
+from dpdata.format import Format
+from ..unit import EnergyConversion, ForceConversion, LengthConversion
+
+length_convert = LengthConversion("bohr", "angstrom").value()
+energy_convert = EnergyConversion("hartree", "eV").value()
+force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value()
+
+def match_indices(atype1, atype2):
+    # Ensure atype2 is a numpy array for efficient operations
+    atype2 = np.array(atype2)
+    # Placeholder for matched indices
+    matched_indices = []
+    # Track used indices to handle duplicates
+    used_indices = set()
+
+    # Iterate over each element in atype1
+    for element in atype1:
+        # Find all indices of the current element in atype2
+        # np.where returns a tuple, so [0] is used to access the array of indices
+        indices = np.where(atype2 == element)[0]
+        
+        # Find the first unused index
+        for index in indices:
+            if index not in used_indices:
+                # Add the index to the results and mark it as used
+                matched_indices.append(index)
+                used_indices.add(index)
+                break  # Move to the next element in atype1
+
+    return matched_indices
+
+@Format.register("n2p2")
+class N2P2Format(Format):
+    def from_labeled_system(self, file_name, **kwargs):
+        """Implement LabeledSystem.from that converts from this format to LabeledSystem.
+
+        Parameters
+        ----------
+        file_name : str
+            file name, i.e. the first argument
+        **kwargs : dict
+            keyword arguments that will be passed from the method
+
+        Returns
+        -------
+        data : dict
+            system data, whose keys are defined in LabeledSystem.DTYPES
+        """
+        cells = []
+        coords = []
+        atypes = []
+        forces = []
+        energies = []
+        natom0 = None
+        natoms0 = None
+        atom_types0 = None
+        with open(file_name, 'r') as file:
+            for line in file:
+                line = line.strip()  # Remove leading/trailing whitespace
+                if line.lower() == 'begin':
+                    current_section = []  # Start a new section
+                    cell = []
+                    coord = []
+                    atype = []
+                    force = []
+                    energy = None
+                elif line.lower() == 'end':
+                    # If we are at the end of a section, process the section
+                    assert len(coord) == len(atype) == len(force), "Number of atoms, atom types, and forces must match."
+
+                    # Check if the number of atoms is consistent across all frames
+                    natom = len(coord)
+                    if natom0 is None:
+                        natom0 = natom
+                    else:
+                        assert natom == natom0, "The number of atoms in all frames must be the same."
+
+                    # Check if the number of atoms of each type is consistent across all frames
+                    atype = np.array(atype)
+                    atype_sroted = sorted(atype)
+                    unique_atypes = set(atype_sroted)
+                    unique_atypes_list = list(unique_atypes)
+                    ntypes = len(unique_atypes)
+                    natoms = [atype_sroted.count(at) for at in unique_atypes]
+                    if natoms0 is None:
+                        natoms0 = natoms
+                    else:
+                        assert natoms == natoms0, "The number of atoms of each type in all frames must be the same."
+                    if atom_types0 is None:
+                        atom_types0 = atype
+                    atom_order = match_indices(atom_types0, atype)
+
+                    cell = np.array(cell, dtype=float)
+                    coord = np.array(coord, dtype=float)[atom_order]
+                    force = np.array(force, dtype=float)[atom_order]
+
+                    cells.append(cell)
+                    coords.append(coord)
+                    forces.append(force)
+                    energies.append(float(energy))
+
+                    current_section = None  # Reset for the next section
+                elif current_section is not None:
+                    # If we are inside a section, append the line to the current section
+                    # current_section.append(line)
+                    line_contents = line.split()
+                    if line_contents[0] == 'lattice':
+                        cell.append(line_contents[1:])
+                    elif line_contents[0] == 'atom':
+                        coord.append(line_contents[1:4])
+                        atype.append(line_contents[4])
+                        force.append(line_contents[7:10])
+                    elif line_contents[0] == 'energy':
+                        energy = line_contents[1]
+                
+        atom_names = unique_atypes_list
+        atom_numbs = natoms
+        atom_types = np.zeros(len(atom_types0), dtype=int)
+        for i in range(ntypes):
+            atom_types[atom_types0 == unique_atypes_list[i]] = i
+        
+        cells = np.array(cells) * length_convert
+        coords = np.array(coords) * length_convert
+        forces = np.array(forces) * force_convert
+        energies = np.array(energies) * energy_convert
+
+        return {
+            "atom_names": list(atom_names),
+            "atom_numbs": list(atom_numbs),
+            "atom_types": atom_types,
+            "coords": coords,
+            "cells": cells,
+            "nopbc": False,
+            "orig": np.zeros(3),
+            "energies": energies,
+            "forces": forces,
+        }
+
+    def to_labeled_system(self, data, file_name, **kwargs):
+        """Implement LabeledSystem.to that converts from LabeledSystem to this format.
+
+        By default, LabeledSystem.to will fallback to System.to.
+
+        Parameters
+        ----------
+        data : dict
+            system data, whose keys are defined in LabeledSystem.DTYPES
+        *args : list
+            arguments that will be passed from the method
+        **kwargs : dict
+            keyword arguments that will be passed from the method
+        """
+        buff = []
+        nframe = len(data["energies"])
+        natom = len(data["atom_types"])
+        atom_names = data["atom_names"]
+        for frame in range(nframe):
+            coord = data["coords"][frame] / length_convert
+            force = data["forces"][frame] / force_convert
+            energy = data["energies"][frame] / energy_convert
+            cell = data["cells"][frame] / length_convert
+            atype = data["atom_types"]
+            buff.append("begin")
+            for i in range(3):
+                buff.append(f"lattice {cell[i][0]:15.6f}  {cell[i][2]:15.6f}  {cell[i][1]:15.6f}")
+            for i in range(natom):
+                buff.append(f"atom {coord[i][0]:15.6f} {coord[i][1]:15.6f} {coord[i][2]:15.6f} {atom_names[atype[i]]:>7} {0:15.6f} {0:15.6f} {force[i][0]:15.6e} {force[i][1]:15.6e} {force[i][2]:15.6e}")
+            buff.append(f"energy {energy:15.6f}")
+            buff.append(f"charge {0:15.6f}")
+            buff.append("end")
+        with open(file_name, "w") as fp:
+            fp.write("\n".join(buff))
\ No newline at end of file

From 5cac82ec839ce961380efcf5c4bdb6a550ef078d Mon Sep 17 00:00:00 2001
From: Yifan Li <yifanl0716@gmail.com>
Date: Tue, 2 Apr 2024 16:51:30 -0400
Subject: [PATCH 02/12] add n2p2 in README

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 67942ac8f..ab489a75e 100644
--- a/README.md
+++ b/README.md
@@ -96,6 +96,7 @@ The `System` or `LabeledSystem` can be constructed from the following file forma
 | ABACUS  | STRU        | True         | True    | LabeledSystem | 'abacus/relax'       |
 | ase     | structure   | True         | True    | MultiSystems  | 'ase/structure'      |
 | DFTB+   | dftbplus    | False        | True    | LabeledSystem | 'dftbplus'           |
+| n2p2    | n2p2        | True         | True    | LabeledSystem | 'n2p2'               |
 
 
 The Class `dpdata.MultiSystems`  can read data  from a dir which may contains many files of different systems, or from single xyz file which contains different systems.

From 4cf536fa5e2da4ddfc1edd6ca562282fe80a994b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 2 Apr 2024 20:52:05 +0000
Subject: [PATCH 03/12] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 dpdata/plugins/np2p.py | 43 +++++++++++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 15 deletions(-)

diff --git a/dpdata/plugins/np2p.py b/dpdata/plugins/np2p.py
index 8d326d040..97c718f96 100644
--- a/dpdata/plugins/np2p.py
+++ b/dpdata/plugins/np2p.py
@@ -1,12 +1,14 @@
 import numpy as np
 
 from dpdata.format import Format
+
 from ..unit import EnergyConversion, ForceConversion, LengthConversion
 
 length_convert = LengthConversion("bohr", "angstrom").value()
 energy_convert = EnergyConversion("hartree", "eV").value()
 force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value()
 
+
 def match_indices(atype1, atype2):
     # Ensure atype2 is a numpy array for efficient operations
     atype2 = np.array(atype2)
@@ -20,7 +22,7 @@ def match_indices(atype1, atype2):
         # Find all indices of the current element in atype2
         # np.where returns a tuple, so [0] is used to access the array of indices
         indices = np.where(atype2 == element)[0]
-        
+
         # Find the first unused index
         for index in indices:
             if index not in used_indices:
@@ -31,6 +33,7 @@ def match_indices(atype1, atype2):
 
     return matched_indices
 
+
 @Format.register("n2p2")
 class N2P2Format(Format):
     def from_labeled_system(self, file_name, **kwargs):
@@ -56,26 +59,30 @@ def from_labeled_system(self, file_name, **kwargs):
         natom0 = None
         natoms0 = None
         atom_types0 = None
-        with open(file_name, 'r') as file:
+        with open(file_name) as file:
             for line in file:
                 line = line.strip()  # Remove leading/trailing whitespace
-                if line.lower() == 'begin':
+                if line.lower() == "begin":
                     current_section = []  # Start a new section
                     cell = []
                     coord = []
                     atype = []
                     force = []
                     energy = None
-                elif line.lower() == 'end':
+                elif line.lower() == "end":
                     # If we are at the end of a section, process the section
-                    assert len(coord) == len(atype) == len(force), "Number of atoms, atom types, and forces must match."
+                    assert (
+                        len(coord) == len(atype) == len(force)
+                    ), "Number of atoms, atom types, and forces must match."
 
                     # Check if the number of atoms is consistent across all frames
                     natom = len(coord)
                     if natom0 is None:
                         natom0 = natom
                     else:
-                        assert natom == natom0, "The number of atoms in all frames must be the same."
+                        assert (
+                            natom == natom0
+                        ), "The number of atoms in all frames must be the same."
 
                     # Check if the number of atoms of each type is consistent across all frames
                     atype = np.array(atype)
@@ -87,7 +94,9 @@ def from_labeled_system(self, file_name, **kwargs):
                     if natoms0 is None:
                         natoms0 = natoms
                     else:
-                        assert natoms == natoms0, "The number of atoms of each type in all frames must be the same."
+                        assert (
+                            natoms == natoms0
+                        ), "The number of atoms of each type in all frames must be the same."
                     if atom_types0 is None:
                         atom_types0 = atype
                     atom_order = match_indices(atom_types0, atype)
@@ -106,21 +115,21 @@ def from_labeled_system(self, file_name, **kwargs):
                     # If we are inside a section, append the line to the current section
                     # current_section.append(line)
                     line_contents = line.split()
-                    if line_contents[0] == 'lattice':
+                    if line_contents[0] == "lattice":
                         cell.append(line_contents[1:])
-                    elif line_contents[0] == 'atom':
+                    elif line_contents[0] == "atom":
                         coord.append(line_contents[1:4])
                         atype.append(line_contents[4])
                         force.append(line_contents[7:10])
-                    elif line_contents[0] == 'energy':
+                    elif line_contents[0] == "energy":
                         energy = line_contents[1]
-                
+
         atom_names = unique_atypes_list
         atom_numbs = natoms
         atom_types = np.zeros(len(atom_types0), dtype=int)
         for i in range(ntypes):
             atom_types[atom_types0 == unique_atypes_list[i]] = i
-        
+
         cells = np.array(cells) * length_convert
         coords = np.array(coords) * length_convert
         forces = np.array(forces) * force_convert
@@ -164,11 +173,15 @@ def to_labeled_system(self, data, file_name, **kwargs):
             atype = data["atom_types"]
             buff.append("begin")
             for i in range(3):
-                buff.append(f"lattice {cell[i][0]:15.6f}  {cell[i][2]:15.6f}  {cell[i][1]:15.6f}")
+                buff.append(
+                    f"lattice {cell[i][0]:15.6f}  {cell[i][2]:15.6f}  {cell[i][1]:15.6f}"
+                )
             for i in range(natom):
-                buff.append(f"atom {coord[i][0]:15.6f} {coord[i][1]:15.6f} {coord[i][2]:15.6f} {atom_names[atype[i]]:>7} {0:15.6f} {0:15.6f} {force[i][0]:15.6e} {force[i][1]:15.6e} {force[i][2]:15.6e}")
+                buff.append(
+                    f"atom {coord[i][0]:15.6f} {coord[i][1]:15.6f} {coord[i][2]:15.6f} {atom_names[atype[i]]:>7} {0:15.6f} {0:15.6f} {force[i][0]:15.6e} {force[i][1]:15.6e} {force[i][2]:15.6e}"
+                )
             buff.append(f"energy {energy:15.6f}")
             buff.append(f"charge {0:15.6f}")
             buff.append("end")
         with open(file_name, "w") as fp:
-            fp.write("\n".join(buff))
\ No newline at end of file
+            fp.write("\n".join(buff))

From b7dcc82ae34df95afcbaf99605663c882034c19b Mon Sep 17 00:00:00 2001
From: Yifan Li <yifanl0716@gmail.com>
Date: Tue, 2 Apr 2024 16:54:57 -0400
Subject: [PATCH 04/12] add argument description in the docstring for file_name
 of to_labeled_system function

---
 dpdata/plugins/np2p.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dpdata/plugins/np2p.py b/dpdata/plugins/np2p.py
index 97c718f96..04076e04c 100644
--- a/dpdata/plugins/np2p.py
+++ b/dpdata/plugins/np2p.py
@@ -156,6 +156,8 @@ def to_labeled_system(self, data, file_name, **kwargs):
         ----------
         data : dict
             system data, whose keys are defined in LabeledSystem.DTYPES
+        file_name : str
+            file name, where the data will be written
         *args : list
             arguments that will be passed from the method
         **kwargs : dict

From 715a8369a1163c14ed257339956de62bff51b12f Mon Sep 17 00:00:00 2001
From: Yifan Li <yifanl0716@gmail.com>
Date: Tue, 2 Apr 2024 17:42:22 -0400
Subject: [PATCH 05/12] fix swapped lattice vector components when writing cell

---
 dpdata/plugins/np2p.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dpdata/plugins/np2p.py b/dpdata/plugins/np2p.py
index 04076e04c..1bc265fd9 100644
--- a/dpdata/plugins/np2p.py
+++ b/dpdata/plugins/np2p.py
@@ -176,7 +176,7 @@ def to_labeled_system(self, data, file_name, **kwargs):
             buff.append("begin")
             for i in range(3):
                 buff.append(
-                    f"lattice {cell[i][0]:15.6f}  {cell[i][2]:15.6f}  {cell[i][1]:15.6f}"
+                    f"lattice {cell[i][0]:15.6f}  {cell[i][1]:15.6f}  {cell[i][2]:15.6f}"
                 )
             for i in range(natom):
                 buff.append(

From fdc7ef05b4109b48d0480f2fde021aed9b7c3b2f Mon Sep 17 00:00:00 2001
From: Yifan Li <yifanl0716@gmail.com>
Date: Tue, 2 Apr 2024 18:16:40 -0400
Subject: [PATCH 06/12] do not change the order of elements for n2p2 data

---
 dpdata/plugins/np2p.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dpdata/plugins/np2p.py b/dpdata/plugins/np2p.py
index 1bc265fd9..d8d619463 100644
--- a/dpdata/plugins/np2p.py
+++ b/dpdata/plugins/np2p.py
@@ -86,11 +86,11 @@ def from_labeled_system(self, file_name, **kwargs):
 
                     # Check if the number of atoms of each type is consistent across all frames
                     atype = np.array(atype)
-                    atype_sroted = sorted(atype)
-                    unique_atypes = set(atype_sroted)
+                    unique_dict = {element: None for element in atype}
+                    unique_atypes = np.array(list(unique_dict.keys()))
                     unique_atypes_list = list(unique_atypes)
                     ntypes = len(unique_atypes)
-                    natoms = [atype_sroted.count(at) for at in unique_atypes]
+                    natoms = [len(atype[atype==at]) for at in unique_atypes]
                     if natoms0 is None:
                         natoms0 = natoms
                     else:

From 537058007b5c0704cc5712d9e32473156ae5eef0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 2 Apr 2024 22:18:57 +0000
Subject: [PATCH 07/12] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 dpdata/plugins/np2p.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dpdata/plugins/np2p.py b/dpdata/plugins/np2p.py
index d8d619463..b2275355e 100644
--- a/dpdata/plugins/np2p.py
+++ b/dpdata/plugins/np2p.py
@@ -90,7 +90,7 @@ def from_labeled_system(self, file_name, **kwargs):
                     unique_atypes = np.array(list(unique_dict.keys()))
                     unique_atypes_list = list(unique_atypes)
                     ntypes = len(unique_atypes)
-                    natoms = [len(atype[atype==at]) for at in unique_atypes]
+                    natoms = [len(atype[atype == at]) for at in unique_atypes]
                     if natoms0 is None:
                         natoms0 = natoms
                     else:

From d2811250649d162723ea3e67def392659048c425 Mon Sep 17 00:00:00 2001
From: Yifan Li <yifanl0716@gmail.com>
Date: Tue, 2 Apr 2024 18:20:58 -0400
Subject: [PATCH 08/12] add documentation for N2P2Format class

---
 dpdata/plugins/np2p.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/dpdata/plugins/np2p.py b/dpdata/plugins/np2p.py
index d8d619463..616574538 100644
--- a/dpdata/plugins/np2p.py
+++ b/dpdata/plugins/np2p.py
@@ -36,6 +36,12 @@ def match_indices(atype1, atype2):
 
 @Format.register("n2p2")
 class N2P2Format(Format):
+    """n2p2
+
+    This class support the conversion from and to the training data of n2p2 format.
+    For more information about the n2p2 format, please refer to https://compphysvienna.github.io/n2p2/topics/cfg_file.html
+    """
+
     def from_labeled_system(self, file_name, **kwargs):
         """Implement LabeledSystem.from that converts from this format to LabeledSystem.
 

From aacc1bd19be6687322222dd446d49b34a256b396 Mon Sep 17 00:00:00 2001
From: Yifan Li <yifanl0716@gmail.com>
Date: Tue, 2 Apr 2024 18:23:56 -0400
Subject: [PATCH 09/12] rename: np2p->n2p2

---
 dpdata/plugins/{np2p.py => n2p2.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename dpdata/plugins/{np2p.py => n2p2.py} (100%)

diff --git a/dpdata/plugins/np2p.py b/dpdata/plugins/n2p2.py
similarity index 100%
rename from dpdata/plugins/np2p.py
rename to dpdata/plugins/n2p2.py

From 506b1872dbad41d1f47d90210edde01e583fff3d Mon Sep 17 00:00:00 2001
From: Yifan Li <yifanl0716@gmail.com>
Date: Tue, 2 Apr 2024 18:25:33 -0400
Subject: [PATCH 10/12] add unittest for n2p2

---
 tests/n2p2/input.data | 20 ++++++++++++++
 tests/test_n2p2.py    | 64 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+)
 create mode 100644 tests/n2p2/input.data
 create mode 100644 tests/test_n2p2.py

diff --git a/tests/n2p2/input.data b/tests/n2p2/input.data
new file mode 100644
index 000000000..02eef0606
--- /dev/null
+++ b/tests/n2p2/input.data
@@ -0,0 +1,20 @@
+begin
+lattice       18.897261         0.000000         0.000000
+lattice        0.000000        18.897261         0.000000
+lattice        0.000000         0.000000        18.897261
+atom        1.889726        0.000000        0.000000       O        0.000000        0.000000    9.723452e-03    0.000000e+00    0.000000e+00
+atom        0.000000        0.000000        2.834589       H        0.000000        0.000000    0.000000e+00    0.000000e+00    1.458518e-02
+atom        1.889726        0.000000        5.669178       H        0.000000        0.000000    9.723452e-03    0.000000e+00    2.917036e-02
+energy        0.044099
+charge        0.000000
+end
+begin
+lattice       18.897261         0.000000         0.000000
+lattice        0.000000        18.897261         0.000000
+lattice        0.000000         0.000000        18.897261
+atom        3.779452        1.889726        1.889726       O        0.000000        0.000000    4.861726e-02    3.889381e-02    3.889381e-02
+atom        1.889726        1.889726        4.724315       H        0.000000        0.000000    3.889381e-02    3.889381e-02    5.347899e-02
+atom        3.779452        1.889726        7.558904       H        0.000000        0.000000    4.861726e-02    3.889381e-02    6.806416e-02
+energy        0.084523
+charge        0.000000
+end
\ No newline at end of file
diff --git a/tests/test_n2p2.py b/tests/test_n2p2.py
new file mode 100644
index 000000000..741bbdf2d
--- /dev/null
+++ b/tests/test_n2p2.py
@@ -0,0 +1,64 @@
+import os
+import unittest
+
+import numpy as np
+from context import dpdata
+
+from dpdata.unit import EnergyConversion, ForceConversion, LengthConversion
+
+length_convert = LengthConversion("bohr", "angstrom").value()
+energy_convert = EnergyConversion("hartree", "eV").value()
+force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value()
+
+
+class TestN2P2(unittest.TestCase):
+    def setUp(self):
+        self.data_ref = {'atom_numbs': [1, 2], 'atom_names': ['O', 'H'], 'atom_types': np.array([0, 1, 1]), 'orig': np.array([0., 0., 0.]), 'cells': np.array([[[10.,  0.,  0.],
+        [ 0., 10.,  0.],
+        [ 0.,  0., 10.]],
+
+       [[10.,  0.,  0.],
+        [ 0., 10.,  0.],
+        [ 0.,  0., 10.]]]), 'coords': np.array([[[1. , 0. , 0. ],
+        [0. , 0. , 1.5],
+        [1. , 0. , 3. ]],
+
+       [[2. , 1. , 1. ],
+        [1. , 1. , 2.5],
+        [2. , 1. , 4. ]]]), 'energies': np.array([1.2, 2.3]), 'forces': np.array([[[0.5 , 0.  , 0.  ],
+        [0.  , 0.  , 0.75],
+        [0.5 , 0.  , 1.5 ]],
+
+       [[2.5 , 2.  , 2.  ],
+        [2.  , 2.  , 2.75],
+        [2.5 , 2.  , 3.5 ]]])}
+
+    def test_n2p2_from_labeled_system(self):
+        data = dpdata.LabeledSystem("n2p2/input.data", fmt="n2p2")
+        for key in self.data_ref:
+            if key == "atom_numbs":
+                self.assertEqual(data[key], self.data_ref[key])
+            elif key == "atom_names":
+                self.assertEqual(data[key], self.data_ref[key])
+            elif key == 'atom_types':
+                np.testing.assert_array_equal(data[key], self.data_ref[key])
+            else:
+                np.testing.assert_array_almost_equal(data[key], self.data_ref[key], decimal=5)
+
+    def test_n2p2_to_labeled_system(self):
+        output_file = "n2p2/output.data"
+        data = dpdata.LabeledSystem.from_dict({"data": self.data_ref})
+        data.to_n2p2(output_file)
+        ref_file = "n2p2/input.data"
+        with open(ref_file, 'r') as file1, open(output_file, 'r') as file2:
+            file1_lines = file1.readlines()
+            file2_lines = file2.readlines()
+
+        file1_lines = [line.strip('\n') for line in file1_lines]
+        file2_lines = [line.strip('\n') for line in file2_lines]
+        
+        self.assertListEqual(file1_lines, file2_lines)
+    
+    def tearDown(self):
+        if os.path.isfile("n2p2/output.data"):
+            os.remove("n2p2/output.data")
\ No newline at end of file

From ba6a00094584978cbec97f3b8ec8826e248ab2ac Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 2 Apr 2024 22:26:24 +0000
Subject: [PATCH 11/12] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/test_n2p2.py | 62 ++++++++++++++++++++++++++--------------------
 1 file changed, 35 insertions(+), 27 deletions(-)

diff --git a/tests/test_n2p2.py b/tests/test_n2p2.py
index 741bbdf2d..855a27524 100644
--- a/tests/test_n2p2.py
+++ b/tests/test_n2p2.py
@@ -13,25 +13,31 @@
 
 class TestN2P2(unittest.TestCase):
     def setUp(self):
-        self.data_ref = {'atom_numbs': [1, 2], 'atom_names': ['O', 'H'], 'atom_types': np.array([0, 1, 1]), 'orig': np.array([0., 0., 0.]), 'cells': np.array([[[10.,  0.,  0.],
-        [ 0., 10.,  0.],
-        [ 0.,  0., 10.]],
-
-       [[10.,  0.,  0.],
-        [ 0., 10.,  0.],
-        [ 0.,  0., 10.]]]), 'coords': np.array([[[1. , 0. , 0. ],
-        [0. , 0. , 1.5],
-        [1. , 0. , 3. ]],
-
-       [[2. , 1. , 1. ],
-        [1. , 1. , 2.5],
-        [2. , 1. , 4. ]]]), 'energies': np.array([1.2, 2.3]), 'forces': np.array([[[0.5 , 0.  , 0.  ],
-        [0.  , 0.  , 0.75],
-        [0.5 , 0.  , 1.5 ]],
-
-       [[2.5 , 2.  , 2.  ],
-        [2.  , 2.  , 2.75],
-        [2.5 , 2.  , 3.5 ]]])}
+        self.data_ref = {
+            "atom_numbs": [1, 2],
+            "atom_names": ["O", "H"],
+            "atom_types": np.array([0, 1, 1]),
+            "orig": np.array([0.0, 0.0, 0.0]),
+            "cells": np.array(
+                [
+                    [[10.0, 0.0, 0.0], [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]],
+                    [[10.0, 0.0, 0.0], [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]],
+                ]
+            ),
+            "coords": np.array(
+                [
+                    [[1.0, 0.0, 0.0], [0.0, 0.0, 1.5], [1.0, 0.0, 3.0]],
+                    [[2.0, 1.0, 1.0], [1.0, 1.0, 2.5], [2.0, 1.0, 4.0]],
+                ]
+            ),
+            "energies": np.array([1.2, 2.3]),
+            "forces": np.array(
+                [
+                    [[0.5, 0.0, 0.0], [0.0, 0.0, 0.75], [0.5, 0.0, 1.5]],
+                    [[2.5, 2.0, 2.0], [2.0, 2.0, 2.75], [2.5, 2.0, 3.5]],
+                ]
+            ),
+        }
 
     def test_n2p2_from_labeled_system(self):
         data = dpdata.LabeledSystem("n2p2/input.data", fmt="n2p2")
@@ -40,25 +46,27 @@ def test_n2p2_from_labeled_system(self):
                 self.assertEqual(data[key], self.data_ref[key])
             elif key == "atom_names":
                 self.assertEqual(data[key], self.data_ref[key])
-            elif key == 'atom_types':
+            elif key == "atom_types":
                 np.testing.assert_array_equal(data[key], self.data_ref[key])
             else:
-                np.testing.assert_array_almost_equal(data[key], self.data_ref[key], decimal=5)
+                np.testing.assert_array_almost_equal(
+                    data[key], self.data_ref[key], decimal=5
+                )
 
     def test_n2p2_to_labeled_system(self):
         output_file = "n2p2/output.data"
         data = dpdata.LabeledSystem.from_dict({"data": self.data_ref})
         data.to_n2p2(output_file)
         ref_file = "n2p2/input.data"
-        with open(ref_file, 'r') as file1, open(output_file, 'r') as file2:
+        with open(ref_file) as file1, open(output_file) as file2:
             file1_lines = file1.readlines()
             file2_lines = file2.readlines()
 
-        file1_lines = [line.strip('\n') for line in file1_lines]
-        file2_lines = [line.strip('\n') for line in file2_lines]
-        
+        file1_lines = [line.strip("\n") for line in file1_lines]
+        file2_lines = [line.strip("\n") for line in file2_lines]
+
         self.assertListEqual(file1_lines, file2_lines)
-    
+
     def tearDown(self):
         if os.path.isfile("n2p2/output.data"):
-            os.remove("n2p2/output.data")
\ No newline at end of file
+            os.remove("n2p2/output.data")

From 1e1384a3e2d3d09593a71ce847c4a206e68f8fdc Mon Sep 17 00:00:00 2001
From: Yifan Li <yifanl0716@gmail.com>
Date: Tue, 2 Apr 2024 18:28:59 -0400
Subject: [PATCH 12/12] update docstrings

---
 dpdata/plugins/n2p2.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dpdata/plugins/n2p2.py b/dpdata/plugins/n2p2.py
index c1040d92a..7162f09fa 100644
--- a/dpdata/plugins/n2p2.py
+++ b/dpdata/plugins/n2p2.py
@@ -36,14 +36,14 @@ def match_indices(atype1, atype2):
 
 @Format.register("n2p2")
 class N2P2Format(Format):
-    """n2p2
+    """n2p2.
 
     This class support the conversion from and to the training data of n2p2 format.
     For more information about the n2p2 format, please refer to https://compphysvienna.github.io/n2p2/topics/cfg_file.html
     """
 
     def from_labeled_system(self, file_name, **kwargs):
-        """Implement LabeledSystem.from that converts from this format to LabeledSystem.
+        """Read from n2p2 format.
 
         Parameters
         ----------
@@ -154,7 +154,7 @@ def from_labeled_system(self, file_name, **kwargs):
         }
 
     def to_labeled_system(self, data, file_name, **kwargs):
-        """Implement LabeledSystem.to that converts from LabeledSystem to this format.
+        """Write n2p2 format.
 
         By default, LabeledSystem.to will fallback to System.to.