From bf15bc34f9ac00e3f3d6c1129c4d61384005f4f6 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 1 Sep 2025 11:45:13 +0000
Subject: [PATCH 01/10] Initial plan


From fbe45c1c45e4277b04fe71ecc0a09c1cdb5c0dcc Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 1 Sep 2025 11:57:37 +0000
Subject: [PATCH 02/10] feat(quip/gap/xyz): implement to_labeled_system and
 to_multi_systems methods

Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com>
---
 dpdata/plugins/xyz.py                 | 127 ++++++++++++++++++++++++++
 tests/test_quip_gap_xyz_to_methods.py | 105 +++++++++++++++++++++
 2 files changed, 232 insertions(+)
 create mode 100644 tests/test_quip_gap_xyz_to_methods.py

diff --git a/dpdata/plugins/xyz.py b/dpdata/plugins/xyz.py
index d56a8618c..412308b45 100644
--- a/dpdata/plugins/xyz.py
+++ b/dpdata/plugins/xyz.py
@@ -5,6 +5,7 @@
 import numpy as np
 
 from dpdata.format import Format
+from dpdata.periodic_table import Element
 from dpdata.utils import open_file
 
 if TYPE_CHECKING:
@@ -50,9 +51,135 @@ def from_system(self, file_name: FileType, **kwargs):
 @Format.register("quip/gap/xyz")
 @Format.register("quip/gap/xyz_file")
 class QuipGapXYZFormat(Format):
+    # Class variable to track which files have been written to
+    _written_files = set()
+    
     def from_labeled_system(self, data, **kwargs):
         return data
 
     def from_multi_systems(self, file_name, **kwargs):
         # here directory is the file_name
         return QuipGapxyzSystems(file_name)
+
+    def to_labeled_system(self, data, file_name: FileType, **kwargs):
+        """Write LabeledSystem data to QUIP/GAP XYZ format file.
+        
+        Parameters
+        ----------
+        data : dict
+            system data
+        file_name : FileType
+            output file name
+        **kwargs : dict
+            additional arguments
+        """
+        frames = []
+        nframes = len(data["energies"])
+        
+        for frame_idx in range(nframes):
+            frame_lines = self._format_single_frame(data, frame_idx)
+            frames.append("\n".join(frame_lines))
+        
+        # Determine if we should append based on whether file was written before
+        file_path = str(file_name)
+        if file_path in self._written_files:
+            mode = "a"
+        else:
+            mode = "w"
+            self._written_files.add(file_path)
+        
+        with open_file(file_name, mode) as fp:
+            if mode == "a":
+                # Add newline separator if appending
+                fp.write("\n")
+            fp.write("\n".join(frames))
+
+    def to_multi_systems(self, formulas, directory, **kwargs):
+        """Return single filename for all systems in QUIP/GAP XYZ format.
+        
+        For QUIP/GAP XYZ format, all systems are written to a single file.
+        We use a class variable to track which files have been written to.
+        
+        Parameters
+        ----------
+        formulas : list[str]
+            list of system names/formulas  
+        directory : str
+            output filename 
+        **kwargs : dict
+            additional arguments
+            
+        Returns
+        -------
+        list[str]
+            list with same filename for all systems
+        """
+        # Clear the written files set for this new operation
+        file_path = str(directory)
+        if file_path in self._written_files:
+            self._written_files.remove(file_path)
+        
+        # Return the same filename for all systems
+        return [directory] * len(formulas)
+
+    def _format_single_frame(self, data, frame_idx):
+        """Format a single frame of system data into QUIP/GAP XYZ format lines.
+        
+        Parameters
+        ----------
+        data : dict
+            system data
+        frame_idx : int
+            frame index
+            
+        Returns
+        -------
+        list[str]
+            lines for the frame
+        """
+        # Number of atoms
+        natoms = len(data["atom_types"])
+        
+        # Build header line with metadata
+        header_parts = []
+        
+        # Energy
+        energy = data["energies"][frame_idx]
+        header_parts.append(f"energy={energy:.12e}")
+        
+        # Virial (if present)
+        if "virials" in data:
+            virial = data["virials"][frame_idx]
+            virial_str = "    ".join(f"{v:.12e}" for v in virial.flatten())
+            header_parts.append(f'virial="{virial_str}"')
+        
+        # Lattice
+        cell = data["cells"][frame_idx]
+        lattice_str = "   ".join(f"{c:.12e}" for c in cell.flatten())
+        header_parts.append(f'Lattice="{lattice_str}"')
+        
+        # Properties
+        header_parts.append("Properties=species:S:1:pos:R:3:Z:I:1:force:R:3")
+        
+        header_line = "    ".join(header_parts)
+        
+        # Format atom lines
+        atom_lines = []
+        coords = data["coords"][frame_idx]
+        forces = data["forces"][frame_idx]
+        atom_names = np.array(data["atom_names"])
+        atom_types = data["atom_types"]
+        
+        for i in range(natoms):
+            atom_type_idx = atom_types[i]
+            species = atom_names[atom_type_idx]
+            x, y, z = coords[i]
+            fx, fy, fz = forces[i]
+            atomic_number = Element(species).Z
+            
+            atom_line = f"{species}    {x:.11e}   {y:.11e}   {z:.11e}   {atomic_number}    {fx:.11e}  {fy:.11e}   {fz:.11e}"
+            atom_lines.append(atom_line)
+        
+        # Combine all lines for this frame
+        frame_lines = [str(natoms), header_line] + atom_lines
+        return frame_lines
diff --git a/tests/test_quip_gap_xyz_to_methods.py b/tests/test_quip_gap_xyz_to_methods.py
new file mode 100644
index 000000000..141a1e4ca
--- /dev/null
+++ b/tests/test_quip_gap_xyz_to_methods.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+
+import tempfile
+import os
+import unittest
+from context import dpdata
+
+
+class TestQuipGapXYZToMethods(unittest.TestCase):
+    """Test the to_labeled_system and to_multi_systems methods for QuipGapXYZFormat"""
+    
+    def setUp(self):
+        """Set up test data"""
+        # Load test multi-systems  
+        self.multi_systems = dpdata.MultiSystems.from_file(
+            "xyz/xyz_unittest.xyz", "quip/gap/xyz"
+        )
+        self.system_b1c9 = self.multi_systems.systems["B1C9"]
+        self.system_b5c7 = self.multi_systems.systems["B5C7"]
+    
+    def test_to_labeled_system(self):
+        """Test writing a single labeled system to QUIP/GAP XYZ format"""
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.xyz', delete=False) as tmp_file:
+            output_file = tmp_file.name
+        
+        try:
+            # Write the system to file
+            self.system_b1c9.to('quip/gap/xyz', output_file)
+            
+            # Verify file was created and has content
+            self.assertTrue(os.path.exists(output_file))
+            with open(output_file, 'r') as f:
+                content = f.read()
+            self.assertTrue(len(content) > 0)
+            
+            # Read back and verify we can parse it
+            reloaded_multi = dpdata.MultiSystems.from_file(output_file, 'quip/gap/xyz')
+            self.assertEqual(len(reloaded_multi.systems), 1)
+            
+            # Verify the data matches (we should have the same system)
+            reloaded_system = list(reloaded_multi.systems.values())[0]
+            self.assertEqual(len(reloaded_system), len(self.system_b1c9))
+            
+        finally:
+            if os.path.exists(output_file):
+                os.unlink(output_file)
+    
+    def test_to_multi_systems(self):
+        """Test writing multiple systems to a single QUIP/GAP XYZ format file"""
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.xyz', delete=False) as tmp_file:
+            output_file = tmp_file.name
+        
+        try:
+            # Write all systems to file
+            self.multi_systems.to('quip/gap/xyz', output_file)
+            
+            # Verify file was created and has content
+            self.assertTrue(os.path.exists(output_file))
+            with open(output_file, 'r') as f:
+                content = f.read()
+            self.assertTrue(len(content) > 0)
+            
+            # Read back and verify we get the same number of systems
+            reloaded_multi = dpdata.MultiSystems.from_file(output_file, 'quip/gap/xyz')
+            self.assertEqual(len(reloaded_multi.systems), len(self.multi_systems.systems))
+            
+            # Verify total number of frames is preserved
+            original_frames = sum(len(sys) for sys in self.multi_systems.systems.values())
+            reloaded_frames = sum(len(sys) for sys in reloaded_multi.systems.values())
+            self.assertEqual(reloaded_frames, original_frames)
+            
+        finally:
+            if os.path.exists(output_file):
+                os.unlink(output_file)
+    
+    def test_roundtrip_consistency(self):
+        """Test that writing and reading back preserves data consistency"""
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.xyz', delete=False) as tmp_file:
+            output_file = tmp_file.name
+        
+        try:
+            # Write and read back
+            self.multi_systems.to('quip/gap/xyz', output_file)
+            reloaded_multi = dpdata.MultiSystems.from_file(output_file, 'quip/gap/xyz')
+            
+            # Compare original and reloaded data for each system
+            for system_name in self.multi_systems.systems:
+                if system_name in reloaded_multi.systems:
+                    original = self.multi_systems.systems[system_name]
+                    reloaded = reloaded_multi.systems[system_name]
+                    
+                    # Check basic properties
+                    self.assertEqual(len(original), len(reloaded))
+                    self.assertEqual(len(original.data['atom_names']), len(reloaded.data['atom_names']))
+                    
+                    # Note: We don't check exact numerical equality because of floating point precision
+                    # and potential differences in formatting, but the data should be structurally the same
+            
+        finally:
+            if os.path.exists(output_file):
+                os.unlink(output_file)
+
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file

From cf6f5337235bfa196461643280531bc2c03d27ba Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 1 Sep 2025 11:59:23 +0000
Subject: [PATCH 03/10] docs: update test comments to reflect correct QUIP/GAP
 XYZ usage pattern

Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com>
---
 tests/test_quip_gap_xyz_to_methods.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_quip_gap_xyz_to_methods.py b/tests/test_quip_gap_xyz_to_methods.py
index 141a1e4ca..47f893a99 100644
--- a/tests/test_quip_gap_xyz_to_methods.py
+++ b/tests/test_quip_gap_xyz_to_methods.py
@@ -33,7 +33,7 @@ def test_to_labeled_system(self):
                 content = f.read()
             self.assertTrue(len(content) > 0)
             
-            # Read back and verify we can parse it
+            # Read back and verify we can parse it (use MultiSystems.from_file for QUIP/GAP XYZ)
             reloaded_multi = dpdata.MultiSystems.from_file(output_file, 'quip/gap/xyz')
             self.assertEqual(len(reloaded_multi.systems), 1)
             

From cc22db15a00ba1c9ed3767d1ec85812433ced827 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 1 Sep 2025 12:23:53 +0000
Subject: [PATCH 04/10] refactor(quip/gap/xyz): use yield pattern and move
 format function per reviewer feedback

Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com>
---
 dpdata/plugins/xyz.py      | 95 ++++++--------------------------------
 dpdata/xyz/quip_gap_xyz.py | 65 ++++++++++++++++++++++++++
 2 files changed, 79 insertions(+), 81 deletions(-)

diff --git a/dpdata/plugins/xyz.py b/dpdata/plugins/xyz.py
index 412308b45..96e596343 100644
--- a/dpdata/plugins/xyz.py
+++ b/dpdata/plugins/xyz.py
@@ -10,7 +10,7 @@
 
 if TYPE_CHECKING:
     from dpdata.utils import FileType
-from dpdata.xyz.quip_gap_xyz import QuipGapxyzSystems
+from dpdata.xyz.quip_gap_xyz import QuipGapxyzSystems, format_single_frame
 from dpdata.xyz.xyz import coord_to_xyz, xyz_to_coord
 
 
@@ -51,8 +51,6 @@ def from_system(self, file_name: FileType, **kwargs):
 @Format.register("quip/gap/xyz")
 @Format.register("quip/gap/xyz_file")
 class QuipGapXYZFormat(Format):
-    # Class variable to track which files have been written to
-    _written_files = set()
     
     def from_labeled_system(self, data, **kwargs):
         return data
@@ -77,16 +75,16 @@ def to_labeled_system(self, data, file_name: FileType, **kwargs):
         nframes = len(data["energies"])
         
         for frame_idx in range(nframes):
-            frame_lines = self._format_single_frame(data, frame_idx)
+            frame_lines = format_single_frame(data, frame_idx)
             frames.append("\n".join(frame_lines))
         
-        # Determine if we should append based on whether file was written before
+        # Check if file exists to determine write mode
         file_path = str(file_name)
-        if file_path in self._written_files:
+        import os
+        if os.path.exists(file_path):
             mode = "a"
         else:
             mode = "w"
-            self._written_files.add(file_path)
         
         with open_file(file_name, mode) as fp:
             if mode == "a":
@@ -98,7 +96,6 @@ def to_multi_systems(self, formulas, directory, **kwargs):
         """Return single filename for all systems in QUIP/GAP XYZ format.
         
         For QUIP/GAP XYZ format, all systems are written to a single file.
-        We use a class variable to track which files have been written to.
         
         Parameters
         ----------
@@ -109,77 +106,13 @@ def to_multi_systems(self, formulas, directory, **kwargs):
         **kwargs : dict
             additional arguments
             
-        Returns
-        -------
-        list[str]
-            list with same filename for all systems
+        Yields
+        ------
+        str
+            same filename for all systems
         """
-        # Clear the written files set for this new operation
-        file_path = str(directory)
-        if file_path in self._written_files:
-            self._written_files.remove(file_path)
-        
-        # Return the same filename for all systems
-        return [directory] * len(formulas)
-
-    def _format_single_frame(self, data, frame_idx):
-        """Format a single frame of system data into QUIP/GAP XYZ format lines.
-        
-        Parameters
-        ----------
-        data : dict
-            system data
-        frame_idx : int
-            frame index
-            
-        Returns
-        -------
-        list[str]
-            lines for the frame
-        """
-        # Number of atoms
-        natoms = len(data["atom_types"])
-        
-        # Build header line with metadata
-        header_parts = []
-        
-        # Energy
-        energy = data["energies"][frame_idx]
-        header_parts.append(f"energy={energy:.12e}")
-        
-        # Virial (if present)
-        if "virials" in data:
-            virial = data["virials"][frame_idx]
-            virial_str = "    ".join(f"{v:.12e}" for v in virial.flatten())
-            header_parts.append(f'virial="{virial_str}"')
-        
-        # Lattice
-        cell = data["cells"][frame_idx]
-        lattice_str = "   ".join(f"{c:.12e}" for c in cell.flatten())
-        header_parts.append(f'Lattice="{lattice_str}"')
-        
-        # Properties
-        header_parts.append("Properties=species:S:1:pos:R:3:Z:I:1:force:R:3")
-        
-        header_line = "    ".join(header_parts)
-        
-        # Format atom lines
-        atom_lines = []
-        coords = data["coords"][frame_idx]
-        forces = data["forces"][frame_idx]
-        atom_names = np.array(data["atom_names"])
-        atom_types = data["atom_types"]
-        
-        for i in range(natoms):
-            atom_type_idx = atom_types[i]
-            species = atom_names[atom_type_idx]
-            x, y, z = coords[i]
-            fx, fy, fz = forces[i]
-            atomic_number = Element(species).Z
-            
-            atom_line = f"{species}    {x:.11e}   {y:.11e}   {z:.11e}   {atomic_number}    {fx:.11e}  {fy:.11e}   {fz:.11e}"
-            atom_lines.append(atom_line)
-        
-        # Combine all lines for this frame
-        frame_lines = [str(natoms), header_line] + atom_lines
-        return frame_lines
+        with open_file(directory, 'w'):
+            # Just create/truncate the file, then yield filenames
+            pass
+        for _ in formulas:
+            yield directory
diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py
index cba971e47..c43342973 100644
--- a/dpdata/xyz/quip_gap_xyz.py
+++ b/dpdata/xyz/quip_gap_xyz.py
@@ -7,6 +7,8 @@
 
 import numpy as np
 
+from dpdata.periodic_table import Element
+
 
 class QuipGapxyzSystems:
     """deal with QuipGapxyzFile."""
@@ -183,3 +185,66 @@ def handle_single_xyz_frame(lines):
             info_dict["virials"] = virials
         info_dict["orig"] = np.zeros(3)
         return info_dict
+
+
+def format_single_frame(data, frame_idx):
+    """Format a single frame of system data into QUIP/GAP XYZ format lines.
+    
+    Parameters
+    ----------
+    data : dict
+        system data
+    frame_idx : int
+        frame index
+        
+    Returns
+    -------
+    list[str]
+        lines for the frame
+    """
+    # Number of atoms
+    natoms = len(data["atom_types"])
+    
+    # Build header line with metadata
+    header_parts = []
+    
+    # Energy
+    energy = data["energies"][frame_idx]
+    header_parts.append(f"energy={energy:.12e}")
+    
+    # Virial (if present)
+    if "virials" in data:
+        virial = data["virials"][frame_idx]
+        virial_str = "    ".join(f"{v:.12e}" for v in virial.flatten())
+        header_parts.append(f'virial="{virial_str}"')
+    
+    # Lattice
+    cell = data["cells"][frame_idx]
+    lattice_str = "   ".join(f"{c:.12e}" for c in cell.flatten())
+    header_parts.append(f'Lattice="{lattice_str}"')
+    
+    # Properties
+    header_parts.append("Properties=species:S:1:pos:R:3:Z:I:1:force:R:3")
+    
+    header_line = "    ".join(header_parts)
+    
+    # Format atom lines
+    atom_lines = []
+    coords = data["coords"][frame_idx]
+    forces = data["forces"][frame_idx]
+    atom_names = np.array(data["atom_names"])
+    atom_types = data["atom_types"]
+    
+    for i in range(natoms):
+        atom_type_idx = atom_types[i]
+        species = atom_names[atom_type_idx]
+        x, y, z = coords[i]
+        fx, fy, fz = forces[i]
+        atomic_number = Element(species).Z
+        
+        atom_line = f"{species}    {x:.11e}   {y:.11e}   {z:.11e}   {atomic_number}    {fx:.11e}  {fy:.11e}   {fz:.11e}"
+        atom_lines.append(atom_line)
+    
+    # Combine all lines for this frame
+    frame_lines = [str(natoms), header_line] + atom_lines
+    return frame_lines

From 794887bf0e43fa9335e92dd8d7bac1e2038c1d3f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 1 Sep 2025 12:47:27 +0000
Subject: [PATCH 05/10] refactor(quip/gap/xyz): implement file handler yield
 pattern and improve import organization

Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com>
---
 dpdata/plugins/xyz.py | 63 ++++++++++++++++++++++---------------------
 1 file changed, 33 insertions(+), 30 deletions(-)

diff --git a/dpdata/plugins/xyz.py b/dpdata/plugins/xyz.py
index 96e596343..851e60a27 100644
--- a/dpdata/plugins/xyz.py
+++ b/dpdata/plugins/xyz.py
@@ -1,11 +1,12 @@
 from __future__ import annotations
 
+import io
+import os
 from typing import TYPE_CHECKING
 
 import numpy as np
 
 from dpdata.format import Format
-from dpdata.periodic_table import Element
 from dpdata.utils import open_file
 
 if TYPE_CHECKING:
@@ -51,7 +52,6 @@ def from_system(self, file_name: FileType, **kwargs):
 @Format.register("quip/gap/xyz")
 @Format.register("quip/gap/xyz_file")
 class QuipGapXYZFormat(Format):
-    
     def from_labeled_system(self, data, **kwargs):
         return data
 
@@ -61,58 +61,61 @@ def from_multi_systems(self, file_name, **kwargs):
 
     def to_labeled_system(self, data, file_name: FileType, **kwargs):
         """Write LabeledSystem data to QUIP/GAP XYZ format file.
-        
+
         Parameters
         ----------
         data : dict
             system data
         file_name : FileType
-            output file name
+            output file name or file handler
         **kwargs : dict
             additional arguments
         """
         frames = []
         nframes = len(data["energies"])
-        
+
         for frame_idx in range(nframes):
             frame_lines = format_single_frame(data, frame_idx)
             frames.append("\n".join(frame_lines))
-        
-        # Check if file exists to determine write mode
-        file_path = str(file_name)
-        import os
-        if os.path.exists(file_path):
-            mode = "a"
+
+        # Always write unless input is a file handler
+        if isinstance(file_name, io.IOBase):
+            # File handler - check if we need to append
+            file_path = getattr(file_name, "name", None)
+            if file_path and os.path.exists(file_path):
+                mode = "a"
+            else:
+                mode = "w"
+            with open_file(file_name, mode) as fp:
+                if mode == "a":
+                    # Add newline separator if appending
+                    fp.write("\n")
+                fp.write("\n".join(frames))
         else:
-            mode = "w"
-        
-        with open_file(file_name, mode) as fp:
-            if mode == "a":
-                # Add newline separator if appending
-                fp.write("\n")
-            fp.write("\n".join(frames))
+            # File path - always write (overwrite)
+            with open_file(file_name, "w") as fp:
+                fp.write("\n".join(frames))
 
     def to_multi_systems(self, formulas, directory, **kwargs):
         """Return single filename for all systems in QUIP/GAP XYZ format.
-        
+
         For QUIP/GAP XYZ format, all systems are written to a single file.
-        
+
         Parameters
         ----------
         formulas : list[str]
-            list of system names/formulas  
+            list of system names/formulas
         directory : str
-            output filename 
+            output filename
         **kwargs : dict
             additional arguments
-            
+
         Yields
         ------
-        str
-            same filename for all systems
+        file handler
+            file handler for all systems
         """
-        with open_file(directory, 'w'):
-            # Just create/truncate the file, then yield filenames
-            pass
-        for _ in formulas:
-            yield directory
+        with open_file(directory, "w") as f:
+            # Just create/truncate the file, then yield file handlers
+            for _ in formulas:
+                yield f

From 573021a92c4dbd73fb2aaca8ed6e579cec3dc3e4 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@ustc.edu.cn>
Date: Mon, 1 Sep 2025 20:52:56 +0800
Subject: [PATCH 06/10] Update xyz.py

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@ustc.edu.cn>
---
 dpdata/plugins/xyz.py | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/dpdata/plugins/xyz.py b/dpdata/plugins/xyz.py
index 851e60a27..740a0c14b 100644
--- a/dpdata/plugins/xyz.py
+++ b/dpdata/plugins/xyz.py
@@ -78,21 +78,9 @@ def to_labeled_system(self, data, file_name: FileType, **kwargs):
             frame_lines = format_single_frame(data, frame_idx)
             frames.append("\n".join(frame_lines))
 
-        # Always write unless input is a file handler
         if isinstance(file_name, io.IOBase):
-            # File handler - check if we need to append
-            file_path = getattr(file_name, "name", None)
-            if file_path and os.path.exists(file_path):
-                mode = "a"
-            else:
-                mode = "w"
-            with open_file(file_name, mode) as fp:
-                if mode == "a":
-                    # Add newline separator if appending
-                    fp.write("\n")
-                fp.write("\n".join(frames))
+            file_name.write("\n".join(frames))
         else:
-            # File path - always write (overwrite)
             with open_file(file_name, "w") as fp:
                 fp.write("\n".join(frames))
 

From 683e4f1349b20443c9e32bd15415d43cc46a5759 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 1 Sep 2025 12:53:21 +0000
Subject: [PATCH 07/10] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 dpdata/plugins/xyz.py                 |  1 -
 dpdata/xyz/quip_gap_xyz.py            | 24 ++++----
 tests/test_quip_gap_xyz_to_methods.py | 87 ++++++++++++++++-----------
 3 files changed, 63 insertions(+), 49 deletions(-)

diff --git a/dpdata/plugins/xyz.py b/dpdata/plugins/xyz.py
index 740a0c14b..1864f2219 100644
--- a/dpdata/plugins/xyz.py
+++ b/dpdata/plugins/xyz.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import io
-import os
 from typing import TYPE_CHECKING
 
 import numpy as np
diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py
index c43342973..71e976de6 100644
--- a/dpdata/xyz/quip_gap_xyz.py
+++ b/dpdata/xyz/quip_gap_xyz.py
@@ -189,14 +189,14 @@ def handle_single_xyz_frame(lines):
 
 def format_single_frame(data, frame_idx):
     """Format a single frame of system data into QUIP/GAP XYZ format lines.
-    
+
     Parameters
     ----------
     data : dict
         system data
     frame_idx : int
         frame index
-        
+
     Returns
     -------
     list[str]
@@ -204,47 +204,47 @@ def format_single_frame(data, frame_idx):
     """
     # Number of atoms
     natoms = len(data["atom_types"])
-    
+
     # Build header line with metadata
     header_parts = []
-    
+
     # Energy
     energy = data["energies"][frame_idx]
     header_parts.append(f"energy={energy:.12e}")
-    
+
     # Virial (if present)
     if "virials" in data:
         virial = data["virials"][frame_idx]
         virial_str = "    ".join(f"{v:.12e}" for v in virial.flatten())
         header_parts.append(f'virial="{virial_str}"')
-    
+
     # Lattice
     cell = data["cells"][frame_idx]
     lattice_str = "   ".join(f"{c:.12e}" for c in cell.flatten())
     header_parts.append(f'Lattice="{lattice_str}"')
-    
+
     # Properties
     header_parts.append("Properties=species:S:1:pos:R:3:Z:I:1:force:R:3")
-    
+
     header_line = "    ".join(header_parts)
-    
+
     # Format atom lines
     atom_lines = []
     coords = data["coords"][frame_idx]
     forces = data["forces"][frame_idx]
     atom_names = np.array(data["atom_names"])
     atom_types = data["atom_types"]
-    
+
     for i in range(natoms):
         atom_type_idx = atom_types[i]
         species = atom_names[atom_type_idx]
         x, y, z = coords[i]
         fx, fy, fz = forces[i]
         atomic_number = Element(species).Z
-        
+
         atom_line = f"{species}    {x:.11e}   {y:.11e}   {z:.11e}   {atomic_number}    {fx:.11e}  {fy:.11e}   {fz:.11e}"
         atom_lines.append(atom_line)
-    
+
     # Combine all lines for this frame
     frame_lines = [str(natoms), header_line] + atom_lines
     return frame_lines
diff --git a/tests/test_quip_gap_xyz_to_methods.py b/tests/test_quip_gap_xyz_to_methods.py
index 47f893a99..c34d37af6 100644
--- a/tests/test_quip_gap_xyz_to_methods.py
+++ b/tests/test_quip_gap_xyz_to_methods.py
@@ -1,105 +1,120 @@
 #!/usr/bin/env python3
+from __future__ import annotations
 
-import tempfile
 import os
+import tempfile
 import unittest
+
 from context import dpdata
 
 
 class TestQuipGapXYZToMethods(unittest.TestCase):
     """Test the to_labeled_system and to_multi_systems methods for QuipGapXYZFormat"""
-    
+
     def setUp(self):
         """Set up test data"""
-        # Load test multi-systems  
+        # Load test multi-systems
         self.multi_systems = dpdata.MultiSystems.from_file(
             "xyz/xyz_unittest.xyz", "quip/gap/xyz"
         )
         self.system_b1c9 = self.multi_systems.systems["B1C9"]
         self.system_b5c7 = self.multi_systems.systems["B5C7"]
-    
+
     def test_to_labeled_system(self):
         """Test writing a single labeled system to QUIP/GAP XYZ format"""
-        with tempfile.NamedTemporaryFile(mode='w', suffix='.xyz', delete=False) as tmp_file:
+        with tempfile.NamedTemporaryFile(
+            mode="w", suffix=".xyz", delete=False
+        ) as tmp_file:
             output_file = tmp_file.name
-        
+
         try:
             # Write the system to file
-            self.system_b1c9.to('quip/gap/xyz', output_file)
-            
+            self.system_b1c9.to("quip/gap/xyz", output_file)
+
             # Verify file was created and has content
             self.assertTrue(os.path.exists(output_file))
-            with open(output_file, 'r') as f:
+            with open(output_file) as f:
                 content = f.read()
             self.assertTrue(len(content) > 0)
-            
+
             # Read back and verify we can parse it (use MultiSystems.from_file for QUIP/GAP XYZ)
-            reloaded_multi = dpdata.MultiSystems.from_file(output_file, 'quip/gap/xyz')
+            reloaded_multi = dpdata.MultiSystems.from_file(output_file, "quip/gap/xyz")
             self.assertEqual(len(reloaded_multi.systems), 1)
-            
+
             # Verify the data matches (we should have the same system)
             reloaded_system = list(reloaded_multi.systems.values())[0]
             self.assertEqual(len(reloaded_system), len(self.system_b1c9))
-            
+
         finally:
             if os.path.exists(output_file):
                 os.unlink(output_file)
-    
+
     def test_to_multi_systems(self):
         """Test writing multiple systems to a single QUIP/GAP XYZ format file"""
-        with tempfile.NamedTemporaryFile(mode='w', suffix='.xyz', delete=False) as tmp_file:
+        with tempfile.NamedTemporaryFile(
+            mode="w", suffix=".xyz", delete=False
+        ) as tmp_file:
             output_file = tmp_file.name
-        
+
         try:
             # Write all systems to file
-            self.multi_systems.to('quip/gap/xyz', output_file)
-            
+            self.multi_systems.to("quip/gap/xyz", output_file)
+
             # Verify file was created and has content
             self.assertTrue(os.path.exists(output_file))
-            with open(output_file, 'r') as f:
+            with open(output_file) as f:
                 content = f.read()
             self.assertTrue(len(content) > 0)
-            
+
             # Read back and verify we get the same number of systems
-            reloaded_multi = dpdata.MultiSystems.from_file(output_file, 'quip/gap/xyz')
-            self.assertEqual(len(reloaded_multi.systems), len(self.multi_systems.systems))
-            
+            reloaded_multi = dpdata.MultiSystems.from_file(output_file, "quip/gap/xyz")
+            self.assertEqual(
+                len(reloaded_multi.systems), len(self.multi_systems.systems)
+            )
+
             # Verify total number of frames is preserved
-            original_frames = sum(len(sys) for sys in self.multi_systems.systems.values())
+            original_frames = sum(
+                len(sys) for sys in self.multi_systems.systems.values()
+            )
             reloaded_frames = sum(len(sys) for sys in reloaded_multi.systems.values())
             self.assertEqual(reloaded_frames, original_frames)
-            
+
         finally:
             if os.path.exists(output_file):
                 os.unlink(output_file)
-    
+
     def test_roundtrip_consistency(self):
         """Test that writing and reading back preserves data consistency"""
-        with tempfile.NamedTemporaryFile(mode='w', suffix='.xyz', delete=False) as tmp_file:
+        with tempfile.NamedTemporaryFile(
+            mode="w", suffix=".xyz", delete=False
+        ) as tmp_file:
             output_file = tmp_file.name
-        
+
         try:
             # Write and read back
-            self.multi_systems.to('quip/gap/xyz', output_file)
-            reloaded_multi = dpdata.MultiSystems.from_file(output_file, 'quip/gap/xyz')
-            
+            self.multi_systems.to("quip/gap/xyz", output_file)
+            reloaded_multi = dpdata.MultiSystems.from_file(output_file, "quip/gap/xyz")
+
             # Compare original and reloaded data for each system
             for system_name in self.multi_systems.systems:
                 if system_name in reloaded_multi.systems:
                     original = self.multi_systems.systems[system_name]
                     reloaded = reloaded_multi.systems[system_name]
-                    
+
                     # Check basic properties
                     self.assertEqual(len(original), len(reloaded))
-                    self.assertEqual(len(original.data['atom_names']), len(reloaded.data['atom_names']))
-                    
+                    self.assertEqual(
+                        len(original.data["atom_names"]),
+                        len(reloaded.data["atom_names"]),
+                    )
+
                     # Note: We don't check exact numerical equality because of floating point precision
                     # and potential differences in formatting, but the data should be structurally the same
-            
+
         finally:
             if os.path.exists(output_file):
                 os.unlink(output_file)
 
 
 if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
+    unittest.main()

From 8871e2f184b39c517d35d4b5ccb421883a9ef5f8 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 1 Sep 2025 13:37:48 +0000
Subject: [PATCH 08/10] fix(quip/gap/xyz): fix multi-systems export to handle
 file handler positioning correctly

Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com>
---
 dpdata/plugins/xyz.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/dpdata/plugins/xyz.py b/dpdata/plugins/xyz.py
index 1864f2219..8baad0725 100644
--- a/dpdata/plugins/xyz.py
+++ b/dpdata/plugins/xyz.py
@@ -77,11 +77,18 @@ def to_labeled_system(self, data, file_name: FileType, **kwargs):
             frame_lines = format_single_frame(data, frame_idx)
             frames.append("\n".join(frame_lines))
 
+        content = "\n".join(frames)
+
         if isinstance(file_name, io.IOBase):
-            file_name.write("\n".join(frames))
+            # When writing to a file handler, check if file is empty
+            # If not empty, add a newline separator before content
+            current_pos = file_name.tell()
+            if current_pos > 0:
+                file_name.write("\n")
+            file_name.write(content)
         else:
             with open_file(file_name, "w") as fp:
-                fp.write("\n".join(frames))
+                fp.write(content)
 
     def to_multi_systems(self, formulas, directory, **kwargs):
         """Return single filename for all systems in QUIP/GAP XYZ format.

From 3e19545d88b9efdc6728e9e5527b491af3fc2294 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@ustc.edu.cn>
Date: Mon, 1 Sep 2025 21:51:17 +0800
Subject: [PATCH 09/10] Update docstrings for clarity in tests

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@ustc.edu.cn>
---
 tests/test_quip_gap_xyz_to_methods.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/test_quip_gap_xyz_to_methods.py b/tests/test_quip_gap_xyz_to_methods.py
index c34d37af6..7b38cc334 100644
--- a/tests/test_quip_gap_xyz_to_methods.py
+++ b/tests/test_quip_gap_xyz_to_methods.py
@@ -9,10 +9,10 @@
 
 
 class TestQuipGapXYZToMethods(unittest.TestCase):
-    """Test the to_labeled_system and to_multi_systems methods for QuipGapXYZFormat"""
+    """Test the to_labeled_system and to_multi_systems methods for QuipGapXYZFormat."""
 
     def setUp(self):
-        """Set up test data"""
+        """Set up test data."""
         # Load test multi-systems
         self.multi_systems = dpdata.MultiSystems.from_file(
             "xyz/xyz_unittest.xyz", "quip/gap/xyz"
@@ -21,7 +21,7 @@ def setUp(self):
         self.system_b5c7 = self.multi_systems.systems["B5C7"]
 
     def test_to_labeled_system(self):
-        """Test writing a single labeled system to QUIP/GAP XYZ format"""
+        """Test writing a single labeled system to QUIP/GAP XYZ format."""
         with tempfile.NamedTemporaryFile(
             mode="w", suffix=".xyz", delete=False
         ) as tmp_file:
@@ -50,7 +50,7 @@ def test_to_labeled_system(self):
                 os.unlink(output_file)
 
     def test_to_multi_systems(self):
-        """Test writing multiple systems to a single QUIP/GAP XYZ format file"""
+        """Test writing multiple systems to a single QUIP/GAP XYZ format file."""
         with tempfile.NamedTemporaryFile(
             mode="w", suffix=".xyz", delete=False
         ) as tmp_file:
@@ -84,7 +84,7 @@ def test_to_multi_systems(self):
                 os.unlink(output_file)
 
     def test_roundtrip_consistency(self):
-        """Test that writing and reading back preserves data consistency"""
+        """Test that writing and reading back preserves data consistency."""
         with tempfile.NamedTemporaryFile(
             mode="w", suffix=".xyz", delete=False
         ) as tmp_file:

From 6dac45867028ccdef0572469500fc30702a60d6e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 1 Sep 2025 14:16:29 +0000
Subject: [PATCH 10/10] fix(quip/gap/xyz): remove linebreak between
 configurations in multi-system export

Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com>
---
 dpdata/plugins/xyz.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/dpdata/plugins/xyz.py b/dpdata/plugins/xyz.py
index 8baad0725..8ec59486a 100644
--- a/dpdata/plugins/xyz.py
+++ b/dpdata/plugins/xyz.py
@@ -80,12 +80,9 @@ def to_labeled_system(self, data, file_name: FileType, **kwargs):
         content = "\n".join(frames)
 
         if isinstance(file_name, io.IOBase):
-            # When writing to a file handler, check if file is empty
-            # If not empty, add a newline separator before content
-            current_pos = file_name.tell()
-            if current_pos > 0:
-                file_name.write("\n")
             file_name.write(content)
+            if not content.endswith("\n"):
+                file_name.write("\n")
         else:
             with open_file(file_name, "w") as fp:
                 fp.write(content)