From bf15bc34f9ac00e3f3d6c1129c4d61384005f4f6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 1 Sep 2025 11:45:13 +0000 Subject: [PATCH 01/10] Initial plan From fbe45c1c45e4277b04fe71ecc0a09c1cdb5c0dcc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 1 Sep 2025 11:57:37 +0000 Subject: [PATCH 02/10] feat(quip/gap/xyz): implement to_labeled_system and to_multi_systems methods Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com> --- dpdata/plugins/xyz.py | 127 ++++++++++++++++++++++++++ tests/test_quip_gap_xyz_to_methods.py | 105 +++++++++++++++++++++ 2 files changed, 232 insertions(+) create mode 100644 tests/test_quip_gap_xyz_to_methods.py diff --git a/dpdata/plugins/xyz.py b/dpdata/plugins/xyz.py index d56a8618c..412308b45 100644 --- a/dpdata/plugins/xyz.py +++ b/dpdata/plugins/xyz.py @@ -5,6 +5,7 @@ import numpy as np from dpdata.format import Format +from dpdata.periodic_table import Element from dpdata.utils import open_file if TYPE_CHECKING: @@ -50,9 +51,135 @@ def from_system(self, file_name: FileType, **kwargs): @Format.register("quip/gap/xyz") @Format.register("quip/gap/xyz_file") class QuipGapXYZFormat(Format): + # Class variable to track which files have been written to + _written_files = set() + def from_labeled_system(self, data, **kwargs): return data def from_multi_systems(self, file_name, **kwargs): # here directory is the file_name return QuipGapxyzSystems(file_name) + + def to_labeled_system(self, data, file_name: FileType, **kwargs): + """Write LabeledSystem data to QUIP/GAP XYZ format file. + + Parameters + ---------- + data : dict + system data + file_name : FileType + output file name + **kwargs : dict + additional arguments + """ + frames = [] + nframes = len(data["energies"]) + + for frame_idx in range(nframes): + frame_lines = self._format_single_frame(data, frame_idx) + frames.append("\n".join(frame_lines)) + + # Determine if we should append based on whether file was written before + file_path = str(file_name) + if file_path in self._written_files: + mode = "a" + else: + mode = "w" + self._written_files.add(file_path) + + with open_file(file_name, mode) as fp: + if mode == "a": + # Add newline separator if appending + fp.write("\n") + fp.write("\n".join(frames)) + + def to_multi_systems(self, formulas, directory, **kwargs): + """Return single filename for all systems in QUIP/GAP XYZ format. + + For QUIP/GAP XYZ format, all systems are written to a single file. + We use a class variable to track which files have been written to. + + Parameters + ---------- + formulas : list[str] + list of system names/formulas + directory : str + output filename + **kwargs : dict + additional arguments + + Returns + ------- + list[str] + list with same filename for all systems + """ + # Clear the written files set for this new operation + file_path = str(directory) + if file_path in self._written_files: + self._written_files.remove(file_path) + + # Return the same filename for all systems + return [directory] * len(formulas) + + def _format_single_frame(self, data, frame_idx): + """Format a single frame of system data into QUIP/GAP XYZ format lines. + + Parameters + ---------- + data : dict + system data + frame_idx : int + frame index + + Returns + ------- + list[str] + lines for the frame + """ + # Number of atoms + natoms = len(data["atom_types"]) + + # Build header line with metadata + header_parts = [] + + # Energy + energy = data["energies"][frame_idx] + header_parts.append(f"energy={energy:.12e}") + + # Virial (if present) + if "virials" in data: + virial = data["virials"][frame_idx] + virial_str = " ".join(f"{v:.12e}" for v in virial.flatten()) + header_parts.append(f'virial="{virial_str}"') + + # Lattice + cell = data["cells"][frame_idx] + lattice_str = " ".join(f"{c:.12e}" for c in cell.flatten()) + header_parts.append(f'Lattice="{lattice_str}"') + + # Properties + header_parts.append("Properties=species:S:1:pos:R:3:Z:I:1:force:R:3") + + header_line = " ".join(header_parts) + + # Format atom lines + atom_lines = [] + coords = data["coords"][frame_idx] + forces = data["forces"][frame_idx] + atom_names = np.array(data["atom_names"]) + atom_types = data["atom_types"] + + for i in range(natoms): + atom_type_idx = atom_types[i] + species = atom_names[atom_type_idx] + x, y, z = coords[i] + fx, fy, fz = forces[i] + atomic_number = Element(species).Z + + atom_line = f"{species} {x:.11e} {y:.11e} {z:.11e} {atomic_number} {fx:.11e} {fy:.11e} {fz:.11e}" + atom_lines.append(atom_line) + + # Combine all lines for this frame + frame_lines = [str(natoms), header_line] + atom_lines + return frame_lines diff --git a/tests/test_quip_gap_xyz_to_methods.py b/tests/test_quip_gap_xyz_to_methods.py new file mode 100644 index 000000000..141a1e4ca --- /dev/null +++ b/tests/test_quip_gap_xyz_to_methods.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 + +import tempfile +import os +import unittest +from context import dpdata + + +class TestQuipGapXYZToMethods(unittest.TestCase): + """Test the to_labeled_system and to_multi_systems methods for QuipGapXYZFormat""" + + def setUp(self): + """Set up test data""" + # Load test multi-systems + self.multi_systems = dpdata.MultiSystems.from_file( + "xyz/xyz_unittest.xyz", "quip/gap/xyz" + ) + self.system_b1c9 = self.multi_systems.systems["B1C9"] + self.system_b5c7 = self.multi_systems.systems["B5C7"] + + def test_to_labeled_system(self): + """Test writing a single labeled system to QUIP/GAP XYZ format""" + with tempfile.NamedTemporaryFile(mode='w', suffix='.xyz', delete=False) as tmp_file: + output_file = tmp_file.name + + try: + # Write the system to file + self.system_b1c9.to('quip/gap/xyz', output_file) + + # Verify file was created and has content + self.assertTrue(os.path.exists(output_file)) + with open(output_file, 'r') as f: + content = f.read() + self.assertTrue(len(content) > 0) + + # Read back and verify we can parse it + reloaded_multi = dpdata.MultiSystems.from_file(output_file, 'quip/gap/xyz') + self.assertEqual(len(reloaded_multi.systems), 1) + + # Verify the data matches (we should have the same system) + reloaded_system = list(reloaded_multi.systems.values())[0] + self.assertEqual(len(reloaded_system), len(self.system_b1c9)) + + finally: + if os.path.exists(output_file): + os.unlink(output_file) + + def test_to_multi_systems(self): + """Test writing multiple systems to a single QUIP/GAP XYZ format file""" + with tempfile.NamedTemporaryFile(mode='w', suffix='.xyz', delete=False) as tmp_file: + output_file = tmp_file.name + + try: + # Write all systems to file + self.multi_systems.to('quip/gap/xyz', output_file) + + # Verify file was created and has content + self.assertTrue(os.path.exists(output_file)) + with open(output_file, 'r') as f: + content = f.read() + self.assertTrue(len(content) > 0) + + # Read back and verify we get the same number of systems + reloaded_multi = dpdata.MultiSystems.from_file(output_file, 'quip/gap/xyz') + self.assertEqual(len(reloaded_multi.systems), len(self.multi_systems.systems)) + + # Verify total number of frames is preserved + original_frames = sum(len(sys) for sys in self.multi_systems.systems.values()) + reloaded_frames = sum(len(sys) for sys in reloaded_multi.systems.values()) + self.assertEqual(reloaded_frames, original_frames) + + finally: + if os.path.exists(output_file): + os.unlink(output_file) + + def test_roundtrip_consistency(self): + """Test that writing and reading back preserves data consistency""" + with tempfile.NamedTemporaryFile(mode='w', suffix='.xyz', delete=False) as tmp_file: + output_file = tmp_file.name + + try: + # Write and read back + self.multi_systems.to('quip/gap/xyz', output_file) + reloaded_multi = dpdata.MultiSystems.from_file(output_file, 'quip/gap/xyz') + + # Compare original and reloaded data for each system + for system_name in self.multi_systems.systems: + if system_name in reloaded_multi.systems: + original = self.multi_systems.systems[system_name] + reloaded = reloaded_multi.systems[system_name] + + # Check basic properties + self.assertEqual(len(original), len(reloaded)) + self.assertEqual(len(original.data['atom_names']), len(reloaded.data['atom_names'])) + + # Note: We don't check exact numerical equality because of floating point precision + # and potential differences in formatting, but the data should be structurally the same + + finally: + if os.path.exists(output_file): + os.unlink(output_file) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file From cf6f5337235bfa196461643280531bc2c03d27ba Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 1 Sep 2025 11:59:23 +0000 Subject: [PATCH 03/10] docs: update test comments to reflect correct QUIP/GAP XYZ usage pattern Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com> --- tests/test_quip_gap_xyz_to_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_quip_gap_xyz_to_methods.py b/tests/test_quip_gap_xyz_to_methods.py index 141a1e4ca..47f893a99 100644 --- a/tests/test_quip_gap_xyz_to_methods.py +++ b/tests/test_quip_gap_xyz_to_methods.py @@ -33,7 +33,7 @@ def test_to_labeled_system(self): content = f.read() self.assertTrue(len(content) > 0) - # Read back and verify we can parse it + # Read back and verify we can parse it (use MultiSystems.from_file for QUIP/GAP XYZ) reloaded_multi = dpdata.MultiSystems.from_file(output_file, 'quip/gap/xyz') self.assertEqual(len(reloaded_multi.systems), 1) From cc22db15a00ba1c9ed3767d1ec85812433ced827 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 1 Sep 2025 12:23:53 +0000 Subject: [PATCH 04/10] refactor(quip/gap/xyz): use yield pattern and move format function per reviewer feedback Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com> --- dpdata/plugins/xyz.py | 95 ++++++-------------------------------- dpdata/xyz/quip_gap_xyz.py | 65 ++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 81 deletions(-) diff --git a/dpdata/plugins/xyz.py b/dpdata/plugins/xyz.py index 412308b45..96e596343 100644 --- a/dpdata/plugins/xyz.py +++ b/dpdata/plugins/xyz.py @@ -10,7 +10,7 @@ if TYPE_CHECKING: from dpdata.utils import FileType -from dpdata.xyz.quip_gap_xyz import QuipGapxyzSystems +from dpdata.xyz.quip_gap_xyz import QuipGapxyzSystems, format_single_frame from dpdata.xyz.xyz import coord_to_xyz, xyz_to_coord @@ -51,8 +51,6 @@ def from_system(self, file_name: FileType, **kwargs): @Format.register("quip/gap/xyz") @Format.register("quip/gap/xyz_file") class QuipGapXYZFormat(Format): - # Class variable to track which files have been written to - _written_files = set() def from_labeled_system(self, data, **kwargs): return data @@ -77,16 +75,16 @@ def to_labeled_system(self, data, file_name: FileType, **kwargs): nframes = len(data["energies"]) for frame_idx in range(nframes): - frame_lines = self._format_single_frame(data, frame_idx) + frame_lines = format_single_frame(data, frame_idx) frames.append("\n".join(frame_lines)) - # Determine if we should append based on whether file was written before + # Check if file exists to determine write mode file_path = str(file_name) - if file_path in self._written_files: + import os + if os.path.exists(file_path): mode = "a" else: mode = "w" - self._written_files.add(file_path) with open_file(file_name, mode) as fp: if mode == "a": @@ -98,7 +96,6 @@ def to_multi_systems(self, formulas, directory, **kwargs): """Return single filename for all systems in QUIP/GAP XYZ format. For QUIP/GAP XYZ format, all systems are written to a single file. - We use a class variable to track which files have been written to. Parameters ---------- @@ -109,77 +106,13 @@ def to_multi_systems(self, formulas, directory, **kwargs): **kwargs : dict additional arguments - Returns - ------- - list[str] - list with same filename for all systems + Yields + ------ + str + same filename for all systems """ - # Clear the written files set for this new operation - file_path = str(directory) - if file_path in self._written_files: - self._written_files.remove(file_path) - - # Return the same filename for all systems - return [directory] * len(formulas) - - def _format_single_frame(self, data, frame_idx): - """Format a single frame of system data into QUIP/GAP XYZ format lines. - - Parameters - ---------- - data : dict - system data - frame_idx : int - frame index - - Returns - ------- - list[str] - lines for the frame - """ - # Number of atoms - natoms = len(data["atom_types"]) - - # Build header line with metadata - header_parts = [] - - # Energy - energy = data["energies"][frame_idx] - header_parts.append(f"energy={energy:.12e}") - - # Virial (if present) - if "virials" in data: - virial = data["virials"][frame_idx] - virial_str = " ".join(f"{v:.12e}" for v in virial.flatten()) - header_parts.append(f'virial="{virial_str}"') - - # Lattice - cell = data["cells"][frame_idx] - lattice_str = " ".join(f"{c:.12e}" for c in cell.flatten()) - header_parts.append(f'Lattice="{lattice_str}"') - - # Properties - header_parts.append("Properties=species:S:1:pos:R:3:Z:I:1:force:R:3") - - header_line = " ".join(header_parts) - - # Format atom lines - atom_lines = [] - coords = data["coords"][frame_idx] - forces = data["forces"][frame_idx] - atom_names = np.array(data["atom_names"]) - atom_types = data["atom_types"] - - for i in range(natoms): - atom_type_idx = atom_types[i] - species = atom_names[atom_type_idx] - x, y, z = coords[i] - fx, fy, fz = forces[i] - atomic_number = Element(species).Z - - atom_line = f"{species} {x:.11e} {y:.11e} {z:.11e} {atomic_number} {fx:.11e} {fy:.11e} {fz:.11e}" - atom_lines.append(atom_line) - - # Combine all lines for this frame - frame_lines = [str(natoms), header_line] + atom_lines - return frame_lines + with open_file(directory, 'w'): + # Just create/truncate the file, then yield filenames + pass + for _ in formulas: + yield directory diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py index cba971e47..c43342973 100644 --- a/dpdata/xyz/quip_gap_xyz.py +++ b/dpdata/xyz/quip_gap_xyz.py @@ -7,6 +7,8 @@ import numpy as np +from dpdata.periodic_table import Element + class QuipGapxyzSystems: """deal with QuipGapxyzFile.""" @@ -183,3 +185,66 @@ def handle_single_xyz_frame(lines): info_dict["virials"] = virials info_dict["orig"] = np.zeros(3) return info_dict + + +def format_single_frame(data, frame_idx): + """Format a single frame of system data into QUIP/GAP XYZ format lines. + + Parameters + ---------- + data : dict + system data + frame_idx : int + frame index + + Returns + ------- + list[str] + lines for the frame + """ + # Number of atoms + natoms = len(data["atom_types"]) + + # Build header line with metadata + header_parts = [] + + # Energy + energy = data["energies"][frame_idx] + header_parts.append(f"energy={energy:.12e}") + + # Virial (if present) + if "virials" in data: + virial = data["virials"][frame_idx] + virial_str = " ".join(f"{v:.12e}" for v in virial.flatten()) + header_parts.append(f'virial="{virial_str}"') + + # Lattice + cell = data["cells"][frame_idx] + lattice_str = " ".join(f"{c:.12e}" for c in cell.flatten()) + header_parts.append(f'Lattice="{lattice_str}"') + + # Properties + header_parts.append("Properties=species:S:1:pos:R:3:Z:I:1:force:R:3") + + header_line = " ".join(header_parts) + + # Format atom lines + atom_lines = [] + coords = data["coords"][frame_idx] + forces = data["forces"][frame_idx] + atom_names = np.array(data["atom_names"]) + atom_types = data["atom_types"] + + for i in range(natoms): + atom_type_idx = atom_types[i] + species = atom_names[atom_type_idx] + x, y, z = coords[i] + fx, fy, fz = forces[i] + atomic_number = Element(species).Z + + atom_line = f"{species} {x:.11e} {y:.11e} {z:.11e} {atomic_number} {fx:.11e} {fy:.11e} {fz:.11e}" + atom_lines.append(atom_line) + + # Combine all lines for this frame + frame_lines = [str(natoms), header_line] + atom_lines + return frame_lines From 794887bf0e43fa9335e92dd8d7bac1e2038c1d3f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 1 Sep 2025 12:47:27 +0000 Subject: [PATCH 05/10] refactor(quip/gap/xyz): implement file handler yield pattern and improve import organization Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com> --- dpdata/plugins/xyz.py | 63 ++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/dpdata/plugins/xyz.py b/dpdata/plugins/xyz.py index 96e596343..851e60a27 100644 --- a/dpdata/plugins/xyz.py +++ b/dpdata/plugins/xyz.py @@ -1,11 +1,12 @@ from __future__ import annotations +import io +import os from typing import TYPE_CHECKING import numpy as np from dpdata.format import Format -from dpdata.periodic_table import Element from dpdata.utils import open_file if TYPE_CHECKING: @@ -51,7 +52,6 @@ def from_system(self, file_name: FileType, **kwargs): @Format.register("quip/gap/xyz") @Format.register("quip/gap/xyz_file") class QuipGapXYZFormat(Format): - def from_labeled_system(self, data, **kwargs): return data @@ -61,58 +61,61 @@ def from_multi_systems(self, file_name, **kwargs): def to_labeled_system(self, data, file_name: FileType, **kwargs): """Write LabeledSystem data to QUIP/GAP XYZ format file. - + Parameters ---------- data : dict system data file_name : FileType - output file name + output file name or file handler **kwargs : dict additional arguments """ frames = [] nframes = len(data["energies"]) - + for frame_idx in range(nframes): frame_lines = format_single_frame(data, frame_idx) frames.append("\n".join(frame_lines)) - - # Check if file exists to determine write mode - file_path = str(file_name) - import os - if os.path.exists(file_path): - mode = "a" + + # Always write unless input is a file handler + if isinstance(file_name, io.IOBase): + # File handler - check if we need to append + file_path = getattr(file_name, "name", None) + if file_path and os.path.exists(file_path): + mode = "a" + else: + mode = "w" + with open_file(file_name, mode) as fp: + if mode == "a": + # Add newline separator if appending + fp.write("\n") + fp.write("\n".join(frames)) else: - mode = "w" - - with open_file(file_name, mode) as fp: - if mode == "a": - # Add newline separator if appending - fp.write("\n") - fp.write("\n".join(frames)) + # File path - always write (overwrite) + with open_file(file_name, "w") as fp: + fp.write("\n".join(frames)) def to_multi_systems(self, formulas, directory, **kwargs): """Return single filename for all systems in QUIP/GAP XYZ format. - + For QUIP/GAP XYZ format, all systems are written to a single file. - + Parameters ---------- formulas : list[str] - list of system names/formulas + list of system names/formulas directory : str - output filename + output filename **kwargs : dict additional arguments - + Yields ------ - str - same filename for all systems + file handler + file handler for all systems """ - with open_file(directory, 'w'): - # Just create/truncate the file, then yield filenames - pass - for _ in formulas: - yield directory + with open_file(directory, "w") as f: + # Just create/truncate the file, then yield file handlers + for _ in formulas: + yield f From 573021a92c4dbd73fb2aaca8ed6e579cec3dc3e4 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 1 Sep 2025 20:52:56 +0800 Subject: [PATCH 06/10] Update xyz.py Signed-off-by: Jinzhe Zeng --- dpdata/plugins/xyz.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/dpdata/plugins/xyz.py b/dpdata/plugins/xyz.py index 851e60a27..740a0c14b 100644 --- a/dpdata/plugins/xyz.py +++ b/dpdata/plugins/xyz.py @@ -78,21 +78,9 @@ def to_labeled_system(self, data, file_name: FileType, **kwargs): frame_lines = format_single_frame(data, frame_idx) frames.append("\n".join(frame_lines)) - # Always write unless input is a file handler if isinstance(file_name, io.IOBase): - # File handler - check if we need to append - file_path = getattr(file_name, "name", None) - if file_path and os.path.exists(file_path): - mode = "a" - else: - mode = "w" - with open_file(file_name, mode) as fp: - if mode == "a": - # Add newline separator if appending - fp.write("\n") - fp.write("\n".join(frames)) + file_name.write("\n".join(frames)) else: - # File path - always write (overwrite) with open_file(file_name, "w") as fp: fp.write("\n".join(frames)) From 683e4f1349b20443c9e32bd15415d43cc46a5759 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 1 Sep 2025 12:53:21 +0000 Subject: [PATCH 07/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- dpdata/plugins/xyz.py | 1 - dpdata/xyz/quip_gap_xyz.py | 24 ++++---- tests/test_quip_gap_xyz_to_methods.py | 87 ++++++++++++++++----------- 3 files changed, 63 insertions(+), 49 deletions(-) diff --git a/dpdata/plugins/xyz.py b/dpdata/plugins/xyz.py index 740a0c14b..1864f2219 100644 --- a/dpdata/plugins/xyz.py +++ b/dpdata/plugins/xyz.py @@ -1,7 +1,6 @@ from __future__ import annotations import io -import os from typing import TYPE_CHECKING import numpy as np diff --git a/dpdata/xyz/quip_gap_xyz.py b/dpdata/xyz/quip_gap_xyz.py index c43342973..71e976de6 100644 --- a/dpdata/xyz/quip_gap_xyz.py +++ b/dpdata/xyz/quip_gap_xyz.py @@ -189,14 +189,14 @@ def handle_single_xyz_frame(lines): def format_single_frame(data, frame_idx): """Format a single frame of system data into QUIP/GAP XYZ format lines. - + Parameters ---------- data : dict system data frame_idx : int frame index - + Returns ------- list[str] @@ -204,47 +204,47 @@ def format_single_frame(data, frame_idx): """ # Number of atoms natoms = len(data["atom_types"]) - + # Build header line with metadata header_parts = [] - + # Energy energy = data["energies"][frame_idx] header_parts.append(f"energy={energy:.12e}") - + # Virial (if present) if "virials" in data: virial = data["virials"][frame_idx] virial_str = " ".join(f"{v:.12e}" for v in virial.flatten()) header_parts.append(f'virial="{virial_str}"') - + # Lattice cell = data["cells"][frame_idx] lattice_str = " ".join(f"{c:.12e}" for c in cell.flatten()) header_parts.append(f'Lattice="{lattice_str}"') - + # Properties header_parts.append("Properties=species:S:1:pos:R:3:Z:I:1:force:R:3") - + header_line = " ".join(header_parts) - + # Format atom lines atom_lines = [] coords = data["coords"][frame_idx] forces = data["forces"][frame_idx] atom_names = np.array(data["atom_names"]) atom_types = data["atom_types"] - + for i in range(natoms): atom_type_idx = atom_types[i] species = atom_names[atom_type_idx] x, y, z = coords[i] fx, fy, fz = forces[i] atomic_number = Element(species).Z - + atom_line = f"{species} {x:.11e} {y:.11e} {z:.11e} {atomic_number} {fx:.11e} {fy:.11e} {fz:.11e}" atom_lines.append(atom_line) - + # Combine all lines for this frame frame_lines = [str(natoms), header_line] + atom_lines return frame_lines diff --git a/tests/test_quip_gap_xyz_to_methods.py b/tests/test_quip_gap_xyz_to_methods.py index 47f893a99..c34d37af6 100644 --- a/tests/test_quip_gap_xyz_to_methods.py +++ b/tests/test_quip_gap_xyz_to_methods.py @@ -1,105 +1,120 @@ #!/usr/bin/env python3 +from __future__ import annotations -import tempfile import os +import tempfile import unittest + from context import dpdata class TestQuipGapXYZToMethods(unittest.TestCase): """Test the to_labeled_system and to_multi_systems methods for QuipGapXYZFormat""" - + def setUp(self): """Set up test data""" - # Load test multi-systems + # Load test multi-systems self.multi_systems = dpdata.MultiSystems.from_file( "xyz/xyz_unittest.xyz", "quip/gap/xyz" ) self.system_b1c9 = self.multi_systems.systems["B1C9"] self.system_b5c7 = self.multi_systems.systems["B5C7"] - + def test_to_labeled_system(self): """Test writing a single labeled system to QUIP/GAP XYZ format""" - with tempfile.NamedTemporaryFile(mode='w', suffix='.xyz', delete=False) as tmp_file: + with tempfile.NamedTemporaryFile( + mode="w", suffix=".xyz", delete=False + ) as tmp_file: output_file = tmp_file.name - + try: # Write the system to file - self.system_b1c9.to('quip/gap/xyz', output_file) - + self.system_b1c9.to("quip/gap/xyz", output_file) + # Verify file was created and has content self.assertTrue(os.path.exists(output_file)) - with open(output_file, 'r') as f: + with open(output_file) as f: content = f.read() self.assertTrue(len(content) > 0) - + # Read back and verify we can parse it (use MultiSystems.from_file for QUIP/GAP XYZ) - reloaded_multi = dpdata.MultiSystems.from_file(output_file, 'quip/gap/xyz') + reloaded_multi = dpdata.MultiSystems.from_file(output_file, "quip/gap/xyz") self.assertEqual(len(reloaded_multi.systems), 1) - + # Verify the data matches (we should have the same system) reloaded_system = list(reloaded_multi.systems.values())[0] self.assertEqual(len(reloaded_system), len(self.system_b1c9)) - + finally: if os.path.exists(output_file): os.unlink(output_file) - + def test_to_multi_systems(self): """Test writing multiple systems to a single QUIP/GAP XYZ format file""" - with tempfile.NamedTemporaryFile(mode='w', suffix='.xyz', delete=False) as tmp_file: + with tempfile.NamedTemporaryFile( + mode="w", suffix=".xyz", delete=False + ) as tmp_file: output_file = tmp_file.name - + try: # Write all systems to file - self.multi_systems.to('quip/gap/xyz', output_file) - + self.multi_systems.to("quip/gap/xyz", output_file) + # Verify file was created and has content self.assertTrue(os.path.exists(output_file)) - with open(output_file, 'r') as f: + with open(output_file) as f: content = f.read() self.assertTrue(len(content) > 0) - + # Read back and verify we get the same number of systems - reloaded_multi = dpdata.MultiSystems.from_file(output_file, 'quip/gap/xyz') - self.assertEqual(len(reloaded_multi.systems), len(self.multi_systems.systems)) - + reloaded_multi = dpdata.MultiSystems.from_file(output_file, "quip/gap/xyz") + self.assertEqual( + len(reloaded_multi.systems), len(self.multi_systems.systems) + ) + # Verify total number of frames is preserved - original_frames = sum(len(sys) for sys in self.multi_systems.systems.values()) + original_frames = sum( + len(sys) for sys in self.multi_systems.systems.values() + ) reloaded_frames = sum(len(sys) for sys in reloaded_multi.systems.values()) self.assertEqual(reloaded_frames, original_frames) - + finally: if os.path.exists(output_file): os.unlink(output_file) - + def test_roundtrip_consistency(self): """Test that writing and reading back preserves data consistency""" - with tempfile.NamedTemporaryFile(mode='w', suffix='.xyz', delete=False) as tmp_file: + with tempfile.NamedTemporaryFile( + mode="w", suffix=".xyz", delete=False + ) as tmp_file: output_file = tmp_file.name - + try: # Write and read back - self.multi_systems.to('quip/gap/xyz', output_file) - reloaded_multi = dpdata.MultiSystems.from_file(output_file, 'quip/gap/xyz') - + self.multi_systems.to("quip/gap/xyz", output_file) + reloaded_multi = dpdata.MultiSystems.from_file(output_file, "quip/gap/xyz") + # Compare original and reloaded data for each system for system_name in self.multi_systems.systems: if system_name in reloaded_multi.systems: original = self.multi_systems.systems[system_name] reloaded = reloaded_multi.systems[system_name] - + # Check basic properties self.assertEqual(len(original), len(reloaded)) - self.assertEqual(len(original.data['atom_names']), len(reloaded.data['atom_names'])) - + self.assertEqual( + len(original.data["atom_names"]), + len(reloaded.data["atom_names"]), + ) + # Note: We don't check exact numerical equality because of floating point precision # and potential differences in formatting, but the data should be structurally the same - + finally: if os.path.exists(output_file): os.unlink(output_file) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() From 8871e2f184b39c517d35d4b5ccb421883a9ef5f8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 1 Sep 2025 13:37:48 +0000 Subject: [PATCH 08/10] fix(quip/gap/xyz): fix multi-systems export to handle file handler positioning correctly Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com> --- dpdata/plugins/xyz.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/dpdata/plugins/xyz.py b/dpdata/plugins/xyz.py index 1864f2219..8baad0725 100644 --- a/dpdata/plugins/xyz.py +++ b/dpdata/plugins/xyz.py @@ -77,11 +77,18 @@ def to_labeled_system(self, data, file_name: FileType, **kwargs): frame_lines = format_single_frame(data, frame_idx) frames.append("\n".join(frame_lines)) + content = "\n".join(frames) + if isinstance(file_name, io.IOBase): - file_name.write("\n".join(frames)) + # When writing to a file handler, check if file is empty + # If not empty, add a newline separator before content + current_pos = file_name.tell() + if current_pos > 0: + file_name.write("\n") + file_name.write(content) else: with open_file(file_name, "w") as fp: - fp.write("\n".join(frames)) + fp.write(content) def to_multi_systems(self, formulas, directory, **kwargs): """Return single filename for all systems in QUIP/GAP XYZ format. From 3e19545d88b9efdc6728e9e5527b491af3fc2294 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 1 Sep 2025 21:51:17 +0800 Subject: [PATCH 09/10] Update docstrings for clarity in tests Signed-off-by: Jinzhe Zeng --- tests/test_quip_gap_xyz_to_methods.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_quip_gap_xyz_to_methods.py b/tests/test_quip_gap_xyz_to_methods.py index c34d37af6..7b38cc334 100644 --- a/tests/test_quip_gap_xyz_to_methods.py +++ b/tests/test_quip_gap_xyz_to_methods.py @@ -9,10 +9,10 @@ class TestQuipGapXYZToMethods(unittest.TestCase): - """Test the to_labeled_system and to_multi_systems methods for QuipGapXYZFormat""" + """Test the to_labeled_system and to_multi_systems methods for QuipGapXYZFormat.""" def setUp(self): - """Set up test data""" + """Set up test data.""" # Load test multi-systems self.multi_systems = dpdata.MultiSystems.from_file( "xyz/xyz_unittest.xyz", "quip/gap/xyz" @@ -21,7 +21,7 @@ def setUp(self): self.system_b5c7 = self.multi_systems.systems["B5C7"] def test_to_labeled_system(self): - """Test writing a single labeled system to QUIP/GAP XYZ format""" + """Test writing a single labeled system to QUIP/GAP XYZ format.""" with tempfile.NamedTemporaryFile( mode="w", suffix=".xyz", delete=False ) as tmp_file: @@ -50,7 +50,7 @@ def test_to_labeled_system(self): os.unlink(output_file) def test_to_multi_systems(self): - """Test writing multiple systems to a single QUIP/GAP XYZ format file""" + """Test writing multiple systems to a single QUIP/GAP XYZ format file.""" with tempfile.NamedTemporaryFile( mode="w", suffix=".xyz", delete=False ) as tmp_file: @@ -84,7 +84,7 @@ def test_to_multi_systems(self): os.unlink(output_file) def test_roundtrip_consistency(self): - """Test that writing and reading back preserves data consistency""" + """Test that writing and reading back preserves data consistency.""" with tempfile.NamedTemporaryFile( mode="w", suffix=".xyz", delete=False ) as tmp_file: From 6dac45867028ccdef0572469500fc30702a60d6e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 1 Sep 2025 14:16:29 +0000 Subject: [PATCH 10/10] fix(quip/gap/xyz): remove linebreak between configurations in multi-system export Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com> --- dpdata/plugins/xyz.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/dpdata/plugins/xyz.py b/dpdata/plugins/xyz.py index 8baad0725..8ec59486a 100644 --- a/dpdata/plugins/xyz.py +++ b/dpdata/plugins/xyz.py @@ -80,12 +80,9 @@ def to_labeled_system(self, data, file_name: FileType, **kwargs): content = "\n".join(frames) if isinstance(file_name, io.IOBase): - # When writing to a file handler, check if file is empty - # If not empty, add a newline separator before content - current_pos = file_name.tell() - if current_pos > 0: - file_name.write("\n") file_name.write(content) + if not content.endswith("\n"): + file_name.write("\n") else: with open_file(file_name, "w") as fp: fp.write(content)