From c99ccb421e45202169d9999b8f0bd4fbf8b509b9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 21 Aug 2025 11:20:10 +0000 Subject: [PATCH 01/10] Initial plan From da77916b9ea3d8a67df10f781f90b249e1682b8b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 21 Aug 2025 11:28:38 +0000 Subject: [PATCH 02/10] Add support for multiple LAMMPS atom styles Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com> --- dpdata/lammps/lmp.py | 201 ++++++++++++++++++++++++++++--- dpdata/plugins/lammps.py | 22 +++- tests/test_lammps_atom_styles.py | 159 ++++++++++++++++++++++++ 3 files changed, 364 insertions(+), 18 deletions(-) create mode 100644 tests/test_lammps_atom_styles.py diff --git a/dpdata/lammps/lmp.py b/dpdata/lammps/lmp.py index 38084d6e4..29a0e6cbb 100644 --- a/dpdata/lammps/lmp.py +++ b/dpdata/lammps/lmp.py @@ -7,6 +7,19 @@ ptr_int_fmt = "%6d" ptr_key_fmt = "%15s" +# Mapping of LAMMPS atom styles to their column layouts +# Format: (atom_id_col, atom_type_col, x_col, y_col, z_col, has_molecule_id, has_charge, charge_col) +ATOM_STYLE_COLUMNS = { + "atomic": (0, 1, 2, 3, 4, False, False, None), + "angle": (0, 2, 3, 4, 5, True, False, None), + "bond": (0, 2, 3, 4, 5, True, False, None), + "charge": (0, 1, 3, 4, 5, False, True, 2), + "full": (0, 2, 4, 5, 6, True, True, 3), + "molecular": (0, 2, 3, 4, 5, True, False, None), + "dipole": (0, 1, 3, 4, 5, False, True, 2), + "sphere": (0, 1, 4, 5, 6, False, False, None), +} + def _get_block(lines, keys): for idx in range(len(lines)): @@ -95,8 +108,63 @@ def _atom_info_atom(line): return int(vec[0]), int(vec[1]), float(vec[2]), float(vec[3]), float(vec[4]) -def get_natoms_vec(lines): - atype = get_atype(lines) +def _atom_info_style(line, atom_style="atomic"): + """Parse atom information based on the specified atom style. + + Parameters + ---------- + line : str + The atom line from LAMMPS data file + atom_style : str + The LAMMPS atom style (atomic, full, charge, etc.) + + Returns + ------- + dict + Dictionary containing parsed atom information with keys: + 'atom_id', 'atom_type', 'x', 'y', 'z', 'molecule_id' (if present), 'charge' (if present) + """ + if atom_style not in ATOM_STYLE_COLUMNS: + raise ValueError(f"Unsupported atom style: {atom_style}. Supported styles: {list(ATOM_STYLE_COLUMNS.keys())}") + + vec = line.split() + columns = ATOM_STYLE_COLUMNS[atom_style] + + result = { + 'atom_id': int(vec[columns[0]]), + 'atom_type': int(vec[columns[1]]), + 'x': float(vec[columns[2]]), + 'y': float(vec[columns[3]]), + 'z': float(vec[columns[4]]), + } + + # Add molecule ID if present + if columns[5]: # has_molecule_id + result['molecule_id'] = int(vec[1]) # molecule ID is always in column 1 when present + + # Add charge if present + if columns[6]: # has_charge + result['charge'] = float(vec[columns[7]]) # charge_col + + return result + + +def get_natoms_vec(lines, atom_style="atomic"): + """Get number of atoms for each atom type. + + Parameters + ---------- + lines : list + Lines from LAMMPS data file + atom_style : str + The LAMMPS atom style + + Returns + ------- + list + Number of atoms for each atom type + """ + atype = get_atype(lines, atom_style=atom_style) natoms_vec = [] natomtypes = get_natomtypes(lines) for ii in range(natomtypes): @@ -105,12 +173,28 @@ def get_natoms_vec(lines): return natoms_vec -def get_atype(lines, type_idx_zero=False): +def get_atype(lines, type_idx_zero=False, atom_style="atomic"): + """Get atom types from LAMMPS data file. + + Parameters + ---------- + lines : list + Lines from LAMMPS data file + type_idx_zero : bool + Whether to use zero-based indexing for atom types + atom_style : str + The LAMMPS atom style + + Returns + ------- + np.ndarray + Array of atom types + """ alines = get_atoms(lines) atype = [] for ii in alines: - # idx, mt, at, q, x, y, z = _atom_info_mol(ii) - idx, at, x, y, z = _atom_info_atom(ii) + atom_info = _atom_info_style(ii, atom_style) + at = atom_info['atom_type'] if type_idx_zero: atype.append(at - 1) else: @@ -118,16 +202,60 @@ def get_atype(lines, type_idx_zero=False): return np.array(atype, dtype=int) -def get_posi(lines): +def get_posi(lines, atom_style="atomic"): + """Get atomic positions from LAMMPS data file. + + Parameters + ---------- + lines : list + Lines from LAMMPS data file + atom_style : str + The LAMMPS atom style + + Returns + ------- + np.ndarray + Array of atomic positions + """ atom_lines = get_atoms(lines) posis = [] for ii in atom_lines: - # posis.append([float(jj) for jj in ii.split()[4:7]]) - posis.append([float(jj) for jj in ii.split()[2:5]]) + atom_info = _atom_info_style(ii, atom_style) + posis.append([atom_info['x'], atom_info['y'], atom_info['z']]) return np.array(posis) -def get_spins(lines): +def get_charges(lines, atom_style="atomic"): + """Get atomic charges from LAMMPS data file if the atom style supports charges. + + Parameters + ---------- + lines : list + Lines from LAMMPS data file + atom_style : str + The LAMMPS atom style + + Returns + ------- + np.ndarray or None + Array of atomic charges if atom style has charges, None otherwise + """ + if atom_style not in ATOM_STYLE_COLUMNS: + raise ValueError(f"Unsupported atom style: {atom_style}") + + # Check if this atom style has charges + if not ATOM_STYLE_COLUMNS[atom_style][6]: # has_charge + return None + + atom_lines = get_atoms(lines) + charges = [] + for ii in atom_lines: + atom_info = _atom_info_style(ii, atom_style) + charges.append(atom_info['charge']) + return np.array(charges) + + +def get_spins(lines, atom_style="atomic"): atom_lines = get_atoms(lines) if len(atom_lines[0].split()) < 8: return None @@ -161,9 +289,27 @@ def get_lmpbox(lines): return box_info, tilt -def system_data(lines, type_map=None, type_idx_zero=True): +def system_data(lines, type_map=None, type_idx_zero=True, atom_style="atomic"): + """Parse LAMMPS data file to system data format. + + Parameters + ---------- + lines : list + Lines from LAMMPS data file + type_map : list, optional + Mapping from atom types to element names + type_idx_zero : bool + Whether to use zero-based indexing for atom types + atom_style : str + The LAMMPS atom style (atomic, full, charge, etc.) + + Returns + ------- + dict + System data dictionary + """ system = {} - system["atom_numbs"] = get_natoms_vec(lines) + system["atom_numbs"] = get_natoms_vec(lines, atom_style=atom_style) system["atom_names"] = [] if type_map is None: for ii in range(len(system["atom_numbs"])): @@ -177,20 +323,43 @@ def system_data(lines, type_map=None, type_idx_zero=True): system["orig"] = np.array(orig) system["cells"] = [np.array(cell)] natoms = sum(system["atom_numbs"]) - system["atom_types"] = get_atype(lines, type_idx_zero=type_idx_zero) - system["coords"] = [get_posi(lines)] + system["atom_types"] = get_atype(lines, type_idx_zero=type_idx_zero, atom_style=atom_style) + system["coords"] = [get_posi(lines, atom_style=atom_style)] system["cells"] = np.array(system["cells"]) system["coords"] = np.array(system["coords"]) - spins = get_spins(lines) + # Add charges if the atom style supports them + charges = get_charges(lines, atom_style=atom_style) + if charges is not None: + system["charges"] = np.array([charges]) + + spins = get_spins(lines, atom_style=atom_style) if spins is not None: system["spins"] = np.array([spins]) return system -def to_system_data(lines, type_map=None, type_idx_zero=True): - return system_data(lines, type_map=type_map, type_idx_zero=type_idx_zero) +def to_system_data(lines, type_map=None, type_idx_zero=True, atom_style="atomic"): + """Parse LAMMPS data file to system data format. + + Parameters + ---------- + lines : list + Lines from LAMMPS data file + type_map : list, optional + Mapping from atom types to element names + type_idx_zero : bool + Whether to use zero-based indexing for atom types + atom_style : str + The LAMMPS atom style (atomic, full, charge, etc.) + + Returns + ------- + dict + System data dictionary + """ + return system_data(lines, type_map=type_map, type_idx_zero=type_idx_zero, atom_style=atom_style) def rotate_to_lower_triangle( diff --git a/dpdata/plugins/lammps.py b/dpdata/plugins/lammps.py index c7e5c7653..17d9661f2 100644 --- a/dpdata/plugins/lammps.py +++ b/dpdata/plugins/lammps.py @@ -30,10 +30,28 @@ def register_spin(data): @Format.register("lammps/lmp") class LAMMPSLmpFormat(Format): @Format.post("shift_orig_zero") - def from_system(self, file_name: FileType, type_map=None, **kwargs): + def from_system(self, file_name: FileType, type_map=None, atom_style="atomic", **kwargs): + """Load LAMMPS data file to system data format. + + Parameters + ---------- + file_name : str or Path + Path to LAMMPS data file + type_map : list, optional + Mapping from atom types to element names + atom_style : str + The LAMMPS atom style (atomic, full, charge, etc.) + **kwargs : dict + Other parameters + + Returns + ------- + dict + System data dictionary + """ with open_file(file_name) as fp: lines = [line.rstrip("\n") for line in fp] - data = dpdata.lammps.lmp.to_system_data(lines, type_map) + data = dpdata.lammps.lmp.to_system_data(lines, type_map, atom_style=atom_style) register_spin(data) return data diff --git a/tests/test_lammps_atom_styles.py b/tests/test_lammps_atom_styles.py new file mode 100644 index 000000000..f480ea602 --- /dev/null +++ b/tests/test_lammps_atom_styles.py @@ -0,0 +1,159 @@ +from __future__ import annotations + +import os +import unittest +import numpy as np +from context import dpdata + + +class TestLammpsAtomStyles(unittest.TestCase): + """Test support for different LAMMPS atom styles.""" + + def setUp(self): + """Set up test fixtures.""" + # Create test data files for different atom styles + self.test_files = {} + + # Full style test file + full_content = """# LAMMPS data file - full style +2 atoms +2 atom types +0.0 2.5243712 xlo xhi +0.0 2.0430257 ylo yhi +0.0 2.2254033 zlo zhi +1.2621856 1.2874292 0.7485898 xy xz yz + +Atoms # full + +1 1 1 -0.8476 0.0 0.0 0.0 +2 1 2 0.4238 1.2621856 0.7018028 0.5513885""" + self.test_files['full'] = '/tmp/test_full_style.lmp' + with open(self.test_files['full'], 'w') as f: + f.write(full_content) + + # Charge style test file + charge_content = """# LAMMPS data file - charge style +2 atoms +2 atom types +0.0 2.5243712 xlo xhi +0.0 2.0430257 ylo yhi +0.0 2.2254033 zlo zhi +1.2621856 1.2874292 0.7485898 xy xz yz + +Atoms # charge + +1 1 -0.8476 0.0 0.0 0.0 +2 2 0.4238 1.2621856 0.7018028 0.5513885""" + self.test_files['charge'] = '/tmp/test_charge_style.lmp' + with open(self.test_files['charge'], 'w') as f: + f.write(charge_content) + + # Bond style test file + bond_content = """# LAMMPS data file - bond style +2 atoms +2 atom types +0.0 2.5243712 xlo xhi +0.0 2.0430257 ylo yhi +0.0 2.2254033 zlo zhi +1.2621856 1.2874292 0.7485898 xy xz yz + +Atoms # bond + +1 1 1 0.0 0.0 0.0 +2 1 2 1.2621856 0.7018028 0.5513885""" + self.test_files['bond'] = '/tmp/test_bond_style.lmp' + with open(self.test_files['bond'], 'w') as f: + f.write(bond_content) + + def tearDown(self): + """Clean up test files.""" + for file_path in self.test_files.values(): + if os.path.exists(file_path): + os.remove(file_path) + + def test_atomic_style_backward_compatibility(self): + """Test that atomic style still works (backward compatibility).""" + system = dpdata.System(os.path.join("poscars", "conf.lmp"), type_map=["O", "H"]) + self.assertEqual(len(system["atom_types"]), 2) + self.assertEqual(system["atom_types"][0], 0) # O + self.assertEqual(system["atom_types"][1], 1) # H + + def test_full_style_parsing(self): + """Test parsing of full style LAMMPS data file.""" + system = dpdata.System(self.test_files['full'], fmt="lammps/lmp", + type_map=["O", "H"], atom_style="full") + + # Check basic structure + self.assertEqual(len(system["atom_types"]), 2) + self.assertEqual(system["atom_types"][0], 0) # type 1 -> O + self.assertEqual(system["atom_types"][1], 1) # type 2 -> H + + # Check coordinates + expected_coords = np.array([[0.0, 0.0, 0.0], + [1.2621856, 0.7018028, 0.5513885]]) + np.testing.assert_allclose(system["coords"][0], expected_coords, atol=1e-6) + + # Check charges are present + self.assertIn("charges", system.data) + expected_charges = np.array([-0.8476, 0.4238]) + np.testing.assert_allclose(system["charges"][0], expected_charges, atol=1e-6) + + def test_charge_style_parsing(self): + """Test parsing of charge style LAMMPS data file.""" + system = dpdata.System(self.test_files['charge'], fmt="lammps/lmp", + type_map=["O", "H"], atom_style="charge") + + # Check basic structure + self.assertEqual(len(system["atom_types"]), 2) + self.assertEqual(system["atom_types"][0], 0) # type 1 -> O + self.assertEqual(system["atom_types"][1], 1) # type 2 -> H + + # Check coordinates + expected_coords = np.array([[0.0, 0.0, 0.0], + [1.2621856, 0.7018028, 0.5513885]]) + np.testing.assert_allclose(system["coords"][0], expected_coords, atol=1e-6) + + # Check charges are present + self.assertIn("charges", system.data) + expected_charges = np.array([-0.8476, 0.4238]) + np.testing.assert_allclose(system["charges"][0], expected_charges, atol=1e-6) + + def test_bond_style_parsing(self): + """Test parsing of bond style LAMMPS data file.""" + system = dpdata.System(self.test_files['bond'], fmt="lammps/lmp", + type_map=["O", "H"], atom_style="bond") + + # Check basic structure + self.assertEqual(len(system["atom_types"]), 2) + self.assertEqual(system["atom_types"][0], 0) # type 1 -> O + self.assertEqual(system["atom_types"][1], 1) # type 2 -> H + + # Check coordinates + expected_coords = np.array([[0.0, 0.0, 0.0], + [1.2621856, 0.7018028, 0.5513885]]) + np.testing.assert_allclose(system["coords"][0], expected_coords, atol=1e-6) + + # Bond style should not have charges + self.assertNotIn("charges", system.data) + + def test_unsupported_atom_style(self): + """Test that unsupported atom styles raise appropriate errors.""" + with self.assertRaises(ValueError) as context: + dpdata.System(self.test_files['bond'], fmt="lammps/lmp", + type_map=["O", "H"], atom_style="unsupported_style") + + self.assertIn("Unsupported atom style", str(context.exception)) + + def test_default_atomic_style(self): + """Test that default behavior is atomic style.""" + # Test using existing atomic style file + system1 = dpdata.System(os.path.join("poscars", "conf.lmp"), type_map=["O", "H"]) + system2 = dpdata.System(os.path.join("poscars", "conf.lmp"), type_map=["O", "H"], atom_style="atomic") + + # Should be identical + np.testing.assert_array_equal(system1["coords"], system2["coords"]) + np.testing.assert_array_equal(system1["atom_types"], system2["atom_types"]) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file From 92906707835a513970b91c18a07242293df53fd6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 21 Aug 2025 11:31:03 +0000 Subject: [PATCH 03/10] Add documentation for LAMMPS atom style support Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com> --- docs/systems/system.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/systems/system.md b/docs/systems/system.md index 9f01fc40f..44821abcb 100644 --- a/docs/systems/system.md +++ b/docs/systems/system.md @@ -57,6 +57,20 @@ The data stored in {class}`System ` or {class}`LabeledSystem Date: Thu, 21 Aug 2025 11:55:46 +0000 Subject: [PATCH 04/10] Move LAMMPS atom style documentation to plugin docstring and implement automatic detection Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com> --- docs/systems/system.md | 13 ------- dpdata/lammps/lmp.py | 74 +++++++++++++++++++++++++++++++++++++++- dpdata/plugins/lammps.py | 46 ++++++++++++++++++++++--- 3 files changed, 115 insertions(+), 18 deletions(-) diff --git a/docs/systems/system.md b/docs/systems/system.md index 44821abcb..1689e7185 100644 --- a/docs/systems/system.md +++ b/docs/systems/system.md @@ -58,19 +58,6 @@ d_outcar.to("lammps/lmp", "conf.lmp", frame_idx=0) ``` The first frames of `d_outcar` will be dumped to 'conf.lmp' -For LAMMPS data files, you can specify the atom style when loading: -```python -# Load LAMMPS data with default atomic style (atom-ID, atom-type, x, y, z) -system = dpdata.System("data.lmp", fmt="lammps/lmp", type_map=["O", "H"]) - -# Load LAMMPS data with full style (includes charges and molecule IDs) -system = dpdata.System("data.lmp", fmt="lammps/lmp", type_map=["O", "H"], atom_style="full") - -# Load LAMMPS data with charge style (includes charges) -system = dpdata.System("data.lmp", fmt="lammps/lmp", type_map=["O", "H"], atom_style="charge") -``` -Supported atom styles include: atomic (default), full, charge, bond, angle, molecular, dipole, and sphere. - ```python d_outcar.to("vasp/poscar", "POSCAR", frame_idx=-1) ``` diff --git a/dpdata/lammps/lmp.py b/dpdata/lammps/lmp.py index 29a0e6cbb..5c48b94f4 100644 --- a/dpdata/lammps/lmp.py +++ b/dpdata/lammps/lmp.py @@ -21,6 +21,69 @@ } +def detect_atom_style(lines): + """Detect LAMMPS atom style from data file content. + + Parameters + ---------- + lines : list + Lines from LAMMPS data file + + Returns + ------- + str or None + Detected atom style, or None if not detected + """ + # Look for atom style in comments after "Atoms" section header + atom_lines = get_atoms(lines) + if not atom_lines: + return None + + # Find the "Atoms" line + for idx, line in enumerate(lines): + if "Atoms" in line: + # Check if there's a comment with atom style after "Atoms" + if "#" in line: + comment_part = line.split("#")[1].strip().lower() + for style in ATOM_STYLE_COLUMNS: + if style in comment_part: + return style + break + + # If no explicit style found, try to infer from first data line + if atom_lines: + first_line = atom_lines[0].split() + num_cols = len(first_line) + + # Try to match based on number of columns and content patterns + # This is a heuristic approach + if num_cols == 5: + # Could be atomic style: atom-ID atom-type x y z + return "atomic" + elif num_cols == 6: + # Could be charge or bond/molecular style + # Try to determine if column 2 (index 2) looks like a charge (float) or type (int) + try: + val = float(first_line[2]) + # If it's a small float, likely a charge + if abs(val) < 10 and val != int(val): + return "charge" + else: + # Likely molecule ID (integer), so bond/molecular style + return "bond" + except ValueError: + return "atomic" # fallback + elif num_cols == 7: + # Could be full style: atom-ID molecule-ID atom-type charge x y z + return "full" + elif num_cols >= 8: + # Could be dipole or sphere style + # For now, default to dipole if we have enough columns + return "dipole" + + return None # Unable to detect + + def _get_block(lines, keys): for idx in range(len(lines)): if keys in lines[idx]: @@ -352,13 +415,22 @@ def to_system_data(lines, type_map=None, type_idx_zero=True, atom_style="atomic" type_idx_zero : bool Whether to use zero-based indexing for atom types atom_style : str - The LAMMPS atom style (atomic, full, charge, etc.) + The LAMMPS atom style. If "auto", attempts to detect automatically + from file. Default is "atomic". Returns ------- dict System data dictionary """ + # Attempt automatic detection if requested + if atom_style == "auto": + detected_style = detect_atom_style(lines) + if detected_style: + atom_style = detected_style + else: + atom_style = "atomic" # fallback to default + return system_data(lines, type_map=type_map, type_idx_zero=type_idx_zero, atom_style=atom_style) diff --git a/dpdata/plugins/lammps.py b/dpdata/plugins/lammps.py index 17d9661f2..19354683f 100644 --- a/dpdata/plugins/lammps.py +++ b/dpdata/plugins/lammps.py @@ -30,24 +30,62 @@ def register_spin(data): @Format.register("lammps/lmp") class LAMMPSLmpFormat(Format): @Format.post("shift_orig_zero") - def from_system(self, file_name: FileType, type_map=None, atom_style="atomic", **kwargs): + def from_system(self, file_name: FileType, type_map=None, atom_style="auto", **kwargs): """Load LAMMPS data file to system data format. + This method supports multiple LAMMPS atom styles with automatic charge extraction + and maintains backward compatibility. The parser can automatically detect the atom + style from the LAMMPS data file header when possible. + Parameters ---------- file_name : str or Path Path to LAMMPS data file type_map : list, optional Mapping from atom types to element names - atom_style : str - The LAMMPS atom style (atomic, full, charge, etc.) + atom_style : str, optional + The LAMMPS atom style. Default is "auto" which attempts to detect + the style automatically from the file. Can also be explicitly set to: + atomic, full, charge, bond, angle, molecular, dipole, sphere **kwargs : dict Other parameters Returns ------- dict - System data dictionary + System data dictionary with additional data based on atom style: + - charges: For styles with charge information (full, charge, dipole) + - molecule_ids: For styles with molecule information (full, bond, angle, molecular) + + Examples + -------- + Load LAMMPS data with automatic detection: + + >>> system = dpdata.System("data.lmp", type_map=["O", "H"]) + + Load with specific atom styles: + + >>> # Full style with charges and molecule IDs + >>> system = dpdata.System("data.lmp", type_map=["O", "H"], atom_style="full") + >>> print(system["charges"]) # Access extracted charges + + >>> # Charge style with charges only + >>> system = dpdata.System("data.lmp", type_map=["O", "H"], atom_style="charge") + + >>> # Bond/molecular styles with molecule IDs + >>> system = dpdata.System("data.lmp", type_map=["O", "H"], atom_style="bond") + + Notes + ----- + Atom Style Column Layouts: + - atomic: atom-ID atom-type x y z (default) + - full: atom-ID molecule-ID atom-type charge x y z + - charge: atom-ID atom-type charge x y z + - bond: atom-ID molecule-ID atom-type x y z + - angle: atom-ID molecule-ID atom-type x y z + - molecular: atom-ID molecule-ID atom-type x y z + - dipole: atom-ID atom-type charge x y z mux muy muz + - sphere: atom-ID atom-type diameter density x y z """ with open_file(file_name) as fp: lines = [line.rstrip("\n") for line in fp] From 7f95c946a4a136193ea74b283732e59ee37898c4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 21 Aug 2025 13:41:32 +0000 Subject: [PATCH 05/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- dpdata/lammps/lmp.py | 90 ++++++++++++++++-------------- dpdata/plugins/lammps.py | 28 +++++----- tests/test_lammps_atom_styles.py | 96 +++++++++++++++++++------------- 3 files changed, 122 insertions(+), 92 deletions(-) diff --git a/dpdata/lammps/lmp.py b/dpdata/lammps/lmp.py index 5c48b94f4..4718bbadb 100644 --- a/dpdata/lammps/lmp.py +++ b/dpdata/lammps/lmp.py @@ -23,12 +23,12 @@ def detect_atom_style(lines): """Detect LAMMPS atom style from data file content. - + Parameters ---------- lines : list Lines from LAMMPS data file - + Returns ------- str or None @@ -38,7 +38,7 @@ def detect_atom_style(lines): atom_lines = get_atoms(lines) if not atom_lines: return None - + # Find the "Atoms" line for idx, line in enumerate(lines): if "Atoms" in line: @@ -49,12 +49,12 @@ def detect_atom_style(lines): if style in comment_part: return style break - + # If no explicit style found, try to infer from first data line if atom_lines: first_line = atom_lines[0].split() num_cols = len(first_line) - + # Try to match based on number of columns and content patterns # This is a heuristic approach if num_cols == 5: @@ -80,7 +80,7 @@ def detect_atom_style(lines): # Could be dipole or sphere style # For now, default to dipole if we have enough columns return "dipole" - + return None # Unable to detect @@ -173,14 +173,14 @@ def _atom_info_atom(line): def _atom_info_style(line, atom_style="atomic"): """Parse atom information based on the specified atom style. - + Parameters ---------- line : str The atom line from LAMMPS data file atom_style : str The LAMMPS atom style (atomic, full, charge, etc.) - + Returns ------- dict @@ -188,40 +188,44 @@ def _atom_info_style(line, atom_style="atomic"): 'atom_id', 'atom_type', 'x', 'y', 'z', 'molecule_id' (if present), 'charge' (if present) """ if atom_style not in ATOM_STYLE_COLUMNS: - raise ValueError(f"Unsupported atom style: {atom_style}. Supported styles: {list(ATOM_STYLE_COLUMNS.keys())}") - + raise ValueError( + f"Unsupported atom style: {atom_style}. Supported styles: {list(ATOM_STYLE_COLUMNS.keys())}" + ) + vec = line.split() columns = ATOM_STYLE_COLUMNS[atom_style] - + result = { - 'atom_id': int(vec[columns[0]]), - 'atom_type': int(vec[columns[1]]), - 'x': float(vec[columns[2]]), - 'y': float(vec[columns[3]]), - 'z': float(vec[columns[4]]), + "atom_id": int(vec[columns[0]]), + "atom_type": int(vec[columns[1]]), + "x": float(vec[columns[2]]), + "y": float(vec[columns[3]]), + "z": float(vec[columns[4]]), } - + # Add molecule ID if present if columns[5]: # has_molecule_id - result['molecule_id'] = int(vec[1]) # molecule ID is always in column 1 when present - + result["molecule_id"] = int( + vec[1] + ) # molecule ID is always in column 1 when present + # Add charge if present if columns[6]: # has_charge - result['charge'] = float(vec[columns[7]]) # charge_col - + result["charge"] = float(vec[columns[7]]) # charge_col + return result def get_natoms_vec(lines, atom_style="atomic"): """Get number of atoms for each atom type. - + Parameters ---------- lines : list Lines from LAMMPS data file atom_style : str The LAMMPS atom style - + Returns ------- list @@ -238,7 +242,7 @@ def get_natoms_vec(lines, atom_style="atomic"): def get_atype(lines, type_idx_zero=False, atom_style="atomic"): """Get atom types from LAMMPS data file. - + Parameters ---------- lines : list @@ -247,7 +251,7 @@ def get_atype(lines, type_idx_zero=False, atom_style="atomic"): Whether to use zero-based indexing for atom types atom_style : str The LAMMPS atom style - + Returns ------- np.ndarray @@ -257,7 +261,7 @@ def get_atype(lines, type_idx_zero=False, atom_style="atomic"): atype = [] for ii in alines: atom_info = _atom_info_style(ii, atom_style) - at = atom_info['atom_type'] + at = atom_info["atom_type"] if type_idx_zero: atype.append(at - 1) else: @@ -267,14 +271,14 @@ def get_atype(lines, type_idx_zero=False, atom_style="atomic"): def get_posi(lines, atom_style="atomic"): """Get atomic positions from LAMMPS data file. - + Parameters ---------- lines : list Lines from LAMMPS data file atom_style : str The LAMMPS atom style - + Returns ------- np.ndarray @@ -284,20 +288,20 @@ def get_posi(lines, atom_style="atomic"): posis = [] for ii in atom_lines: atom_info = _atom_info_style(ii, atom_style) - posis.append([atom_info['x'], atom_info['y'], atom_info['z']]) + posis.append([atom_info["x"], atom_info["y"], atom_info["z"]]) return np.array(posis) def get_charges(lines, atom_style="atomic"): """Get atomic charges from LAMMPS data file if the atom style supports charges. - + Parameters ---------- lines : list Lines from LAMMPS data file atom_style : str The LAMMPS atom style - + Returns ------- np.ndarray or None @@ -305,16 +309,16 @@ def get_charges(lines, atom_style="atomic"): """ if atom_style not in ATOM_STYLE_COLUMNS: raise ValueError(f"Unsupported atom style: {atom_style}") - + # Check if this atom style has charges if not ATOM_STYLE_COLUMNS[atom_style][6]: # has_charge return None - + atom_lines = get_atoms(lines) charges = [] for ii in atom_lines: atom_info = _atom_info_style(ii, atom_style) - charges.append(atom_info['charge']) + charges.append(atom_info["charge"]) return np.array(charges) @@ -354,7 +358,7 @@ def get_lmpbox(lines): def system_data(lines, type_map=None, type_idx_zero=True, atom_style="atomic"): """Parse LAMMPS data file to system data format. - + Parameters ---------- lines : list @@ -365,7 +369,7 @@ def system_data(lines, type_map=None, type_idx_zero=True, atom_style="atomic"): Whether to use zero-based indexing for atom types atom_style : str The LAMMPS atom style (atomic, full, charge, etc.) - + Returns ------- dict @@ -386,7 +390,9 @@ def system_data(lines, type_map=None, type_idx_zero=True, atom_style="atomic"): system["orig"] = np.array(orig) system["cells"] = [np.array(cell)] natoms = sum(system["atom_numbs"]) - system["atom_types"] = get_atype(lines, type_idx_zero=type_idx_zero, atom_style=atom_style) + system["atom_types"] = get_atype( + lines, type_idx_zero=type_idx_zero, atom_style=atom_style + ) system["coords"] = [get_posi(lines, atom_style=atom_style)] system["cells"] = np.array(system["cells"]) system["coords"] = np.array(system["coords"]) @@ -405,7 +411,7 @@ def system_data(lines, type_map=None, type_idx_zero=True, atom_style="atomic"): def to_system_data(lines, type_map=None, type_idx_zero=True, atom_style="atomic"): """Parse LAMMPS data file to system data format. - + Parameters ---------- lines : list @@ -417,7 +423,7 @@ def to_system_data(lines, type_map=None, type_idx_zero=True, atom_style="atomic" atom_style : str The LAMMPS atom style. If "auto", attempts to detect automatically from file. Default is "atomic". - + Returns ------- dict @@ -430,8 +436,10 @@ def to_system_data(lines, type_map=None, type_idx_zero=True, atom_style="atomic" atom_style = detected_style else: atom_style = "atomic" # fallback to default - - return system_data(lines, type_map=type_map, type_idx_zero=type_idx_zero, atom_style=atom_style) + + return system_data( + lines, type_map=type_map, type_idx_zero=type_idx_zero, atom_style=atom_style + ) def rotate_to_lower_triangle( diff --git a/dpdata/plugins/lammps.py b/dpdata/plugins/lammps.py index 19354683f..65630c0dd 100644 --- a/dpdata/plugins/lammps.py +++ b/dpdata/plugins/lammps.py @@ -30,13 +30,15 @@ def register_spin(data): @Format.register("lammps/lmp") class LAMMPSLmpFormat(Format): @Format.post("shift_orig_zero") - def from_system(self, file_name: FileType, type_map=None, atom_style="auto", **kwargs): + def from_system( + self, file_name: FileType, type_map=None, atom_style="auto", **kwargs + ): """Load LAMMPS data file to system data format. - + This method supports multiple LAMMPS atom styles with automatic charge extraction and maintains backward compatibility. The parser can automatically detect the atom style from the LAMMPS data file header when possible. - + Parameters ---------- file_name : str or Path @@ -49,38 +51,38 @@ def from_system(self, file_name: FileType, type_map=None, atom_style="auto", **k atomic, full, charge, bond, angle, molecular, dipole, sphere **kwargs : dict Other parameters - + Returns ------- dict System data dictionary with additional data based on atom style: - charges: For styles with charge information (full, charge, dipole) - molecule_ids: For styles with molecule information (full, bond, angle, molecular) - + Examples -------- Load LAMMPS data with automatic detection: - + >>> system = dpdata.System("data.lmp", type_map=["O", "H"]) - + Load with specific atom styles: - - >>> # Full style with charges and molecule IDs + + >>> # Full style with charges and molecule IDs >>> system = dpdata.System("data.lmp", type_map=["O", "H"], atom_style="full") >>> print(system["charges"]) # Access extracted charges - + >>> # Charge style with charges only >>> system = dpdata.System("data.lmp", type_map=["O", "H"], atom_style="charge") - + >>> # Bond/molecular styles with molecule IDs >>> system = dpdata.System("data.lmp", type_map=["O", "H"], atom_style="bond") - + Notes ----- Atom Style Column Layouts: - atomic: atom-ID atom-type x y z (default) - full: atom-ID molecule-ID atom-type charge x y z - - charge: atom-ID atom-type charge x y z + - charge: atom-ID atom-type charge x y z - bond: atom-ID molecule-ID atom-type x y z - angle: atom-ID molecule-ID atom-type x y z - molecular: atom-ID molecule-ID atom-type x y z diff --git a/tests/test_lammps_atom_styles.py b/tests/test_lammps_atom_styles.py index f480ea602..982b2e416 100644 --- a/tests/test_lammps_atom_styles.py +++ b/tests/test_lammps_atom_styles.py @@ -2,6 +2,7 @@ import os import unittest + import numpy as np from context import dpdata @@ -13,8 +14,8 @@ def setUp(self): """Set up test fixtures.""" # Create test data files for different atom styles self.test_files = {} - - # Full style test file + + # Full style test file full_content = """# LAMMPS data file - full style 2 atoms 2 atom types @@ -27,10 +28,10 @@ def setUp(self): 1 1 1 -0.8476 0.0 0.0 0.0 2 1 2 0.4238 1.2621856 0.7018028 0.5513885""" - self.test_files['full'] = '/tmp/test_full_style.lmp' - with open(self.test_files['full'], 'w') as f: + self.test_files["full"] = "/tmp/test_full_style.lmp" + with open(self.test_files["full"], "w") as f: f.write(full_content) - + # Charge style test file charge_content = """# LAMMPS data file - charge style 2 atoms @@ -44,10 +45,10 @@ def setUp(self): 1 1 -0.8476 0.0 0.0 0.0 2 2 0.4238 1.2621856 0.7018028 0.5513885""" - self.test_files['charge'] = '/tmp/test_charge_style.lmp' - with open(self.test_files['charge'], 'w') as f: + self.test_files["charge"] = "/tmp/test_charge_style.lmp" + with open(self.test_files["charge"], "w") as f: f.write(charge_content) - + # Bond style test file bond_content = """# LAMMPS data file - bond style 2 atoms @@ -61,8 +62,8 @@ def setUp(self): 1 1 1 0.0 0.0 0.0 2 1 2 1.2621856 0.7018028 0.5513885""" - self.test_files['bond'] = '/tmp/test_bond_style.lmp' - with open(self.test_files['bond'], 'w') as f: + self.test_files["bond"] = "/tmp/test_bond_style.lmp" + with open(self.test_files["bond"], "w") as f: f.write(bond_content) def tearDown(self): @@ -80,19 +81,22 @@ def test_atomic_style_backward_compatibility(self): def test_full_style_parsing(self): """Test parsing of full style LAMMPS data file.""" - system = dpdata.System(self.test_files['full'], fmt="lammps/lmp", - type_map=["O", "H"], atom_style="full") - + system = dpdata.System( + self.test_files["full"], + fmt="lammps/lmp", + type_map=["O", "H"], + atom_style="full", + ) + # Check basic structure self.assertEqual(len(system["atom_types"]), 2) self.assertEqual(system["atom_types"][0], 0) # type 1 -> O self.assertEqual(system["atom_types"][1], 1) # type 2 -> H - + # Check coordinates - expected_coords = np.array([[0.0, 0.0, 0.0], - [1.2621856, 0.7018028, 0.5513885]]) + expected_coords = np.array([[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]]) np.testing.assert_allclose(system["coords"][0], expected_coords, atol=1e-6) - + # Check charges are present self.assertIn("charges", system.data) expected_charges = np.array([-0.8476, 0.4238]) @@ -100,19 +104,22 @@ def test_full_style_parsing(self): def test_charge_style_parsing(self): """Test parsing of charge style LAMMPS data file.""" - system = dpdata.System(self.test_files['charge'], fmt="lammps/lmp", - type_map=["O", "H"], atom_style="charge") - + system = dpdata.System( + self.test_files["charge"], + fmt="lammps/lmp", + type_map=["O", "H"], + atom_style="charge", + ) + # Check basic structure self.assertEqual(len(system["atom_types"]), 2) self.assertEqual(system["atom_types"][0], 0) # type 1 -> O self.assertEqual(system["atom_types"][1], 1) # type 2 -> H - + # Check coordinates - expected_coords = np.array([[0.0, 0.0, 0.0], - [1.2621856, 0.7018028, 0.5513885]]) + expected_coords = np.array([[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]]) np.testing.assert_allclose(system["coords"][0], expected_coords, atol=1e-6) - + # Check charges are present self.assertIn("charges", system.data) expected_charges = np.array([-0.8476, 0.4238]) @@ -120,40 +127,53 @@ def test_charge_style_parsing(self): def test_bond_style_parsing(self): """Test parsing of bond style LAMMPS data file.""" - system = dpdata.System(self.test_files['bond'], fmt="lammps/lmp", - type_map=["O", "H"], atom_style="bond") - + system = dpdata.System( + self.test_files["bond"], + fmt="lammps/lmp", + type_map=["O", "H"], + atom_style="bond", + ) + # Check basic structure self.assertEqual(len(system["atom_types"]), 2) self.assertEqual(system["atom_types"][0], 0) # type 1 -> O self.assertEqual(system["atom_types"][1], 1) # type 2 -> H - + # Check coordinates - expected_coords = np.array([[0.0, 0.0, 0.0], - [1.2621856, 0.7018028, 0.5513885]]) + expected_coords = np.array([[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]]) np.testing.assert_allclose(system["coords"][0], expected_coords, atol=1e-6) - + # Bond style should not have charges self.assertNotIn("charges", system.data) def test_unsupported_atom_style(self): """Test that unsupported atom styles raise appropriate errors.""" with self.assertRaises(ValueError) as context: - dpdata.System(self.test_files['bond'], fmt="lammps/lmp", - type_map=["O", "H"], atom_style="unsupported_style") - + dpdata.System( + self.test_files["bond"], + fmt="lammps/lmp", + type_map=["O", "H"], + atom_style="unsupported_style", + ) + self.assertIn("Unsupported atom style", str(context.exception)) def test_default_atomic_style(self): """Test that default behavior is atomic style.""" # Test using existing atomic style file - system1 = dpdata.System(os.path.join("poscars", "conf.lmp"), type_map=["O", "H"]) - system2 = dpdata.System(os.path.join("poscars", "conf.lmp"), type_map=["O", "H"], atom_style="atomic") - + system1 = dpdata.System( + os.path.join("poscars", "conf.lmp"), type_map=["O", "H"] + ) + system2 = dpdata.System( + os.path.join("poscars", "conf.lmp"), + type_map=["O", "H"], + atom_style="atomic", + ) + # Should be identical np.testing.assert_array_equal(system1["coords"], system2["coords"]) np.testing.assert_array_equal(system1["atom_types"], system2["atom_types"]) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() From 4185c1de4c89ea7fcd2cbde188fa84ef29836a7f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 21 Aug 2025 13:53:56 +0000 Subject: [PATCH 06/10] Address review feedback: revert docs, add charge registration, use automatic detection in tests Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com> --- docs/systems/system.md | 1 - dpdata/plugins/lammps.py | 13 +++++++++++++ tests/test_lammps_atom_styles.py | 6 ++---- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/docs/systems/system.md b/docs/systems/system.md index 1689e7185..9f01fc40f 100644 --- a/docs/systems/system.md +++ b/docs/systems/system.md @@ -57,7 +57,6 @@ The data stored in {class}`System ` or {class}`LabeledSystem Date: Sat, 23 Aug 2025 05:45:12 +0000 Subject: [PATCH 07/10] Add tests for automatic detection without style comments and refactor to reduce code duplication Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com> --- tests/test_lammps_atom_styles.py | 235 +++++++++++++++++++++---------- 1 file changed, 161 insertions(+), 74 deletions(-) diff --git a/tests/test_lammps_atom_styles.py b/tests/test_lammps_atom_styles.py index 86dd85258..daff44165 100644 --- a/tests/test_lammps_atom_styles.py +++ b/tests/test_lammps_atom_styles.py @@ -14,9 +14,11 @@ def setUp(self): """Set up test fixtures.""" # Create test data files for different atom styles self.test_files = {} - - # Full style test file - full_content = """# LAMMPS data file - full style + + # Test data configurations + self.test_configs = { + "full": { + "content": """# LAMMPS data file - full style 2 atoms 2 atom types 0.0 2.5243712 xlo xhi @@ -27,13 +29,13 @@ def setUp(self): Atoms # full 1 1 1 -0.8476 0.0 0.0 0.0 -2 1 2 0.4238 1.2621856 0.7018028 0.5513885""" - self.test_files["full"] = "/tmp/test_full_style.lmp" - with open(self.test_files["full"], "w") as f: - f.write(full_content) - - # Charge style test file - charge_content = """# LAMMPS data file - charge style +2 1 2 0.4238 1.2621856 0.7018028 0.5513885""", + "has_charges": True, + "expected_charges": [-0.8476, 0.4238], + "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]] + }, + "charge": { + "content": """# LAMMPS data file - charge style 2 atoms 2 atom types 0.0 2.5243712 xlo xhi @@ -44,13 +46,13 @@ def setUp(self): Atoms # charge 1 1 -0.8476 0.0 0.0 0.0 -2 2 0.4238 1.2621856 0.7018028 0.5513885""" - self.test_files["charge"] = "/tmp/test_charge_style.lmp" - with open(self.test_files["charge"], "w") as f: - f.write(charge_content) - - # Bond style test file - bond_content = """# LAMMPS data file - bond style +2 2 0.4238 1.2621856 0.7018028 0.5513885""", + "has_charges": True, + "expected_charges": [-0.8476, 0.4238], + "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]] + }, + "bond": { + "content": """# LAMMPS data file - bond style 2 atoms 2 atom types 0.0 2.5243712 xlo xhi @@ -61,16 +63,142 @@ def setUp(self): Atoms # bond 1 1 1 0.0 0.0 0.0 -2 1 2 1.2621856 0.7018028 0.5513885""" - self.test_files["bond"] = "/tmp/test_bond_style.lmp" - with open(self.test_files["bond"], "w") as f: - f.write(bond_content) +2 1 2 1.2621856 0.7018028 0.5513885""", + "has_charges": False, + "expected_charges": None, + "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]] + }, + # Test files without style comments for heuristic detection + "full_no_comment": { + "content": """# LAMMPS data file - full style without comment +2 atoms +2 atom types +0.0 2.5243712 xlo xhi +0.0 2.0430257 ylo yhi +0.0 2.2254033 zlo zhi +1.2621856 1.2874292 0.7485898 xy xz yz + +Atoms + +1 1 1 -0.8476 0.0 0.0 0.0 +2 1 2 0.4238 1.2621856 0.7018028 0.5513885""", + "has_charges": True, + "expected_charges": [-0.8476, 0.4238], + "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]] + }, + "charge_no_comment": { + "content": """# LAMMPS data file - charge style without comment +2 atoms +2 atom types +0.0 2.5243712 xlo xhi +0.0 2.0430257 ylo yhi +0.0 2.2254033 zlo zhi +1.2621856 1.2874292 0.7485898 xy xz yz + +Atoms + +1 1 -0.8476 0.0 0.0 0.0 +2 2 0.4238 1.2621856 0.7018028 0.5513885""", + "has_charges": True, + "expected_charges": [-0.8476, 0.4238], + "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]] + }, + "bond_no_comment": { + "content": """# LAMMPS data file - bond style without comment +2 atoms +2 atom types +0.0 2.5243712 xlo xhi +0.0 2.0430257 ylo yhi +0.0 2.2254033 zlo zhi +1.2621856 1.2874292 0.7485898 xy xz yz + +Atoms + +1 1 1 0.0 0.0 0.0 +2 1 2 1.2621856 0.7018028 0.5513885""", + "has_charges": False, + "expected_charges": None, + "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]] + } + } + + # Create test files + for style, config in self.test_configs.items(): + filepath = f"/tmp/test_{style}_style.lmp" + self.test_files[style] = filepath + with open(filepath, "w") as f: + f.write(config["content"]) def tearDown(self): """Clean up test files.""" for file_path in self.test_files.values(): if os.path.exists(file_path): os.remove(file_path) + + def _load_system(self, style, explicit_style=None): + """Helper method to load a system with the given style. + + Parameters + ---------- + style : str + The style configuration key from self.test_configs + explicit_style : str, optional + Explicit atom_style parameter to pass to System() + + Returns + ------- + dpdata.System + Loaded system + """ + kwargs = { + "file_name": self.test_files[style], + "fmt": "lammps/lmp", + "type_map": ["O", "H"] + } + if explicit_style is not None: + kwargs["atom_style"] = explicit_style + + return dpdata.System(**kwargs) + + def _assert_basic_structure(self, system): + """Helper method to check basic system structure.""" + self.assertEqual(len(system["atom_types"]), 2) + self.assertEqual(system["atom_types"][0], 0) # type 1 -> O + self.assertEqual(system["atom_types"][1], 1) # type 2 -> H + + def _assert_coordinates(self, system, expected_coords): + """Helper method to check coordinates.""" + np.testing.assert_allclose(system["coords"][0], expected_coords, atol=1e-6) + + def _assert_charges(self, system, expected_charges): + """Helper method to check charges.""" + if expected_charges is not None: + self.assertIn("charges", system.data) + np.testing.assert_allclose(system["charges"][0], expected_charges, atol=1e-6) + else: + self.assertNotIn("charges", system.data) + + def _test_style_parsing(self, style_key, explicit_style=None): + """Generic helper method to test style parsing. + + Parameters + ---------- + style_key : str + Key from self.test_configs to test + explicit_style : str, optional + Explicit atom_style to pass (for testing backward compatibility) + """ + config = self.test_configs[style_key] + system = self._load_system(style_key, explicit_style) + + # Check basic structure + self._assert_basic_structure(system) + + # Check coordinates + self._assert_coordinates(system, config["expected_coords"]) + + # Check charges + self._assert_charges(system, config["expected_charges"]) def test_atomic_style_backward_compatibility(self): """Test that atomic style still works (backward compatibility).""" @@ -81,68 +209,27 @@ def test_atomic_style_backward_compatibility(self): def test_full_style_parsing(self): """Test parsing of full style LAMMPS data file with automatic detection.""" - system = dpdata.System( - self.test_files["full"], - fmt="lammps/lmp", - type_map=["O", "H"], - ) - - # Check basic structure - self.assertEqual(len(system["atom_types"]), 2) - self.assertEqual(system["atom_types"][0], 0) # type 1 -> O - self.assertEqual(system["atom_types"][1], 1) # type 2 -> H - - # Check coordinates - expected_coords = np.array([[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]]) - np.testing.assert_allclose(system["coords"][0], expected_coords, atol=1e-6) - - # Check charges are present - self.assertIn("charges", system.data) - expected_charges = np.array([-0.8476, 0.4238]) - np.testing.assert_allclose(system["charges"][0], expected_charges, atol=1e-6) + self._test_style_parsing("full") def test_charge_style_parsing(self): """Test parsing of charge style LAMMPS data file with automatic detection.""" - system = dpdata.System( - self.test_files["charge"], - fmt="lammps/lmp", - type_map=["O", "H"], - ) - - # Check basic structure - self.assertEqual(len(system["atom_types"]), 2) - self.assertEqual(system["atom_types"][0], 0) # type 1 -> O - self.assertEqual(system["atom_types"][1], 1) # type 2 -> H - - # Check coordinates - expected_coords = np.array([[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]]) - np.testing.assert_allclose(system["coords"][0], expected_coords, atol=1e-6) - - # Check charges are present - self.assertIn("charges", system.data) - expected_charges = np.array([-0.8476, 0.4238]) - np.testing.assert_allclose(system["charges"][0], expected_charges, atol=1e-6) + self._test_style_parsing("charge") def test_bond_style_parsing(self): """Test parsing of bond style LAMMPS data file.""" - system = dpdata.System( - self.test_files["bond"], - fmt="lammps/lmp", - type_map=["O", "H"], - atom_style="bond", - ) + self._test_style_parsing("bond", explicit_style="bond") - # Check basic structure - self.assertEqual(len(system["atom_types"]), 2) - self.assertEqual(system["atom_types"][0], 0) # type 1 -> O - self.assertEqual(system["atom_types"][1], 1) # type 2 -> H + def test_full_style_no_comment_detection(self): + """Test automatic detection of full style without style comment.""" + self._test_style_parsing("full_no_comment") - # Check coordinates - expected_coords = np.array([[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]]) - np.testing.assert_allclose(system["coords"][0], expected_coords, atol=1e-6) + def test_charge_style_no_comment_detection(self): + """Test automatic detection of charge style without style comment.""" + self._test_style_parsing("charge_no_comment") - # Bond style should not have charges - self.assertNotIn("charges", system.data) + def test_bond_style_no_comment_detection(self): + """Test automatic detection of bond style without style comment.""" + self._test_style_parsing("bond_no_comment") def test_unsupported_atom_style(self): """Test that unsupported atom styles raise appropriate errors.""" From 1e6356f29bcaaa6c94ae1fc0a2144da490246834 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 23 Aug 2025 05:51:04 +0000 Subject: [PATCH 08/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_lammps_atom_styles.py | 48 +++++++++++++++++--------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/tests/test_lammps_atom_styles.py b/tests/test_lammps_atom_styles.py index daff44165..8ac8be339 100644 --- a/tests/test_lammps_atom_styles.py +++ b/tests/test_lammps_atom_styles.py @@ -14,7 +14,7 @@ def setUp(self): """Set up test fixtures.""" # Create test data files for different atom styles self.test_files = {} - + # Test data configurations self.test_configs = { "full": { @@ -32,7 +32,7 @@ def setUp(self): 2 1 2 0.4238 1.2621856 0.7018028 0.5513885""", "has_charges": True, "expected_charges": [-0.8476, 0.4238], - "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]] + "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]], }, "charge": { "content": """# LAMMPS data file - charge style @@ -49,7 +49,7 @@ def setUp(self): 2 2 0.4238 1.2621856 0.7018028 0.5513885""", "has_charges": True, "expected_charges": [-0.8476, 0.4238], - "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]] + "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]], }, "bond": { "content": """# LAMMPS data file - bond style @@ -66,7 +66,7 @@ def setUp(self): 2 1 2 1.2621856 0.7018028 0.5513885""", "has_charges": False, "expected_charges": None, - "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]] + "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]], }, # Test files without style comments for heuristic detection "full_no_comment": { @@ -84,7 +84,7 @@ def setUp(self): 2 1 2 0.4238 1.2621856 0.7018028 0.5513885""", "has_charges": True, "expected_charges": [-0.8476, 0.4238], - "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]] + "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]], }, "charge_no_comment": { "content": """# LAMMPS data file - charge style without comment @@ -101,7 +101,7 @@ def setUp(self): 2 2 0.4238 1.2621856 0.7018028 0.5513885""", "has_charges": True, "expected_charges": [-0.8476, 0.4238], - "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]] + "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]], }, "bond_no_comment": { "content": """# LAMMPS data file - bond style without comment @@ -118,10 +118,10 @@ def setUp(self): 2 1 2 1.2621856 0.7018028 0.5513885""", "has_charges": False, "expected_charges": None, - "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]] - } + "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]], + }, } - + # Create test files for style, config in self.test_configs.items(): filepath = f"/tmp/test_{style}_style.lmp" @@ -134,17 +134,17 @@ def tearDown(self): for file_path in self.test_files.values(): if os.path.exists(file_path): os.remove(file_path) - + def _load_system(self, style, explicit_style=None): """Helper method to load a system with the given style. - + Parameters ---------- style : str The style configuration key from self.test_configs explicit_style : str, optional Explicit atom_style parameter to pass to System() - + Returns ------- dpdata.System @@ -153,34 +153,36 @@ def _load_system(self, style, explicit_style=None): kwargs = { "file_name": self.test_files[style], "fmt": "lammps/lmp", - "type_map": ["O", "H"] + "type_map": ["O", "H"], } if explicit_style is not None: kwargs["atom_style"] = explicit_style - + return dpdata.System(**kwargs) - + def _assert_basic_structure(self, system): """Helper method to check basic system structure.""" self.assertEqual(len(system["atom_types"]), 2) self.assertEqual(system["atom_types"][0], 0) # type 1 -> O self.assertEqual(system["atom_types"][1], 1) # type 2 -> H - + def _assert_coordinates(self, system, expected_coords): """Helper method to check coordinates.""" np.testing.assert_allclose(system["coords"][0], expected_coords, atol=1e-6) - + def _assert_charges(self, system, expected_charges): """Helper method to check charges.""" if expected_charges is not None: self.assertIn("charges", system.data) - np.testing.assert_allclose(system["charges"][0], expected_charges, atol=1e-6) + np.testing.assert_allclose( + system["charges"][0], expected_charges, atol=1e-6 + ) else: self.assertNotIn("charges", system.data) - + def _test_style_parsing(self, style_key, explicit_style=None): """Generic helper method to test style parsing. - + Parameters ---------- style_key : str @@ -190,13 +192,13 @@ def _test_style_parsing(self, style_key, explicit_style=None): """ config = self.test_configs[style_key] system = self._load_system(style_key, explicit_style) - + # Check basic structure self._assert_basic_structure(system) - + # Check coordinates self._assert_coordinates(system, config["expected_coords"]) - + # Check charges self._assert_charges(system, config["expected_charges"]) From 54d97098e9dec706bb9196b77510cbaba6ba8b03 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 23 Aug 2025 05:59:25 +0000 Subject: [PATCH 09/10] Add type hints to all new methods for LAMMPS atom styles support Co-authored-by: njzjz <9496702+njzjz@users.noreply.github.com> --- dpdata/lammps/lmp.py | 18 +++++++++--------- dpdata/plugins/lammps.py | 2 +- tests/test_lammps_atom_styles.py | 32 ++++++++++++++++---------------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/dpdata/lammps/lmp.py b/dpdata/lammps/lmp.py index 4718bbadb..d0c9528a3 100644 --- a/dpdata/lammps/lmp.py +++ b/dpdata/lammps/lmp.py @@ -21,7 +21,7 @@ } -def detect_atom_style(lines): +def detect_atom_style(lines: list[str]) -> str | None: """Detect LAMMPS atom style from data file content. Parameters @@ -171,7 +171,7 @@ def _atom_info_atom(line): return int(vec[0]), int(vec[1]), float(vec[2]), float(vec[3]), float(vec[4]) -def _atom_info_style(line, atom_style="atomic"): +def _atom_info_style(line: str, atom_style: str = "atomic") -> dict[str, int | float]: """Parse atom information based on the specified atom style. Parameters @@ -216,7 +216,7 @@ def _atom_info_style(line, atom_style="atomic"): return result -def get_natoms_vec(lines, atom_style="atomic"): +def get_natoms_vec(lines: list[str], atom_style: str = "atomic") -> list[int]: """Get number of atoms for each atom type. Parameters @@ -240,7 +240,7 @@ def get_natoms_vec(lines, atom_style="atomic"): return natoms_vec -def get_atype(lines, type_idx_zero=False, atom_style="atomic"): +def get_atype(lines: list[str], type_idx_zero: bool = False, atom_style: str = "atomic") -> np.ndarray: """Get atom types from LAMMPS data file. Parameters @@ -269,7 +269,7 @@ def get_atype(lines, type_idx_zero=False, atom_style="atomic"): return np.array(atype, dtype=int) -def get_posi(lines, atom_style="atomic"): +def get_posi(lines: list[str], atom_style: str = "atomic") -> np.ndarray: """Get atomic positions from LAMMPS data file. Parameters @@ -292,7 +292,7 @@ def get_posi(lines, atom_style="atomic"): return np.array(posis) -def get_charges(lines, atom_style="atomic"): +def get_charges(lines: list[str], atom_style: str = "atomic") -> np.ndarray | None: """Get atomic charges from LAMMPS data file if the atom style supports charges. Parameters @@ -322,7 +322,7 @@ def get_charges(lines, atom_style="atomic"): return np.array(charges) -def get_spins(lines, atom_style="atomic"): +def get_spins(lines: list[str], atom_style: str = "atomic") -> np.ndarray | None: atom_lines = get_atoms(lines) if len(atom_lines[0].split()) < 8: return None @@ -356,7 +356,7 @@ def get_lmpbox(lines): return box_info, tilt -def system_data(lines, type_map=None, type_idx_zero=True, atom_style="atomic"): +def system_data(lines: list[str], type_map: list[str] | None = None, type_idx_zero: bool = True, atom_style: str = "atomic") -> dict: """Parse LAMMPS data file to system data format. Parameters @@ -409,7 +409,7 @@ def system_data(lines, type_map=None, type_idx_zero=True, atom_style="atomic"): return system -def to_system_data(lines, type_map=None, type_idx_zero=True, atom_style="atomic"): +def to_system_data(lines: list[str], type_map: list[str] | None = None, type_idx_zero: bool = True, atom_style: str = "atomic") -> dict: """Parse LAMMPS data file to system data format. Parameters diff --git a/dpdata/plugins/lammps.py b/dpdata/plugins/lammps.py index 44819ed23..9949e99e7 100644 --- a/dpdata/plugins/lammps.py +++ b/dpdata/plugins/lammps.py @@ -26,7 +26,7 @@ def register_spin(data): dpdata.System.register_data_type(dt) -def register_charge(data): +def register_charge(data: dict) -> None: if "charges" in data: dt = DataType( "charges", diff --git a/tests/test_lammps_atom_styles.py b/tests/test_lammps_atom_styles.py index 8ac8be339..0a68fa1b0 100644 --- a/tests/test_lammps_atom_styles.py +++ b/tests/test_lammps_atom_styles.py @@ -10,7 +10,7 @@ class TestLammpsAtomStyles(unittest.TestCase): """Test support for different LAMMPS atom styles.""" - def setUp(self): + def setUp(self) -> None: """Set up test fixtures.""" # Create test data files for different atom styles self.test_files = {} @@ -129,13 +129,13 @@ def setUp(self): with open(filepath, "w") as f: f.write(config["content"]) - def tearDown(self): + def tearDown(self) -> None: """Clean up test files.""" for file_path in self.test_files.values(): if os.path.exists(file_path): os.remove(file_path) - def _load_system(self, style, explicit_style=None): + def _load_system(self, style: str, explicit_style: str | None = None) -> "dpdata.System": """Helper method to load a system with the given style. Parameters @@ -160,17 +160,17 @@ def _load_system(self, style, explicit_style=None): return dpdata.System(**kwargs) - def _assert_basic_structure(self, system): + def _assert_basic_structure(self, system: "dpdata.System") -> None: """Helper method to check basic system structure.""" self.assertEqual(len(system["atom_types"]), 2) self.assertEqual(system["atom_types"][0], 0) # type 1 -> O self.assertEqual(system["atom_types"][1], 1) # type 2 -> H - def _assert_coordinates(self, system, expected_coords): + def _assert_coordinates(self, system: "dpdata.System", expected_coords: list[list[float]]) -> None: """Helper method to check coordinates.""" np.testing.assert_allclose(system["coords"][0], expected_coords, atol=1e-6) - def _assert_charges(self, system, expected_charges): + def _assert_charges(self, system: "dpdata.System", expected_charges: list[float] | None) -> None: """Helper method to check charges.""" if expected_charges is not None: self.assertIn("charges", system.data) @@ -180,7 +180,7 @@ def _assert_charges(self, system, expected_charges): else: self.assertNotIn("charges", system.data) - def _test_style_parsing(self, style_key, explicit_style=None): + def _test_style_parsing(self, style_key: str, explicit_style: str | None = None) -> None: """Generic helper method to test style parsing. Parameters @@ -202,38 +202,38 @@ def _test_style_parsing(self, style_key, explicit_style=None): # Check charges self._assert_charges(system, config["expected_charges"]) - def test_atomic_style_backward_compatibility(self): + def test_atomic_style_backward_compatibility(self) -> None: """Test that atomic style still works (backward compatibility).""" system = dpdata.System(os.path.join("poscars", "conf.lmp"), type_map=["O", "H"]) self.assertEqual(len(system["atom_types"]), 2) self.assertEqual(system["atom_types"][0], 0) # O self.assertEqual(system["atom_types"][1], 1) # H - def test_full_style_parsing(self): + def test_full_style_parsing(self) -> None: """Test parsing of full style LAMMPS data file with automatic detection.""" self._test_style_parsing("full") - def test_charge_style_parsing(self): + def test_charge_style_parsing(self) -> None: """Test parsing of charge style LAMMPS data file with automatic detection.""" self._test_style_parsing("charge") - def test_bond_style_parsing(self): + def test_bond_style_parsing(self) -> None: """Test parsing of bond style LAMMPS data file.""" self._test_style_parsing("bond", explicit_style="bond") - def test_full_style_no_comment_detection(self): + def test_full_style_no_comment_detection(self) -> None: """Test automatic detection of full style without style comment.""" self._test_style_parsing("full_no_comment") - def test_charge_style_no_comment_detection(self): + def test_charge_style_no_comment_detection(self) -> None: """Test automatic detection of charge style without style comment.""" self._test_style_parsing("charge_no_comment") - def test_bond_style_no_comment_detection(self): + def test_bond_style_no_comment_detection(self) -> None: """Test automatic detection of bond style without style comment.""" self._test_style_parsing("bond_no_comment") - def test_unsupported_atom_style(self): + def test_unsupported_atom_style(self) -> None: """Test that unsupported atom styles raise appropriate errors.""" with self.assertRaises(ValueError) as context: dpdata.System( @@ -245,7 +245,7 @@ def test_unsupported_atom_style(self): self.assertIn("Unsupported atom style", str(context.exception)) - def test_default_atomic_style(self): + def test_default_atomic_style(self) -> None: """Test that default behavior is atomic style.""" # Test using existing atomic style file system1 = dpdata.System( From 77d05602cd3e7bdb3e693be4e53c73f6b22b5727 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 23 Aug 2025 06:02:09 +0000 Subject: [PATCH 10/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- dpdata/lammps/lmp.py | 18 +++++++++++++++--- tests/test_lammps_atom_styles.py | 18 +++++++++++++----- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/dpdata/lammps/lmp.py b/dpdata/lammps/lmp.py index d0c9528a3..e259aa5c6 100644 --- a/dpdata/lammps/lmp.py +++ b/dpdata/lammps/lmp.py @@ -240,7 +240,9 @@ def get_natoms_vec(lines: list[str], atom_style: str = "atomic") -> list[int]: return natoms_vec -def get_atype(lines: list[str], type_idx_zero: bool = False, atom_style: str = "atomic") -> np.ndarray: +def get_atype( + lines: list[str], type_idx_zero: bool = False, atom_style: str = "atomic" +) -> np.ndarray: """Get atom types from LAMMPS data file. Parameters @@ -356,7 +358,12 @@ def get_lmpbox(lines): return box_info, tilt -def system_data(lines: list[str], type_map: list[str] | None = None, type_idx_zero: bool = True, atom_style: str = "atomic") -> dict: +def system_data( + lines: list[str], + type_map: list[str] | None = None, + type_idx_zero: bool = True, + atom_style: str = "atomic", +) -> dict: """Parse LAMMPS data file to system data format. Parameters @@ -409,7 +416,12 @@ def system_data(lines: list[str], type_map: list[str] | None = None, type_idx_ze return system -def to_system_data(lines: list[str], type_map: list[str] | None = None, type_idx_zero: bool = True, atom_style: str = "atomic") -> dict: +def to_system_data( + lines: list[str], + type_map: list[str] | None = None, + type_idx_zero: bool = True, + atom_style: str = "atomic", +) -> dict: """Parse LAMMPS data file to system data format. Parameters diff --git a/tests/test_lammps_atom_styles.py b/tests/test_lammps_atom_styles.py index 0a68fa1b0..04c98c66f 100644 --- a/tests/test_lammps_atom_styles.py +++ b/tests/test_lammps_atom_styles.py @@ -135,7 +135,9 @@ def tearDown(self) -> None: if os.path.exists(file_path): os.remove(file_path) - def _load_system(self, style: str, explicit_style: str | None = None) -> "dpdata.System": + def _load_system( + self, style: str, explicit_style: str | None = None + ) -> dpdata.System: """Helper method to load a system with the given style. Parameters @@ -160,17 +162,21 @@ def _load_system(self, style: str, explicit_style: str | None = None) -> "dpdata return dpdata.System(**kwargs) - def _assert_basic_structure(self, system: "dpdata.System") -> None: + def _assert_basic_structure(self, system: dpdata.System) -> None: """Helper method to check basic system structure.""" self.assertEqual(len(system["atom_types"]), 2) self.assertEqual(system["atom_types"][0], 0) # type 1 -> O self.assertEqual(system["atom_types"][1], 1) # type 2 -> H - def _assert_coordinates(self, system: "dpdata.System", expected_coords: list[list[float]]) -> None: + def _assert_coordinates( + self, system: dpdata.System, expected_coords: list[list[float]] + ) -> None: """Helper method to check coordinates.""" np.testing.assert_allclose(system["coords"][0], expected_coords, atol=1e-6) - def _assert_charges(self, system: "dpdata.System", expected_charges: list[float] | None) -> None: + def _assert_charges( + self, system: dpdata.System, expected_charges: list[float] | None + ) -> None: """Helper method to check charges.""" if expected_charges is not None: self.assertIn("charges", system.data) @@ -180,7 +186,9 @@ def _assert_charges(self, system: "dpdata.System", expected_charges: list[float] else: self.assertNotIn("charges", system.data) - def _test_style_parsing(self, style_key: str, explicit_style: str | None = None) -> None: + def _test_style_parsing( + self, style_key: str, explicit_style: str | None = None + ) -> None: """Generic helper method to test style parsing. Parameters