diff --git a/dpdata/lammps/lmp.py b/dpdata/lammps/lmp.py index 38084d6e..e259aa5c 100644 --- a/dpdata/lammps/lmp.py +++ b/dpdata/lammps/lmp.py @@ -7,6 +7,82 @@ ptr_int_fmt = "%6d" ptr_key_fmt = "%15s" +# Mapping of LAMMPS atom styles to their column layouts +# Format: (atom_id_col, atom_type_col, x_col, y_col, z_col, has_molecule_id, has_charge, charge_col) +ATOM_STYLE_COLUMNS = { + "atomic": (0, 1, 2, 3, 4, False, False, None), + "angle": (0, 2, 3, 4, 5, True, False, None), + "bond": (0, 2, 3, 4, 5, True, False, None), + "charge": (0, 1, 3, 4, 5, False, True, 2), + "full": (0, 2, 4, 5, 6, True, True, 3), + "molecular": (0, 2, 3, 4, 5, True, False, None), + "dipole": (0, 1, 3, 4, 5, False, True, 2), + "sphere": (0, 1, 4, 5, 6, False, False, None), +} + + +def detect_atom_style(lines: list[str]) -> str | None: + """Detect LAMMPS atom style from data file content. + + Parameters + ---------- + lines : list + Lines from LAMMPS data file + + Returns + ------- + str or None + Detected atom style, or None if not detected + """ + # Look for atom style in comments after "Atoms" section header + atom_lines = get_atoms(lines) + if not atom_lines: + return None + + # Find the "Atoms" line + for idx, line in enumerate(lines): + if "Atoms" in line: + # Check if there's a comment with atom style after "Atoms" + if "#" in line: + comment_part = line.split("#")[1].strip().lower() + for style in ATOM_STYLE_COLUMNS: + if style in comment_part: + return style + break + + # If no explicit style found, try to infer from first data line + if atom_lines: + first_line = atom_lines[0].split() + num_cols = len(first_line) + + # Try to match based on number of columns and content patterns + # This is a heuristic approach + if num_cols == 5: + # Could be atomic style: atom-ID atom-type x y z + return "atomic" + elif num_cols == 6: + # Could be charge or bond/molecular style + # Try to determine if column 2 (index 2) looks like a charge (float) or type (int) + try: + val = float(first_line[2]) + # If it's a small float, likely a charge + if abs(val) < 10 and val != int(val): + return "charge" + else: + # Likely molecule ID (integer), so bond/molecular style + return "bond" + except ValueError: + return "atomic" # fallback + elif num_cols == 7: + # Could be full style: atom-ID molecule-ID atom-type charge x y z + return "full" + elif num_cols >= 8: + # Could be dipole or sphere style + # For now, default to dipole if we have enough columns + return "dipole" + + return None # Unable to detect + def _get_block(lines, keys): for idx in range(len(lines)): @@ -95,8 +171,67 @@ def _atom_info_atom(line): return int(vec[0]), int(vec[1]), float(vec[2]), float(vec[3]), float(vec[4]) -def get_natoms_vec(lines): - atype = get_atype(lines) +def _atom_info_style(line: str, atom_style: str = "atomic") -> dict[str, int | float]: + """Parse atom information based on the specified atom style. + + Parameters + ---------- + line : str + The atom line from LAMMPS data file + atom_style : str + The LAMMPS atom style (atomic, full, charge, etc.) + + Returns + ------- + dict + Dictionary containing parsed atom information with keys: + 'atom_id', 'atom_type', 'x', 'y', 'z', 'molecule_id' (if present), 'charge' (if present) + """ + if atom_style not in ATOM_STYLE_COLUMNS: + raise ValueError( + f"Unsupported atom style: {atom_style}. Supported styles: {list(ATOM_STYLE_COLUMNS.keys())}" + ) + + vec = line.split() + columns = ATOM_STYLE_COLUMNS[atom_style] + + result = { + "atom_id": int(vec[columns[0]]), + "atom_type": int(vec[columns[1]]), + "x": float(vec[columns[2]]), + "y": float(vec[columns[3]]), + "z": float(vec[columns[4]]), + } + + # Add molecule ID if present + if columns[5]: # has_molecule_id + result["molecule_id"] = int( + vec[1] + ) # molecule ID is always in column 1 when present + + # Add charge if present + if columns[6]: # has_charge + result["charge"] = float(vec[columns[7]]) # charge_col + + return result + + +def get_natoms_vec(lines: list[str], atom_style: str = "atomic") -> list[int]: + """Get number of atoms for each atom type. + + Parameters + ---------- + lines : list + Lines from LAMMPS data file + atom_style : str + The LAMMPS atom style + + Returns + ------- + list + Number of atoms for each atom type + """ + atype = get_atype(lines, atom_style=atom_style) natoms_vec = [] natomtypes = get_natomtypes(lines) for ii in range(natomtypes): @@ -105,12 +240,30 @@ def get_natoms_vec(lines): return natoms_vec -def get_atype(lines, type_idx_zero=False): +def get_atype( + lines: list[str], type_idx_zero: bool = False, atom_style: str = "atomic" +) -> np.ndarray: + """Get atom types from LAMMPS data file. + + Parameters + ---------- + lines : list + Lines from LAMMPS data file + type_idx_zero : bool + Whether to use zero-based indexing for atom types + atom_style : str + The LAMMPS atom style + + Returns + ------- + np.ndarray + Array of atom types + """ alines = get_atoms(lines) atype = [] for ii in alines: - # idx, mt, at, q, x, y, z = _atom_info_mol(ii) - idx, at, x, y, z = _atom_info_atom(ii) + atom_info = _atom_info_style(ii, atom_style) + at = atom_info["atom_type"] if type_idx_zero: atype.append(at - 1) else: @@ -118,16 +271,60 @@ def get_atype(lines, type_idx_zero=False): return np.array(atype, dtype=int) -def get_posi(lines): +def get_posi(lines: list[str], atom_style: str = "atomic") -> np.ndarray: + """Get atomic positions from LAMMPS data file. + + Parameters + ---------- + lines : list + Lines from LAMMPS data file + atom_style : str + The LAMMPS atom style + + Returns + ------- + np.ndarray + Array of atomic positions + """ atom_lines = get_atoms(lines) posis = [] for ii in atom_lines: - # posis.append([float(jj) for jj in ii.split()[4:7]]) - posis.append([float(jj) for jj in ii.split()[2:5]]) + atom_info = _atom_info_style(ii, atom_style) + posis.append([atom_info["x"], atom_info["y"], atom_info["z"]]) return np.array(posis) -def get_spins(lines): +def get_charges(lines: list[str], atom_style: str = "atomic") -> np.ndarray | None: + """Get atomic charges from LAMMPS data file if the atom style supports charges. + + Parameters + ---------- + lines : list + Lines from LAMMPS data file + atom_style : str + The LAMMPS atom style + + Returns + ------- + np.ndarray or None + Array of atomic charges if atom style has charges, None otherwise + """ + if atom_style not in ATOM_STYLE_COLUMNS: + raise ValueError(f"Unsupported atom style: {atom_style}") + + # Check if this atom style has charges + if not ATOM_STYLE_COLUMNS[atom_style][6]: # has_charge + return None + + atom_lines = get_atoms(lines) + charges = [] + for ii in atom_lines: + atom_info = _atom_info_style(ii, atom_style) + charges.append(atom_info["charge"]) + return np.array(charges) + + +def get_spins(lines: list[str], atom_style: str = "atomic") -> np.ndarray | None: atom_lines = get_atoms(lines) if len(atom_lines[0].split()) < 8: return None @@ -161,9 +358,32 @@ def get_lmpbox(lines): return box_info, tilt -def system_data(lines, type_map=None, type_idx_zero=True): +def system_data( + lines: list[str], + type_map: list[str] | None = None, + type_idx_zero: bool = True, + atom_style: str = "atomic", +) -> dict: + """Parse LAMMPS data file to system data format. + + Parameters + ---------- + lines : list + Lines from LAMMPS data file + type_map : list, optional + Mapping from atom types to element names + type_idx_zero : bool + Whether to use zero-based indexing for atom types + atom_style : str + The LAMMPS atom style (atomic, full, charge, etc.) + + Returns + ------- + dict + System data dictionary + """ system = {} - system["atom_numbs"] = get_natoms_vec(lines) + system["atom_numbs"] = get_natoms_vec(lines, atom_style=atom_style) system["atom_names"] = [] if type_map is None: for ii in range(len(system["atom_numbs"])): @@ -177,20 +397,61 @@ def system_data(lines, type_map=None, type_idx_zero=True): system["orig"] = np.array(orig) system["cells"] = [np.array(cell)] natoms = sum(system["atom_numbs"]) - system["atom_types"] = get_atype(lines, type_idx_zero=type_idx_zero) - system["coords"] = [get_posi(lines)] + system["atom_types"] = get_atype( + lines, type_idx_zero=type_idx_zero, atom_style=atom_style + ) + system["coords"] = [get_posi(lines, atom_style=atom_style)] system["cells"] = np.array(system["cells"]) system["coords"] = np.array(system["coords"]) - spins = get_spins(lines) + # Add charges if the atom style supports them + charges = get_charges(lines, atom_style=atom_style) + if charges is not None: + system["charges"] = np.array([charges]) + + spins = get_spins(lines, atom_style=atom_style) if spins is not None: system["spins"] = np.array([spins]) return system -def to_system_data(lines, type_map=None, type_idx_zero=True): - return system_data(lines, type_map=type_map, type_idx_zero=type_idx_zero) +def to_system_data( + lines: list[str], + type_map: list[str] | None = None, + type_idx_zero: bool = True, + atom_style: str = "atomic", +) -> dict: + """Parse LAMMPS data file to system data format. + + Parameters + ---------- + lines : list + Lines from LAMMPS data file + type_map : list, optional + Mapping from atom types to element names + type_idx_zero : bool + Whether to use zero-based indexing for atom types + atom_style : str + The LAMMPS atom style. If "auto", attempts to detect automatically + from file. Default is "atomic". + + Returns + ------- + dict + System data dictionary + """ + # Attempt automatic detection if requested + if atom_style == "auto": + detected_style = detect_atom_style(lines) + if detected_style: + atom_style = detected_style + else: + atom_style = "atomic" # fallback to default + + return system_data( + lines, type_map=type_map, type_idx_zero=type_idx_zero, atom_style=atom_style + ) def rotate_to_lower_triangle( diff --git a/dpdata/plugins/lammps.py b/dpdata/plugins/lammps.py index c7e5c765..9949e99e 100644 --- a/dpdata/plugins/lammps.py +++ b/dpdata/plugins/lammps.py @@ -26,15 +26,86 @@ def register_spin(data): dpdata.System.register_data_type(dt) +def register_charge(data: dict) -> None: + if "charges" in data: + dt = DataType( + "charges", + np.ndarray, + (Axis.NFRAMES, Axis.NATOMS), + required=False, + deepmd_name="charge", + ) + dpdata.System.register_data_type(dt) + + @Format.register("lmp") @Format.register("lammps/lmp") class LAMMPSLmpFormat(Format): @Format.post("shift_orig_zero") - def from_system(self, file_name: FileType, type_map=None, **kwargs): + def from_system( + self, file_name: FileType, type_map=None, atom_style="auto", **kwargs + ): + """Load LAMMPS data file to system data format. + + This method supports multiple LAMMPS atom styles with automatic charge extraction + and maintains backward compatibility. The parser can automatically detect the atom + style from the LAMMPS data file header when possible. + + Parameters + ---------- + file_name : str or Path + Path to LAMMPS data file + type_map : list, optional + Mapping from atom types to element names + atom_style : str, optional + The LAMMPS atom style. Default is "auto" which attempts to detect + the style automatically from the file. Can also be explicitly set to: + atomic, full, charge, bond, angle, molecular, dipole, sphere + **kwargs : dict + Other parameters + + Returns + ------- + dict + System data dictionary with additional data based on atom style: + - charges: For styles with charge information (full, charge, dipole) + - molecule_ids: For styles with molecule information (full, bond, angle, molecular) + + Examples + -------- + Load LAMMPS data with automatic detection: + + >>> system = dpdata.System("data.lmp", type_map=["O", "H"]) + + Load with specific atom styles: + + >>> # Full style with charges and molecule IDs + >>> system = dpdata.System("data.lmp", type_map=["O", "H"], atom_style="full") + >>> print(system["charges"]) # Access extracted charges + + >>> # Charge style with charges only + >>> system = dpdata.System("data.lmp", type_map=["O", "H"], atom_style="charge") + + >>> # Bond/molecular styles with molecule IDs + >>> system = dpdata.System("data.lmp", type_map=["O", "H"], atom_style="bond") + + Notes + ----- + Atom Style Column Layouts: + - atomic: atom-ID atom-type x y z (default) + - full: atom-ID molecule-ID atom-type charge x y z + - charge: atom-ID atom-type charge x y z + - bond: atom-ID molecule-ID atom-type x y z + - angle: atom-ID molecule-ID atom-type x y z + - molecular: atom-ID molecule-ID atom-type x y z + - dipole: atom-ID atom-type charge x y z mux muy muz + - sphere: atom-ID atom-type diameter density x y z + """ with open_file(file_name) as fp: lines = [line.rstrip("\n") for line in fp] - data = dpdata.lammps.lmp.to_system_data(lines, type_map) + data = dpdata.lammps.lmp.to_system_data(lines, type_map, atom_style=atom_style) register_spin(data) + register_charge(data) return data def to_system(self, data, file_name: FileType, frame_idx=0, **kwargs): diff --git a/tests/test_lammps_atom_styles.py b/tests/test_lammps_atom_styles.py new file mode 100644 index 00000000..04c98c66 --- /dev/null +++ b/tests/test_lammps_atom_styles.py @@ -0,0 +1,274 @@ +from __future__ import annotations + +import os +import unittest + +import numpy as np +from context import dpdata + + +class TestLammpsAtomStyles(unittest.TestCase): + """Test support for different LAMMPS atom styles.""" + + def setUp(self) -> None: + """Set up test fixtures.""" + # Create test data files for different atom styles + self.test_files = {} + + # Test data configurations + self.test_configs = { + "full": { + "content": """# LAMMPS data file - full style +2 atoms +2 atom types +0.0 2.5243712 xlo xhi +0.0 2.0430257 ylo yhi +0.0 2.2254033 zlo zhi +1.2621856 1.2874292 0.7485898 xy xz yz + +Atoms # full + +1 1 1 -0.8476 0.0 0.0 0.0 +2 1 2 0.4238 1.2621856 0.7018028 0.5513885""", + "has_charges": True, + "expected_charges": [-0.8476, 0.4238], + "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]], + }, + "charge": { + "content": """# LAMMPS data file - charge style +2 atoms +2 atom types +0.0 2.5243712 xlo xhi +0.0 2.0430257 ylo yhi +0.0 2.2254033 zlo zhi +1.2621856 1.2874292 0.7485898 xy xz yz + +Atoms # charge + +1 1 -0.8476 0.0 0.0 0.0 +2 2 0.4238 1.2621856 0.7018028 0.5513885""", + "has_charges": True, + "expected_charges": [-0.8476, 0.4238], + "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]], + }, + "bond": { + "content": """# LAMMPS data file - bond style +2 atoms +2 atom types +0.0 2.5243712 xlo xhi +0.0 2.0430257 ylo yhi +0.0 2.2254033 zlo zhi +1.2621856 1.2874292 0.7485898 xy xz yz + +Atoms # bond + +1 1 1 0.0 0.0 0.0 +2 1 2 1.2621856 0.7018028 0.5513885""", + "has_charges": False, + "expected_charges": None, + "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]], + }, + # Test files without style comments for heuristic detection + "full_no_comment": { + "content": """# LAMMPS data file - full style without comment +2 atoms +2 atom types +0.0 2.5243712 xlo xhi +0.0 2.0430257 ylo yhi +0.0 2.2254033 zlo zhi +1.2621856 1.2874292 0.7485898 xy xz yz + +Atoms + +1 1 1 -0.8476 0.0 0.0 0.0 +2 1 2 0.4238 1.2621856 0.7018028 0.5513885""", + "has_charges": True, + "expected_charges": [-0.8476, 0.4238], + "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]], + }, + "charge_no_comment": { + "content": """# LAMMPS data file - charge style without comment +2 atoms +2 atom types +0.0 2.5243712 xlo xhi +0.0 2.0430257 ylo yhi +0.0 2.2254033 zlo zhi +1.2621856 1.2874292 0.7485898 xy xz yz + +Atoms + +1 1 -0.8476 0.0 0.0 0.0 +2 2 0.4238 1.2621856 0.7018028 0.5513885""", + "has_charges": True, + "expected_charges": [-0.8476, 0.4238], + "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]], + }, + "bond_no_comment": { + "content": """# LAMMPS data file - bond style without comment +2 atoms +2 atom types +0.0 2.5243712 xlo xhi +0.0 2.0430257 ylo yhi +0.0 2.2254033 zlo zhi +1.2621856 1.2874292 0.7485898 xy xz yz + +Atoms + +1 1 1 0.0 0.0 0.0 +2 1 2 1.2621856 0.7018028 0.5513885""", + "has_charges": False, + "expected_charges": None, + "expected_coords": [[0.0, 0.0, 0.0], [1.2621856, 0.7018028, 0.5513885]], + }, + } + + # Create test files + for style, config in self.test_configs.items(): + filepath = f"/tmp/test_{style}_style.lmp" + self.test_files[style] = filepath + with open(filepath, "w") as f: + f.write(config["content"]) + + def tearDown(self) -> None: + """Clean up test files.""" + for file_path in self.test_files.values(): + if os.path.exists(file_path): + os.remove(file_path) + + def _load_system( + self, style: str, explicit_style: str | None = None + ) -> dpdata.System: + """Helper method to load a system with the given style. + + Parameters + ---------- + style : str + The style configuration key from self.test_configs + explicit_style : str, optional + Explicit atom_style parameter to pass to System() + + Returns + ------- + dpdata.System + Loaded system + """ + kwargs = { + "file_name": self.test_files[style], + "fmt": "lammps/lmp", + "type_map": ["O", "H"], + } + if explicit_style is not None: + kwargs["atom_style"] = explicit_style + + return dpdata.System(**kwargs) + + def _assert_basic_structure(self, system: dpdata.System) -> None: + """Helper method to check basic system structure.""" + self.assertEqual(len(system["atom_types"]), 2) + self.assertEqual(system["atom_types"][0], 0) # type 1 -> O + self.assertEqual(system["atom_types"][1], 1) # type 2 -> H + + def _assert_coordinates( + self, system: dpdata.System, expected_coords: list[list[float]] + ) -> None: + """Helper method to check coordinates.""" + np.testing.assert_allclose(system["coords"][0], expected_coords, atol=1e-6) + + def _assert_charges( + self, system: dpdata.System, expected_charges: list[float] | None + ) -> None: + """Helper method to check charges.""" + if expected_charges is not None: + self.assertIn("charges", system.data) + np.testing.assert_allclose( + system["charges"][0], expected_charges, atol=1e-6 + ) + else: + self.assertNotIn("charges", system.data) + + def _test_style_parsing( + self, style_key: str, explicit_style: str | None = None + ) -> None: + """Generic helper method to test style parsing. + + Parameters + ---------- + style_key : str + Key from self.test_configs to test + explicit_style : str, optional + Explicit atom_style to pass (for testing backward compatibility) + """ + config = self.test_configs[style_key] + system = self._load_system(style_key, explicit_style) + + # Check basic structure + self._assert_basic_structure(system) + + # Check coordinates + self._assert_coordinates(system, config["expected_coords"]) + + # Check charges + self._assert_charges(system, config["expected_charges"]) + + def test_atomic_style_backward_compatibility(self) -> None: + """Test that atomic style still works (backward compatibility).""" + system = dpdata.System(os.path.join("poscars", "conf.lmp"), type_map=["O", "H"]) + self.assertEqual(len(system["atom_types"]), 2) + self.assertEqual(system["atom_types"][0], 0) # O + self.assertEqual(system["atom_types"][1], 1) # H + + def test_full_style_parsing(self) -> None: + """Test parsing of full style LAMMPS data file with automatic detection.""" + self._test_style_parsing("full") + + def test_charge_style_parsing(self) -> None: + """Test parsing of charge style LAMMPS data file with automatic detection.""" + self._test_style_parsing("charge") + + def test_bond_style_parsing(self) -> None: + """Test parsing of bond style LAMMPS data file.""" + self._test_style_parsing("bond", explicit_style="bond") + + def test_full_style_no_comment_detection(self) -> None: + """Test automatic detection of full style without style comment.""" + self._test_style_parsing("full_no_comment") + + def test_charge_style_no_comment_detection(self) -> None: + """Test automatic detection of charge style without style comment.""" + self._test_style_parsing("charge_no_comment") + + def test_bond_style_no_comment_detection(self) -> None: + """Test automatic detection of bond style without style comment.""" + self._test_style_parsing("bond_no_comment") + + def test_unsupported_atom_style(self) -> None: + """Test that unsupported atom styles raise appropriate errors.""" + with self.assertRaises(ValueError) as context: + dpdata.System( + self.test_files["bond"], + fmt="lammps/lmp", + type_map=["O", "H"], + atom_style="unsupported_style", + ) + + self.assertIn("Unsupported atom style", str(context.exception)) + + def test_default_atomic_style(self) -> None: + """Test that default behavior is atomic style.""" + # Test using existing atomic style file + system1 = dpdata.System( + os.path.join("poscars", "conf.lmp"), type_map=["O", "H"] + ) + system2 = dpdata.System( + os.path.join("poscars", "conf.lmp"), + type_map=["O", "H"], + atom_style="atomic", + ) + + # Should be identical + np.testing.assert_array_equal(system1["coords"], system2["coords"]) + np.testing.assert_array_equal(system1["atom_types"], system2["atom_types"]) + + +if __name__ == "__main__": + unittest.main()