diff --git a/package/CHANGELOG b/package/CHANGELOG index 9aae6e3db92..ff9d088c74a 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -126,6 +126,9 @@ Fixes * new `Results` class now can be pickled/unpickled. (PR #3309) Enhancements + * LAMMPSDumpReader can now read coordinates in all the different LAMMPS + coordinate conventions. Both LAMMPSDumpReader and LAMMPSDumpParser are no + longer hardcoded to a set column layout (Issue #3358, PR #3360). * Add a ``sort`` keyword to AtomGroup.select_atoms for ordered selections (Issue #3364, PR #3368) * Adds AtomGroup.asunique, ResidueGroup.asunique, SegmentGroup.asunique diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index 00d32031a4d..e5d758816f2 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -119,6 +119,9 @@ .. autoclass:: DATAWriter :members: :inherited-members: +.. autoclass:: DumpReader + :members: + :inherited-members: """ import os @@ -449,19 +452,46 @@ def write(self, selection, frame=None): class DumpReader(base.ReaderBase): """Reads the default `LAMMPS dump format`_ - Expects trajectories produced by the default 'atom' style dump. + Supports coordinates in the LAMMPS "unscaled" (x,y,z), "scaled" (xs,ys,zs), + "unwrapped" (xu,yu,zu) and "scaled_unwrapped" (xsu,ysu,zsu) coordinate + conventions (see https://docs.lammps.org/dump.html for more details). + If `lammps_coordinate_convention='auto'` (default), + one will be guessed. Guessing checks whether the coordinates fit each convention in the order "unscaled", + "scaled", "unwrapped", "scaled_unwrapped" and whichever set of coordinates + is detected first will be used. If coordinates are given in the scaled + coordinate convention (xs,ys,zs) or scaled unwrapped coordinate convention + (xsu,ysu,zsu) they will automatically be converted from their + scaled/fractional representation to their real values. - Will automatically convert positions from their scaled/fractional - representation to their real values. + .. versionchanged:: 2.0.0 + Now parses coordinates in multiple lammps conventions (x,xs,xu,xsu) .. versionadded:: 0.19.0 """ format = 'LAMMPSDUMP' - - def __init__(self, filename, **kwargs): + _conventions = ["auto", "unscaled", "scaled", "unwrapped", + "scaled_unwrapped"] + _coordtype_column_names = { + "unscaled": ["x", "y", "z"], + "scaled": ["xs", "ys", "zs"], + "unwrapped": ["xu", "yu", "zu"], + "scaled_unwrapped": ["xsu", "ysu", "zsu"] + } + + def __init__(self, filename, lammps_coordinate_convention="auto", + **kwargs): super(DumpReader, self).__init__(filename, **kwargs) root, ext = os.path.splitext(self.filename) + if lammps_coordinate_convention in self._conventions: + self.lammps_coordinate_convention = lammps_coordinate_convention + else: + option_string = "'" + "', '".join(self._conventions) + "'" + raise ValueError("lammps_coordinate_convention=" + f"'{lammps_coordinate_convention}'" + " is not a valid option. " + f"Please choose one of {option_string}") + self._cache = {} self._reopen() @@ -518,15 +548,15 @@ def _read_next_timestep(self): if ts.frame >= len(self): raise EOFError - f.readline() # ITEM TIMESTEP + f.readline() # ITEM TIMESTEP step_num = int(f.readline()) ts.data['step'] = step_num - f.readline() # ITEM NUMBER OF ATOMS + f.readline() # ITEM NUMBER OF ATOMS n_atoms = int(f.readline()) if n_atoms != self.n_atoms: raise ValueError("Number of atoms in trajectory changed " - "this is not suported in MDAnalysis") + "this is not supported in MDAnalysis") triclinic = len(f.readline().split()) == 9 # ITEM BOX BOUNDS if triclinic: @@ -552,16 +582,42 @@ def _read_next_timestep(self): indices = np.zeros(self.n_atoms, dtype=int) - f.readline() # ITEM ATOMS etc - for i in range(self.n_atoms): - idx, _, xs, ys, zs = f.readline().split() + atomline = f.readline() # ITEM ATOMS etc + attrs = atomline.split()[2:] # attributes on coordinate line + attr_to_col_ix = {x: i for i, x in enumerate(attrs)} + convention_to_col_ix = {} + for cv_name, cv_col_names in self._coordtype_column_names.items(): + try: + convention_to_col_ix[cv_name] = [attr_to_col_ix[x] for x in cv_col_names] + except KeyError: + pass + # this should only trigger on first read of "ATOM" card, after which it + # is fixed to the guessed value. Auto proceeds unscaled -> scaled + # -> unwrapped -> scaled_unwrapped + if self.lammps_coordinate_convention == "auto": + try: + # this will automatically select in order of priority + # unscaled, scaled, unwrapped, scaled_unwrapped + self.lammps_coordinate_convention = list(convention_to_col_ix)[0] + except IndexError: + raise ValueError("No coordinate information detected") + elif not self.lammps_coordinate_convention in convention_to_col_ix: + raise ValueError(f"No coordinates following convention {self.lammps_coordinate_convention} found in timestep") + + coord_cols = convention_to_col_ix[self.lammps_coordinate_convention] - indices[i] = idx - ts.positions[i] = xs, ys, zs + ids = "id" in attr_to_col_ix + for i in range(self.n_atoms): + fields = f.readline().split() + if ids: + indices[i] = fields[attr_to_col_ix["id"]] + ts.positions[i] = [fields[dim] for dim in coord_cols] order = np.argsort(indices) ts.positions = ts.positions[order] - # by default coordinates are given in scaled format, undo that - ts.positions = distances.transform_StoR(ts.positions, ts.dimensions) + if (self.lammps_coordinate_convention.startswith("scaled")): + # if coordinates are given in scaled format, undo that + ts.positions = distances.transform_StoR(ts.positions, + ts.dimensions) return ts diff --git a/package/MDAnalysis/topology/LAMMPSParser.py b/package/MDAnalysis/topology/LAMMPSParser.py index 699bc7e1a8c..e89760f8b87 100644 --- a/package/MDAnalysis/topology/LAMMPSParser.py +++ b/package/MDAnalysis/topology/LAMMPSParser.py @@ -584,10 +584,12 @@ def _parse_box(self, header): class LammpsDumpParser(TopologyReaderBase): - """Parses Lammps ascii dump files in 'atom' format + """Parses Lammps ascii dump files in 'atom' format. - Only reads atom ids. Sets all masses to 1.0. + Sets all masses to 1.0. + + .. versionchanged:: 2.0.0 .. versionadded:: 0.19.0 """ format = 'LAMMPSDUMP' @@ -608,12 +610,15 @@ def parse(self, **kwargs): indices = np.zeros(natoms, dtype=int) types = np.zeros(natoms, dtype=object) - fin.readline() # ITEM ATOMS + atomline = fin.readline() # ITEM ATOMS + attrs = atomline.split()[2:] # attributes on coordinate line + col_ids = {attr: i for i, attr in enumerate(attrs)} # column ids + for i in range(natoms): - idx, atype, _, _, _ = fin.readline().split() + fields = fin.readline().split() - indices[i] = idx - types[i] = atype + indices[i] = fields[col_ids["id"]] + types[i] = fields[col_ids["type"]] order = np.argsort(indices) indices = indices[order] diff --git a/testsuite/MDAnalysisTests/coordinates/test_lammps.py b/testsuite/MDAnalysisTests/coordinates/test_lammps.py index 53c5ee94c7e..dcb6e89b368 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_lammps.py +++ b/testsuite/MDAnalysisTests/coordinates/test_lammps.py @@ -36,7 +36,8 @@ RefLAMMPSData, RefLAMMPSDataMini, RefLAMMPSDataDCD, ) from MDAnalysisTests.datafiles import ( - LAMMPScnt, LAMMPShyd, LAMMPSdata, LAMMPSdata_mini, LAMMPSDUMP + LAMMPScnt, LAMMPShyd, LAMMPSdata, LAMMPSdata_mini, LAMMPSDUMP, + LAMMPSDUMP_allcoords, LAMMPSDUMP_nocoords ) @@ -119,7 +120,7 @@ def test_Writer_dimensions(self, LAMMPSDATAWriter): assert_almost_equal(u_ref.dimensions, u_new.dimensions, err_msg="attributes different after writing", decimal=6) - + def test_Writer_atoms_types(self, LAMMPSDATAWriter): u_ref, u_new = LAMMPSDATAWriter assert_equal(u_ref.atoms.types, u_new.atoms.types, @@ -434,7 +435,8 @@ def u(self, tmpdir, request): with gzip.GzipFile(f, 'wb') as fout: fout.write(data) - yield mda.Universe(f, format='LAMMPSDUMP') + yield mda.Universe(f, format='LAMMPSDUMP', + lammps_coordinate_convention="auto") @pytest.fixture() def reference_positions(self): @@ -507,3 +509,119 @@ def test_atom_reordering(self, u, reference_positions): reference_positions['atom13_pos']): assert_almost_equal(atom1.position, atom1_pos, decimal=5) assert_almost_equal(atom13.position, atom13_pos, decimal=5) + + +@pytest.mark.parametrize("convention", + ["unscaled", "unwrapped", "scaled_unwrapped"]) +def test_open_absent_convention_fails(convention): + with pytest.raises(ValueError, match="No coordinates following"): + mda.Universe(LAMMPSDUMP, format='LAMMPSDUMP', + lammps_coordinate_convention=convention) + + +def test_open_incorrect_convention_fails(): + with pytest.raises(ValueError, + match="is not a valid option"): + mda.Universe(LAMMPSDUMP, format='LAMMPSDUMP', + lammps_coordinate_convention="42") + + +@pytest.mark.parametrize("convention,result", + [("auto", "unscaled"), ("unscaled", "unscaled"), + ("scaled", "scaled"), ("unwrapped", "unwrapped"), + ("scaled_unwrapped", "scaled_unwrapped")]) +def test_open_all_convention(convention, result): + u = mda.Universe(LAMMPSDUMP_allcoords, format='LAMMPSDUMP', + lammps_coordinate_convention=convention) + assert(u.trajectory.lammps_coordinate_convention == result) + + +def test_no_coordinate_info(): + with pytest.raises(ValueError, match="No coordinate information detected"): + u = mda.Universe(LAMMPSDUMP_nocoords, format='LAMMPSDUMP', + lammps_coordinate_convention="auto") + + +class TestCoordinateMatches(object): + @pytest.fixture() + def universes(self): + coordinate_conventions = ["auto", "unscaled", "scaled", "unwrapped", + "scaled_unwrapped"] + universes = {i: mda.Universe(LAMMPSDUMP_allcoords, format='LAMMPSDUMP', + lammps_coordinate_convention=i) + for i in coordinate_conventions} + return universes + + @pytest.fixture() + def reference_unscaled_positions(self): + # copied from trajectory file + # atom 340 is the first one in the trajectory so we use that + atom340_pos1_unscaled = [4.48355, 0.331422, 1.59231] + atom340_pos2_unscaled = [4.41947, 35.4403, 2.25115] + atom340_pos3_unscaled = [4.48989, 0.360633, 2.63623] + return np.asarray([atom340_pos1_unscaled, atom340_pos2_unscaled, + atom340_pos3_unscaled]) + + def test_unscaled_reference(self, universes, reference_unscaled_positions): + atom_340 = universes["unscaled"].atoms[339] + for i, ts_u in enumerate(universes["unscaled"].trajectory[0:3]): + assert_almost_equal(atom_340.position, + reference_unscaled_positions[i, :], decimal=5) + + def test_scaled_reference(self, universes, reference_unscaled_positions): + # NOTE use of unscaled positions here due to S->R transform + atom_340 = universes["scaled"].atoms[339] + for i, ts_u in enumerate(universes["scaled"].trajectory[0:3]): + assert_almost_equal(atom_340.position, + reference_unscaled_positions[i, :], decimal=1) + # NOTE this seems a bit inaccurate? + + @pytest.fixture() + def reference_unwrapped_positions(self): + # copied from trajectory file + # atom 340 is the first one in the trajectory so we use that + atom340_pos1_unwrapped = [4.48355, 35.8378, 1.59231] + atom340_pos2_unwrapped = [4.41947, 35.4403, 2.25115] + atom340_pos3_unwrapped = [4.48989, 35.867, 2.63623] + return np.asarray([atom340_pos1_unwrapped, atom340_pos2_unwrapped, + atom340_pos3_unwrapped]) + + def test_unwrapped_scaled_reference(self, universes, + reference_unwrapped_positions): + atom_340 = universes["unwrapped"].atoms[339] + for i, ts_u in enumerate(universes["unwrapped"].trajectory[0:3]): + assert_almost_equal(atom_340.position, + reference_unwrapped_positions[i, :], decimal=5) + + def test_unwrapped_scaled_reference(self, universes, + reference_unwrapped_positions): + # NOTE use of unscaled positions here due to S->R transform + atom_340 = universes["scaled_unwrapped"].atoms[339] + for i, ts_u in enumerate( + universes["scaled_unwrapped"].trajectory[0:3]): + assert_almost_equal(atom_340.position, + reference_unwrapped_positions[i, :], decimal=1) + # NOTE this seems a bit inaccurate? + + def test_scaled_unscaled_match(self, universes): + assert(len(universes["unscaled"].trajectory) + == len(universes["scaled"].trajectory)) + for ts_u, ts_s in zip(universes["unscaled"].trajectory, + universes["scaled"].trajectory): + assert_almost_equal(ts_u.positions, ts_s.positions, decimal=1) + # NOTE this seems a bit inaccurate? + + def test_unwrapped_scaled_unwrapped_match(self, universes): + assert(len(universes["unwrapped"].trajectory) == + len(universes["scaled_unwrapped"].trajectory)) + for ts_u, ts_s in zip(universes["unwrapped"].trajectory, + universes["scaled_unwrapped"].trajectory): + assert_almost_equal(ts_u.positions, ts_s.positions, decimal=1) + # NOTE this seems a bit inaccurate? + + def test_auto_is_unscaled_match(self, universes): + assert(len(universes["auto"].trajectory) == + len(universes["unscaled"].trajectory)) + for ts_a, ts_s in zip(universes["auto"].trajectory, + universes["unscaled"].trajectory): + assert_almost_equal(ts_a.positions, ts_s.positions, decimal=5) diff --git a/testsuite/MDAnalysisTests/data/lammps/spce_all_coords.lammpstrj.bz2 b/testsuite/MDAnalysisTests/data/lammps/spce_all_coords.lammpstrj.bz2 new file mode 100644 index 00000000000..a2529eb403a Binary files /dev/null and b/testsuite/MDAnalysisTests/data/lammps/spce_all_coords.lammpstrj.bz2 differ diff --git a/testsuite/MDAnalysisTests/data/lammps/spce_no_coords.lammpstrj.bz2 b/testsuite/MDAnalysisTests/data/lammps/spce_no_coords.lammpstrj.bz2 new file mode 100644 index 00000000000..2457cafa372 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/lammps/spce_no_coords.lammpstrj.bz2 differ diff --git a/testsuite/MDAnalysisTests/data/lammps/wat.lammpstrj_long.bz2 b/testsuite/MDAnalysisTests/data/lammps/wat.lammpstrj_long.bz2 new file mode 100644 index 00000000000..9eac26d0f15 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/lammps/wat.lammpstrj_long.bz2 differ diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index 487228aeb67..f8ec2e9807c 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -137,6 +137,9 @@ "LAMMPShyd", "LAMMPShyd2", "LAMMPSdata_deletedatoms", # with deleted atoms "LAMMPSDUMP", + "LAMMPSDUMP_long", # lammpsdump file with a few zeros sprinkled in the first column first frame + "LAMMPSDUMP_allcoords", # lammpsdump file with all coordinate conventions (x,xs,xu,xsu) present, from LAMMPS rdf example + "LAMMPSDUMP_nocoords", # lammpsdump file with no coordinates "unordered_res", # pdb file with resids non sequential "GMS_ASYMOPT", # GAMESS C1 optimization "GMS_SYMOPT", # GAMESS D4h optimization @@ -484,6 +487,10 @@ LAMMPShyd2 = resource_filename(__name__, "data/lammps/hydrogen-class1.data2") LAMMPSdata_deletedatoms = resource_filename(__name__, 'data/lammps/deletedatoms.data') LAMMPSDUMP = resource_filename(__name__, "data/lammps/wat.lammpstrj.bz2") +LAMMPSDUMP_long = resource_filename(__name__, "data/lammps/wat.lammpstrj_long.bz2") +LAMMPSDUMP_allcoords = resource_filename(__name__, "data/lammps/spce_all_coords.lammpstrj.bz2") +LAMMPSDUMP_nocoords = resource_filename(__name__, "data/lammps/spce_no_coords.lammpstrj.bz2") + unordered_res = resource_filename(__name__, "data/unordered_res.pdb") diff --git a/testsuite/MDAnalysisTests/topology/test_lammpsdata.py b/testsuite/MDAnalysisTests/topology/test_lammpsdata.py index 083d0ca6d72..b53edd42feb 100644 --- a/testsuite/MDAnalysisTests/topology/test_lammpsdata.py +++ b/testsuite/MDAnalysisTests/topology/test_lammpsdata.py @@ -35,6 +35,7 @@ LAMMPShyd2, LAMMPSdata_deletedatoms, LAMMPSDUMP, + LAMMPSDUMP_long, ) @@ -268,3 +269,9 @@ def test_id_ordering(self): u = mda.Universe(self.ref_filename, format='LAMMPSDUMP') # the 4th in file has id==13, but should have been sorted assert u.atoms[3].id == 4 + +# this tests that topology can still be constructed if non-standard or uneven +# column present. +class TestDumpParserLong(TestDumpParser): + + ref_filename = LAMMPSDUMP_long