MDAnalysis · IAlibay · Aug 21, 2021 · Aug 21, 2019 · Aug 21, 2019 · Sep 22, 2019
diff --git a/package/CHANGELOG b/package/CHANGELOG
@@ -126,6 +126,9 @@ Fixes
   * new `Results` class now can be pickled/unpickled. (PR #3309)
 
 Enhancements
+  * LAMMPSDumpReader can now read coordinates in all the different LAMMPS
+    coordinate conventions. Both LAMMPSDumpReader and LAMMPSDumpParser are no
+    longer hardcoded to a set column layout (Issue #3358, PR #3360).
   * Add a ``sort`` keyword to AtomGroup.select_atoms for ordered selections
     (Issue #3364, PR #3368)
   * Adds AtomGroup.asunique, ResidueGroup.asunique, SegmentGroup.asunique

diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py
@@ -119,6 +119,9 @@
 .. autoclass:: DATAWriter
    :members:
    :inherited-members:
+.. autoclass:: DumpReader
+   :members:
+   :inherited-members:
 
 """
 import os
@@ -449,19 +452,46 @@ def write(self, selection, frame=None):
 class DumpReader(base.ReaderBase):
     """Reads the default `LAMMPS dump format`_
 
-    Expects trajectories produced by the default 'atom' style dump.
+    Supports coordinates in the LAMMPS "unscaled" (x,y,z), "scaled" (xs,ys,zs),
+    "unwrapped" (xu,yu,zu) and "scaled_unwrapped" (xsu,ysu,zsu) coordinate
+    conventions (see https://docs.lammps.org/dump.html for more details).
+    If `lammps_coordinate_convention='auto'` (default),
+    one will be guessed. Guessing checks whether the coordinates fit each convention in the order "unscaled",
+    "scaled", "unwrapped", "scaled_unwrapped" and whichever set of coordinates
+    is detected first will be used. If coordinates are given in the scaled
+    coordinate convention (xs,ys,zs) or scaled unwrapped coordinate convention
+    (xsu,ysu,zsu) they will automatically be converted from their
+    scaled/fractional representation to their real values.
 
-    Will automatically convert positions from their scaled/fractional
-    representation to their real values.
 
+    .. versionchanged:: 2.0.0
+       Now parses coordinates in multiple lammps conventions (x,xs,xu,xsu)
     .. versionadded:: 0.19.0
     """
     format = 'LAMMPSDUMP'
-
-    def __init__(self, filename, **kwargs):
+    _conventions = ["auto", "unscaled", "scaled", "unwrapped",
+                    "scaled_unwrapped"]
+    _coordtype_column_names = {
+        "unscaled": ["x", "y", "z"],
+        "scaled": ["xs", "ys", "zs"],
+        "unwrapped": ["xu", "yu", "zu"],
+        "scaled_unwrapped": ["xsu", "ysu", "zsu"]
+    }
+
+    def __init__(self, filename, lammps_coordinate_convention="auto",
+                 **kwargs):
         super(DumpReader, self).__init__(filename, **kwargs)
 
         root, ext = os.path.splitext(self.filename)
+        if lammps_coordinate_convention in self._conventions:
+            self.lammps_coordinate_convention = lammps_coordinate_convention
+        else:
+            option_string = "'" + "', '".join(self._conventions) + "'"
+            raise ValueError("lammps_coordinate_convention="
+                             f"'{lammps_coordinate_convention}'"
+                             " is not a valid option. "
+                             f"Please choose one of {option_string}")
+
         self._cache = {}
 
         self._reopen()
@@ -518,15 +548,15 @@ def _read_next_timestep(self):
         if ts.frame >= len(self):
             raise EOFError
 
-        f.readline() # ITEM TIMESTEP
+        f.readline()  # ITEM TIMESTEP
         step_num = int(f.readline())
         ts.data['step'] = step_num
 
-        f.readline() # ITEM NUMBER OF ATOMS
+        f.readline()  # ITEM NUMBER OF ATOMS
         n_atoms = int(f.readline())
         if n_atoms != self.n_atoms:
             raise ValueError("Number of atoms in trajectory changed "
-                             "this is not suported in MDAnalysis")
+                             "this is not supported in MDAnalysis")
 
         triclinic = len(f.readline().split()) == 9  # ITEM BOX BOUNDS
         if triclinic:
@@ -552,16 +582,42 @@ def _read_next_timestep(self):
 
         indices = np.zeros(self.n_atoms, dtype=int)
 
-        f.readline()  # ITEM ATOMS etc
-        for i in range(self.n_atoms):
-            idx, _, xs, ys, zs = f.readline().split()
+        atomline = f.readline()  # ITEM ATOMS etc
+        attrs = atomline.split()[2:]  # attributes on coordinate line
+        attr_to_col_ix = {x: i for i, x in enumerate(attrs)}
+        convention_to_col_ix = {}
+        for cv_name, cv_col_names in self._coordtype_column_names.items():
+            try:
+                convention_to_col_ix[cv_name] = [attr_to_col_ix[x] for x in cv_col_names]
+            except KeyError:
+                pass
+        # this should only trigger on first read of "ATOM" card, after which it
+        # is fixed to the guessed value. Auto proceeds unscaled -> scaled
+        # -> unwrapped -> scaled_unwrapped
+        if self.lammps_coordinate_convention == "auto":
+            try:
+                # this will automatically select in order of priority
+                # unscaled, scaled, unwrapped, scaled_unwrapped
+                self.lammps_coordinate_convention = list(convention_to_col_ix)[0]
+            except IndexError:
+                raise ValueError("No coordinate information detected")
+        elif not self.lammps_coordinate_convention in convention_to_col_ix:
+            raise ValueError(f"No coordinates following convention {self.lammps_coordinate_convention} found in timestep")
+
+        coord_cols = convention_to_col_ix[self.lammps_coordinate_convention]
 
-            indices[i] = idx
-            ts.positions[i] = xs, ys, zs
+        ids = "id" in attr_to_col_ix
+        for i in range(self.n_atoms):
+            fields = f.readline().split()
+            if ids:
+                indices[i] = fields[attr_to_col_ix["id"]]
+            ts.positions[i] = [fields[dim] for dim in coord_cols]
 
         order = np.argsort(indices)
         ts.positions = ts.positions[order]
-        # by default coordinates are given in scaled format, undo that
-        ts.positions = distances.transform_StoR(ts.positions, ts.dimensions)
+        if (self.lammps_coordinate_convention.startswith("scaled")):
+            # if coordinates are given in scaled format, undo that
+            ts.positions = distances.transform_StoR(ts.positions,
+                                                    ts.dimensions)
 
         return ts
diff --git a/package/MDAnalysis/topology/LAMMPSParser.py b/package/MDAnalysis/topology/LAMMPSParser.py
@@ -584,10 +584,12 @@ def _parse_box(self, header):
 
 
 class LammpsDumpParser(TopologyReaderBase):
-    """Parses Lammps ascii dump files in 'atom' format
+    """Parses Lammps ascii dump files in 'atom' format.
 
-    Only reads atom ids.  Sets all masses to 1.0.
+    Sets all masses to 1.0.
 
+
+    .. versionchanged:: 2.0.0
     .. versionadded:: 0.19.0
     """
     format = 'LAMMPSDUMP'
@@ -608,12 +610,15 @@ def parse(self, **kwargs):
             indices = np.zeros(natoms, dtype=int)
             types = np.zeros(natoms, dtype=object)
 
-            fin.readline()  # ITEM ATOMS
+            atomline = fin.readline()  # ITEM ATOMS
+            attrs = atomline.split()[2:]  # attributes on coordinate line
+            col_ids = {attr: i for i, attr in enumerate(attrs)}  # column ids
+
             for i in range(natoms):
-                idx, atype, _, _, _ = fin.readline().split()
+                fields = fin.readline().split()
 
-                indices[i] = idx
-                types[i] = atype
+                indices[i] = fields[col_ids["id"]]
+                types[i] = fields[col_ids["type"]]
 
         order = np.argsort(indices)
         indices = indices[order]

diff --git a/testsuite/MDAnalysisTests/coordinates/test_lammps.py b/testsuite/MDAnalysisTests/coordinates/test_lammps.py
@@ -36,7 +36,8 @@
     RefLAMMPSData, RefLAMMPSDataMini, RefLAMMPSDataDCD,
 )
 from MDAnalysisTests.datafiles import (
-    LAMMPScnt, LAMMPShyd, LAMMPSdata, LAMMPSdata_mini, LAMMPSDUMP
+    LAMMPScnt, LAMMPShyd, LAMMPSdata, LAMMPSdata_mini, LAMMPSDUMP,
+    LAMMPSDUMP_allcoords, LAMMPSDUMP_nocoords
 )
 
 
@@ -119,7 +120,7 @@ def test_Writer_dimensions(self, LAMMPSDATAWriter):
         assert_almost_equal(u_ref.dimensions, u_new.dimensions,
                             err_msg="attributes different after writing",
                             decimal=6)
-    
+
     def test_Writer_atoms_types(self, LAMMPSDATAWriter):
         u_ref, u_new = LAMMPSDATAWriter
         assert_equal(u_ref.atoms.types, u_new.atoms.types,
@@ -434,7 +435,8 @@ def u(self, tmpdir, request):
                 with gzip.GzipFile(f, 'wb') as fout:
                     fout.write(data)
 
-        yield mda.Universe(f, format='LAMMPSDUMP')
+        yield mda.Universe(f, format='LAMMPSDUMP',
+                           lammps_coordinate_convention="auto")
 
     @pytest.fixture()
     def reference_positions(self):
@@ -507,3 +509,119 @@ def test_atom_reordering(self, u, reference_positions):
                                              reference_positions['atom13_pos']):
             assert_almost_equal(atom1.position, atom1_pos, decimal=5)
             assert_almost_equal(atom13.position, atom13_pos, decimal=5)
+
+
+@pytest.mark.parametrize("convention",
+                         ["unscaled", "unwrapped", "scaled_unwrapped"])
+def test_open_absent_convention_fails(convention):
+    with pytest.raises(ValueError, match="No coordinates following"):
+        mda.Universe(LAMMPSDUMP, format='LAMMPSDUMP',
+                     lammps_coordinate_convention=convention)
+
+
+def test_open_incorrect_convention_fails():
+    with pytest.raises(ValueError,
+                       match="is not a valid option"):
+        mda.Universe(LAMMPSDUMP, format='LAMMPSDUMP',
+                     lammps_coordinate_convention="42")
+
+
+@pytest.mark.parametrize("convention,result",
+                         [("auto", "unscaled"), ("unscaled", "unscaled"),
+                          ("scaled", "scaled"), ("unwrapped", "unwrapped"),
+                          ("scaled_unwrapped", "scaled_unwrapped")])
+def test_open_all_convention(convention, result):
+    u = mda.Universe(LAMMPSDUMP_allcoords, format='LAMMPSDUMP',
+                     lammps_coordinate_convention=convention)
+    assert(u.trajectory.lammps_coordinate_convention == result)
+
+
+def test_no_coordinate_info():
+    with pytest.raises(ValueError, match="No coordinate information detected"):
+        u = mda.Universe(LAMMPSDUMP_nocoords, format='LAMMPSDUMP',
+                         lammps_coordinate_convention="auto")
+
+
+class TestCoordinateMatches(object):
+    @pytest.fixture()
+    def universes(self):
+        coordinate_conventions = ["auto", "unscaled", "scaled", "unwrapped",
+                                  "scaled_unwrapped"]
+        universes = {i: mda.Universe(LAMMPSDUMP_allcoords, format='LAMMPSDUMP',
+                     lammps_coordinate_convention=i)
+                     for i in coordinate_conventions}
+        return universes
+
+    @pytest.fixture()
+    def reference_unscaled_positions(self):
+        # copied from trajectory file
+        # atom 340 is the first one in the trajectory so we use that
+        atom340_pos1_unscaled = [4.48355, 0.331422, 1.59231]
+        atom340_pos2_unscaled = [4.41947, 35.4403, 2.25115]
+        atom340_pos3_unscaled = [4.48989, 0.360633, 2.63623]
+        return np.asarray([atom340_pos1_unscaled, atom340_pos2_unscaled,
+                          atom340_pos3_unscaled])
+
+    def test_unscaled_reference(self, universes, reference_unscaled_positions):
+        atom_340 = universes["unscaled"].atoms[339]
+        for i, ts_u in enumerate(universes["unscaled"].trajectory[0:3]):
+            assert_almost_equal(atom_340.position,
+                                reference_unscaled_positions[i, :], decimal=5)
+
+    def test_scaled_reference(self, universes, reference_unscaled_positions):
+        # NOTE use of unscaled positions here due to S->R transform
+        atom_340 = universes["scaled"].atoms[339]
+        for i, ts_u in enumerate(universes["scaled"].trajectory[0:3]):
+            assert_almost_equal(atom_340.position,
+                                reference_unscaled_positions[i, :], decimal=1)
+            # NOTE this seems a bit inaccurate?
+
+    @pytest.fixture()
+    def reference_unwrapped_positions(self):
+        # copied from trajectory file
+        # atom 340 is the first one in the trajectory so we use that
+        atom340_pos1_unwrapped = [4.48355, 35.8378, 1.59231]
+        atom340_pos2_unwrapped = [4.41947, 35.4403, 2.25115]
+        atom340_pos3_unwrapped = [4.48989, 35.867, 2.63623]
+        return np.asarray([atom340_pos1_unwrapped, atom340_pos2_unwrapped,
+                          atom340_pos3_unwrapped])
+
+    def test_unwrapped_scaled_reference(self, universes,
+                                        reference_unwrapped_positions):
+        atom_340 = universes["unwrapped"].atoms[339]
+        for i, ts_u in enumerate(universes["unwrapped"].trajectory[0:3]):
+            assert_almost_equal(atom_340.position,
+                                reference_unwrapped_positions[i, :], decimal=5)
+
+    def test_unwrapped_scaled_reference(self, universes,
+                                        reference_unwrapped_positions):
+        # NOTE use of unscaled positions here due to S->R transform
+        atom_340 = universes["scaled_unwrapped"].atoms[339]
+        for i, ts_u in enumerate(
+                universes["scaled_unwrapped"].trajectory[0:3]):
+            assert_almost_equal(atom_340.position,
+                                reference_unwrapped_positions[i, :], decimal=1)
+            # NOTE this seems a bit inaccurate?
+
+    def test_scaled_unscaled_match(self, universes):
+        assert(len(universes["unscaled"].trajectory)
+               == len(universes["scaled"].trajectory))
+        for ts_u, ts_s in zip(universes["unscaled"].trajectory,
+                              universes["scaled"].trajectory):
+            assert_almost_equal(ts_u.positions, ts_s.positions, decimal=1)
+            # NOTE this seems a bit inaccurate?
+
+    def test_unwrapped_scaled_unwrapped_match(self, universes):
+        assert(len(universes["unwrapped"].trajectory) ==
+               len(universes["scaled_unwrapped"].trajectory))
+        for ts_u, ts_s in zip(universes["unwrapped"].trajectory,
+                              universes["scaled_unwrapped"].trajectory):
+            assert_almost_equal(ts_u.positions, ts_s.positions, decimal=1)
+            # NOTE this seems a bit inaccurate?
+
+    def test_auto_is_unscaled_match(self, universes):
+        assert(len(universes["auto"].trajectory) ==
+               len(universes["unscaled"].trajectory))
+        for ts_a, ts_s in zip(universes["auto"].trajectory,
+                              universes["unscaled"].trajectory):
+            assert_almost_equal(ts_a.positions, ts_s.positions, decimal=5)
diff --git a/testsuite/MDAnalysisTests/data/lammps/spce_all_coords.lammpstrj.bz2 b/testsuite/MDAnalysisTests/data/lammps/spce_all_coords.lammpstrj.bz2
diff --git a/testsuite/MDAnalysisTests/data/lammps/spce_no_coords.lammpstrj.bz2 b/testsuite/MDAnalysisTests/data/lammps/spce_no_coords.lammpstrj.bz2
diff --git a/testsuite/MDAnalysisTests/data/lammps/wat.lammpstrj_long.bz2 b/testsuite/MDAnalysisTests/data/lammps/wat.lammpstrj_long.bz2
diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py
@@ -137,6 +137,9 @@
     "LAMMPShyd", "LAMMPShyd2",
     "LAMMPSdata_deletedatoms",  # with deleted atoms
     "LAMMPSDUMP",
+    "LAMMPSDUMP_long",  # lammpsdump file with a few zeros sprinkled in the first column first frame
+    "LAMMPSDUMP_allcoords",  # lammpsdump file with all coordinate conventions (x,xs,xu,xsu) present, from LAMMPS rdf example
+    "LAMMPSDUMP_nocoords",  # lammpsdump file with no coordinates
     "unordered_res",  # pdb file with resids non sequential
     "GMS_ASYMOPT",  # GAMESS C1  optimization
     "GMS_SYMOPT",   # GAMESS D4h optimization
@@ -484,6 +487,10 @@
 LAMMPShyd2 = resource_filename(__name__, "data/lammps/hydrogen-class1.data2")
 LAMMPSdata_deletedatoms = resource_filename(__name__, 'data/lammps/deletedatoms.data')
 LAMMPSDUMP = resource_filename(__name__, "data/lammps/wat.lammpstrj.bz2")
+LAMMPSDUMP_long = resource_filename(__name__, "data/lammps/wat.lammpstrj_long.bz2")
+LAMMPSDUMP_allcoords = resource_filename(__name__, "data/lammps/spce_all_coords.lammpstrj.bz2")
+LAMMPSDUMP_nocoords = resource_filename(__name__, "data/lammps/spce_no_coords.lammpstrj.bz2")
+
 
 unordered_res = resource_filename(__name__, "data/unordered_res.pdb")
 

diff --git a/testsuite/MDAnalysisTests/topology/test_lammpsdata.py b/testsuite/MDAnalysisTests/topology/test_lammpsdata.py
@@ -35,6 +35,7 @@
     LAMMPShyd2,
     LAMMPSdata_deletedatoms,
     LAMMPSDUMP,
+    LAMMPSDUMP_long,
 )
 
 
@@ -268,3 +269,9 @@ def test_id_ordering(self):
         u = mda.Universe(self.ref_filename, format='LAMMPSDUMP')
         # the 4th in file has id==13, but should have been sorted
         assert u.atoms[3].id == 4
+
+# this tests that topology can still be constructed if non-standard or uneven
+# column present.
+class TestDumpParserLong(TestDumpParser):
+
+    ref_filename = LAMMPSDUMP_long