Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
67 commits
Select commit Hold shift + click to select a range
b7526cb
removed hardcoded atom line
Aug 21, 2019
d45a8f1
added PR2333
Aug 21, 2019
aefbaf5
rebased
Sep 22, 2019
d6e904d
added docs and test
lilyminium Feb 14, 2020
26f5645
initial colids
hmacdope Jul 6, 2021
b8d42d4
Merge remote-tracking branch 'lily/unhardcode-lammpsdump-parser' into…
hmacdope Jul 6, 2021
06f109b
recover original behaviour
hmacdope Jul 6, 2021
417ac61
add convention kwarg
hmacdope Jul 6, 2021
23b3a50
add warning
hmacdope Jul 6, 2021
580b8ae
add some docs and line lengths
hmacdope Jul 6, 2021
c59758c
clean up
hmacdope Jul 6, 2021
cfccb1a
remove coord data flag
hmacdope Jul 6, 2021
482dd9b
try and fix pep8
hmacdope Jul 6, 2021
6b3abbe
pep8 again
hmacdope Jul 6, 2021
f91d55b
add flexibility to id cols
hmacdope Jul 6, 2021
c09b1a2
fix richards demands
hmacdope Jul 6, 2021
3704dfe
fix typo
hmacdope Jul 6, 2021
bf85a49
Update package/MDAnalysis/topology/LAMMPSParser.py
hmacdope Jul 6, 2021
b9497b6
Update package/MDAnalysis/coordinates/LAMMPS.py
hmacdope Jul 6, 2021
7e8d429
add versionchanged and change name to auto
hmacdope Jul 7, 2021
d48e499
typo
hmacdope Jul 7, 2021
268ffa6
add datafile with all coordinate conventions
hmacdope Jul 7, 2021
df7db87
update test includes
hmacdope Jul 7, 2021
755daa3
mnodify tester
hmacdope Jul 7, 2021
a0256f4
undo bug from one liner
hmacdope Jul 7, 2021
fd5ef5c
add opening tests
hmacdope Jul 7, 2021
4eaf954
change to one liner in topol parser
hmacdope Jul 7, 2021
fb50a4a
add test for coordinate matches
hmacdope Jul 7, 2021
aa9694b
add regression tests for coordinates
hmacdope Jul 7, 2021
85f39f4
fix tests and add warning for auto detected convention
hmacdope Jul 7, 2021
19752e3
remove warning
hmacdope Jul 7, 2021
f18f0d2
re-add lammpsdump-long
hmacdope Jul 8, 2021
ff58b09
pep8
hmacdope Jul 8, 2021
5d8ea3f
update CHANGELOG
hmacdope Jul 8, 2021
ea05796
remove erroneous print
hmacdope Jul 14, 2021
7ec8a62
add versionchanged
hmacdope Jul 15, 2021
238ed90
Update package/MDAnalysis/coordinates/LAMMPS.py
hmacdope Jul 15, 2021
07eb8fa
Merge branch 'fix-lammpsdump-parser' of github.com:hmacdope/mdanalysi…
hmacdope Jul 15, 2021
0a9e81c
add autoclass
hmacdope Jul 15, 2021
ace0b75
change keys
hmacdope Jul 15, 2021
fb0e307
change coordinate reader
hmacdope Jul 15, 2021
e963e5d
fix up
hmacdope Jul 15, 2021
e059410
pep8
hmacdope Jul 15, 2021
f3ee49b
fix testing
hmacdope Jul 15, 2021
fd10657
fix pep8
hmacdope Jul 15, 2021
9450727
pep8 again
hmacdope Jul 15, 2021
893fe04
fix richard requests
hmacdope Jul 16, 2021
a78527a
orbekst changes
hmacdope Jul 20, 2021
283d0b6
move to enhancement
hmacdope Jul 20, 2021
c418caf
fix docs
hmacdope Jul 20, 2021
bcf3c6e
Update testsuite/MDAnalysisTests/coordinates/test_lammps.py
hmacdope Jul 20, 2021
226dbfc
Update testsuite/MDAnalysisTests/coordinates/test_lammps.py
hmacdope Jul 20, 2021
61886f8
Update testsuite/MDAnalysisTests/coordinates/test_lammps.py
hmacdope Jul 20, 2021
1582d36
Update testsuite/MDAnalysisTests/coordinates/test_lammps.py
hmacdope Jul 20, 2021
1ae6f27
fix indent
hmacdope Jul 20, 2021
efaa80d
pep8
hmacdope Jul 20, 2021
36b1a35
pep8 again
hmacdope Jul 20, 2021
15c073a
pep8 GAIN
hmacdope Jul 20, 2021
6b5cf61
add Lily suggetsions
hmacdope Jul 20, 2021
b95fa58
pep8
hmacdope Jul 20, 2021
159220f
Update package/MDAnalysis/coordinates/LAMMPS.py
hmacdope Aug 21, 2021
11376c1
Update package/MDAnalysis/coordinates/LAMMPS.py
hmacdope Aug 21, 2021
6ba4992
add comment for lammpsdump-long test
hmacdope Aug 21, 2021
0aa046d
lily inspired refactor
hmacdope Aug 21, 2021
c464b68
typo
hmacdope Aug 21, 2021
1d7497f
add comment
hmacdope Aug 21, 2021
11d8518
Merge branch 'develop' into fix-lammpsdump-parser
hmacdope Aug 21, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@ Fixes
* new `Results` class now can be pickled/unpickled. (PR #3309)

Enhancements
* LAMMPSDumpReader can now read coordinates in all the different LAMMPS
coordinate conventions. Both LAMMPSDumpReader and LAMMPSDumpParser are no
longer hardcoded to a set column layout (Issue #3358, PR #3360).
* Add a ``sort`` keyword to AtomGroup.select_atoms for ordered selections
(Issue #3364, PR #3368)
* Adds AtomGroup.asunique, ResidueGroup.asunique, SegmentGroup.asunique
Expand Down
86 changes: 71 additions & 15 deletions package/MDAnalysis/coordinates/LAMMPS.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,9 @@
.. autoclass:: DATAWriter
:members:
:inherited-members:
.. autoclass:: DumpReader
:members:
:inherited-members:

"""
import os
Expand Down Expand Up @@ -449,19 +452,46 @@ def write(self, selection, frame=None):
class DumpReader(base.ReaderBase):
"""Reads the default `LAMMPS dump format`_

Expects trajectories produced by the default 'atom' style dump.
Supports coordinates in the LAMMPS "unscaled" (x,y,z), "scaled" (xs,ys,zs),
"unwrapped" (xu,yu,zu) and "scaled_unwrapped" (xsu,ysu,zsu) coordinate
conventions (see https://docs.lammps.org/dump.html for more details).
If `lammps_coordinate_convention='auto'` (default),
one will be guessed. Guessing checks whether the coordinates fit each convention in the order "unscaled",
"scaled", "unwrapped", "scaled_unwrapped" and whichever set of coordinates
is detected first will be used. If coordinates are given in the scaled
coordinate convention (xs,ys,zs) or scaled unwrapped coordinate convention
(xsu,ysu,zsu) they will automatically be converted from their
scaled/fractional representation to their real values.

Will automatically convert positions from their scaled/fractional
representation to their real values.

.. versionchanged:: 2.0.0
Now parses coordinates in multiple lammps conventions (x,xs,xu,xsu)
.. versionadded:: 0.19.0
"""
format = 'LAMMPSDUMP'

def __init__(self, filename, **kwargs):
_conventions = ["auto", "unscaled", "scaled", "unwrapped",
"scaled_unwrapped"]
_coordtype_column_names = {
"unscaled": ["x", "y", "z"],
"scaled": ["xs", "ys", "zs"],
"unwrapped": ["xu", "yu", "zu"],
"scaled_unwrapped": ["xsu", "ysu", "zsu"]
}

def __init__(self, filename, lammps_coordinate_convention="auto",
**kwargs):
super(DumpReader, self).__init__(filename, **kwargs)

root, ext = os.path.splitext(self.filename)
if lammps_coordinate_convention in self._conventions:
self.lammps_coordinate_convention = lammps_coordinate_convention
else:
option_string = "'" + "', '".join(self._conventions) + "'"
raise ValueError("lammps_coordinate_convention="
f"'{lammps_coordinate_convention}'"
" is not a valid option. "
f"Please choose one of {option_string}")

self._cache = {}

self._reopen()
Expand Down Expand Up @@ -518,15 +548,15 @@ def _read_next_timestep(self):
if ts.frame >= len(self):
raise EOFError

f.readline() # ITEM TIMESTEP
f.readline() # ITEM TIMESTEP
step_num = int(f.readline())
ts.data['step'] = step_num

f.readline() # ITEM NUMBER OF ATOMS
f.readline() # ITEM NUMBER OF ATOMS
n_atoms = int(f.readline())
if n_atoms != self.n_atoms:
raise ValueError("Number of atoms in trajectory changed "
"this is not suported in MDAnalysis")
"this is not supported in MDAnalysis")

triclinic = len(f.readline().split()) == 9 # ITEM BOX BOUNDS
if triclinic:
Expand All @@ -552,16 +582,42 @@ def _read_next_timestep(self):

indices = np.zeros(self.n_atoms, dtype=int)

f.readline() # ITEM ATOMS etc
for i in range(self.n_atoms):
idx, _, xs, ys, zs = f.readline().split()
atomline = f.readline() # ITEM ATOMS etc
attrs = atomline.split()[2:] # attributes on coordinate line
attr_to_col_ix = {x: i for i, x in enumerate(attrs)}
convention_to_col_ix = {}
for cv_name, cv_col_names in self._coordtype_column_names.items():
try:
convention_to_col_ix[cv_name] = [attr_to_col_ix[x] for x in cv_col_names]
except KeyError:
pass
# this should only trigger on first read of "ATOM" card, after which it
# is fixed to the guessed value. Auto proceeds unscaled -> scaled
# -> unwrapped -> scaled_unwrapped
if self.lammps_coordinate_convention == "auto":
try:
# this will automatically select in order of priority
# unscaled, scaled, unwrapped, scaled_unwrapped
self.lammps_coordinate_convention = list(convention_to_col_ix)[0]
except IndexError:
raise ValueError("No coordinate information detected")
elif not self.lammps_coordinate_convention in convention_to_col_ix:
raise ValueError(f"No coordinates following convention {self.lammps_coordinate_convention} found in timestep")

coord_cols = convention_to_col_ix[self.lammps_coordinate_convention]

indices[i] = idx
ts.positions[i] = xs, ys, zs
ids = "id" in attr_to_col_ix
for i in range(self.n_atoms):
fields = f.readline().split()
if ids:
indices[i] = fields[attr_to_col_ix["id"]]
ts.positions[i] = [fields[dim] for dim in coord_cols]

order = np.argsort(indices)
ts.positions = ts.positions[order]
# by default coordinates are given in scaled format, undo that
ts.positions = distances.transform_StoR(ts.positions, ts.dimensions)
if (self.lammps_coordinate_convention.startswith("scaled")):
# if coordinates are given in scaled format, undo that
ts.positions = distances.transform_StoR(ts.positions,
ts.dimensions)

return ts
17 changes: 11 additions & 6 deletions package/MDAnalysis/topology/LAMMPSParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,10 +584,12 @@ def _parse_box(self, header):


class LammpsDumpParser(TopologyReaderBase):
"""Parses Lammps ascii dump files in 'atom' format
"""Parses Lammps ascii dump files in 'atom' format.

Only reads atom ids. Sets all masses to 1.0.
Sets all masses to 1.0.


.. versionchanged:: 2.0.0
.. versionadded:: 0.19.0
"""
format = 'LAMMPSDUMP'
Expand All @@ -608,12 +610,15 @@ def parse(self, **kwargs):
indices = np.zeros(natoms, dtype=int)
types = np.zeros(natoms, dtype=object)

fin.readline() # ITEM ATOMS
atomline = fin.readline() # ITEM ATOMS
attrs = atomline.split()[2:] # attributes on coordinate line
col_ids = {attr: i for i, attr in enumerate(attrs)} # column ids

for i in range(natoms):
idx, atype, _, _, _ = fin.readline().split()
fields = fin.readline().split()

indices[i] = idx
types[i] = atype
indices[i] = fields[col_ids["id"]]
types[i] = fields[col_ids["type"]]

order = np.argsort(indices)
indices = indices[order]
Expand Down
124 changes: 121 additions & 3 deletions testsuite/MDAnalysisTests/coordinates/test_lammps.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@
RefLAMMPSData, RefLAMMPSDataMini, RefLAMMPSDataDCD,
)
from MDAnalysisTests.datafiles import (
LAMMPScnt, LAMMPShyd, LAMMPSdata, LAMMPSdata_mini, LAMMPSDUMP
LAMMPScnt, LAMMPShyd, LAMMPSdata, LAMMPSdata_mini, LAMMPSDUMP,
LAMMPSDUMP_allcoords, LAMMPSDUMP_nocoords
)


Expand Down Expand Up @@ -119,7 +120,7 @@ def test_Writer_dimensions(self, LAMMPSDATAWriter):
assert_almost_equal(u_ref.dimensions, u_new.dimensions,
err_msg="attributes different after writing",
decimal=6)

def test_Writer_atoms_types(self, LAMMPSDATAWriter):
u_ref, u_new = LAMMPSDATAWriter
assert_equal(u_ref.atoms.types, u_new.atoms.types,
Expand Down Expand Up @@ -434,7 +435,8 @@ def u(self, tmpdir, request):
with gzip.GzipFile(f, 'wb') as fout:
fout.write(data)

yield mda.Universe(f, format='LAMMPSDUMP')
yield mda.Universe(f, format='LAMMPSDUMP',
lammps_coordinate_convention="auto")

@pytest.fixture()
def reference_positions(self):
Expand Down Expand Up @@ -507,3 +509,119 @@ def test_atom_reordering(self, u, reference_positions):
reference_positions['atom13_pos']):
assert_almost_equal(atom1.position, atom1_pos, decimal=5)
assert_almost_equal(atom13.position, atom13_pos, decimal=5)


@pytest.mark.parametrize("convention",
["unscaled", "unwrapped", "scaled_unwrapped"])
def test_open_absent_convention_fails(convention):
with pytest.raises(ValueError, match="No coordinates following"):
mda.Universe(LAMMPSDUMP, format='LAMMPSDUMP',
lammps_coordinate_convention=convention)


def test_open_incorrect_convention_fails():
with pytest.raises(ValueError,
match="is not a valid option"):
mda.Universe(LAMMPSDUMP, format='LAMMPSDUMP',
lammps_coordinate_convention="42")


@pytest.mark.parametrize("convention,result",
[("auto", "unscaled"), ("unscaled", "unscaled"),
("scaled", "scaled"), ("unwrapped", "unwrapped"),
("scaled_unwrapped", "scaled_unwrapped")])
def test_open_all_convention(convention, result):
u = mda.Universe(LAMMPSDUMP_allcoords, format='LAMMPSDUMP',
lammps_coordinate_convention=convention)
assert(u.trajectory.lammps_coordinate_convention == result)


def test_no_coordinate_info():
with pytest.raises(ValueError, match="No coordinate information detected"):
u = mda.Universe(LAMMPSDUMP_nocoords, format='LAMMPSDUMP',
lammps_coordinate_convention="auto")


class TestCoordinateMatches(object):
@pytest.fixture()
def universes(self):
coordinate_conventions = ["auto", "unscaled", "scaled", "unwrapped",
"scaled_unwrapped"]
universes = {i: mda.Universe(LAMMPSDUMP_allcoords, format='LAMMPSDUMP',
lammps_coordinate_convention=i)
for i in coordinate_conventions}
return universes

@pytest.fixture()
def reference_unscaled_positions(self):
# copied from trajectory file
# atom 340 is the first one in the trajectory so we use that
atom340_pos1_unscaled = [4.48355, 0.331422, 1.59231]
atom340_pos2_unscaled = [4.41947, 35.4403, 2.25115]
atom340_pos3_unscaled = [4.48989, 0.360633, 2.63623]
return np.asarray([atom340_pos1_unscaled, atom340_pos2_unscaled,
atom340_pos3_unscaled])

def test_unscaled_reference(self, universes, reference_unscaled_positions):
atom_340 = universes["unscaled"].atoms[339]
for i, ts_u in enumerate(universes["unscaled"].trajectory[0:3]):
assert_almost_equal(atom_340.position,
reference_unscaled_positions[i, :], decimal=5)

def test_scaled_reference(self, universes, reference_unscaled_positions):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@richardjgowers @IAlibay question surrounding the accuracy of the StoR transform from lib.distances. Is this function known to be ill conditioned or something? This only seems to match to 1 dp.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now a separate issue #3361

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1 d.p. sounds problematic, you kinda have to try to get that bad a float match 😕
Was this the same issue with the old StoR transformation or is it now only becoming an issue with this code?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah no changes made to StoR() here, I have raised an issue #3361, as will need to validate on a toy system.

# NOTE use of unscaled positions here due to S->R transform
atom_340 = universes["scaled"].atoms[339]
for i, ts_u in enumerate(universes["scaled"].trajectory[0:3]):
assert_almost_equal(atom_340.position,
reference_unscaled_positions[i, :], decimal=1)
# NOTE this seems a bit inaccurate?

@pytest.fixture()
def reference_unwrapped_positions(self):
# copied from trajectory file
# atom 340 is the first one in the trajectory so we use that
atom340_pos1_unwrapped = [4.48355, 35.8378, 1.59231]
atom340_pos2_unwrapped = [4.41947, 35.4403, 2.25115]
atom340_pos3_unwrapped = [4.48989, 35.867, 2.63623]
return np.asarray([atom340_pos1_unwrapped, atom340_pos2_unwrapped,
atom340_pos3_unwrapped])

def test_unwrapped_scaled_reference(self, universes,
reference_unwrapped_positions):
atom_340 = universes["unwrapped"].atoms[339]
for i, ts_u in enumerate(universes["unwrapped"].trajectory[0:3]):
assert_almost_equal(atom_340.position,
reference_unwrapped_positions[i, :], decimal=5)

def test_unwrapped_scaled_reference(self, universes,
reference_unwrapped_positions):
# NOTE use of unscaled positions here due to S->R transform
atom_340 = universes["scaled_unwrapped"].atoms[339]
for i, ts_u in enumerate(
universes["scaled_unwrapped"].trajectory[0:3]):
assert_almost_equal(atom_340.position,
reference_unwrapped_positions[i, :], decimal=1)
# NOTE this seems a bit inaccurate?

def test_scaled_unscaled_match(self, universes):
assert(len(universes["unscaled"].trajectory)
== len(universes["scaled"].trajectory))
for ts_u, ts_s in zip(universes["unscaled"].trajectory,
universes["scaled"].trajectory):
assert_almost_equal(ts_u.positions, ts_s.positions, decimal=1)
# NOTE this seems a bit inaccurate?

def test_unwrapped_scaled_unwrapped_match(self, universes):
assert(len(universes["unwrapped"].trajectory) ==
len(universes["scaled_unwrapped"].trajectory))
for ts_u, ts_s in zip(universes["unwrapped"].trajectory,
universes["scaled_unwrapped"].trajectory):
assert_almost_equal(ts_u.positions, ts_s.positions, decimal=1)
# NOTE this seems a bit inaccurate?

def test_auto_is_unscaled_match(self, universes):
assert(len(universes["auto"].trajectory) ==
len(universes["unscaled"].trajectory))
for ts_a, ts_s in zip(universes["auto"].trajectory,
universes["unscaled"].trajectory):
assert_almost_equal(ts_a.positions, ts_s.positions, decimal=5)
Binary file not shown.
Binary file not shown.
Binary file not shown.
7 changes: 7 additions & 0 deletions testsuite/MDAnalysisTests/datafiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@
"LAMMPShyd", "LAMMPShyd2",
"LAMMPSdata_deletedatoms", # with deleted atoms
"LAMMPSDUMP",
"LAMMPSDUMP_long", # lammpsdump file with a few zeros sprinkled in the first column first frame
"LAMMPSDUMP_allcoords", # lammpsdump file with all coordinate conventions (x,xs,xu,xsu) present, from LAMMPS rdf example
"LAMMPSDUMP_nocoords", # lammpsdump file with no coordinates
"unordered_res", # pdb file with resids non sequential
"GMS_ASYMOPT", # GAMESS C1 optimization
"GMS_SYMOPT", # GAMESS D4h optimization
Expand Down Expand Up @@ -484,6 +487,10 @@
LAMMPShyd2 = resource_filename(__name__, "data/lammps/hydrogen-class1.data2")
LAMMPSdata_deletedatoms = resource_filename(__name__, 'data/lammps/deletedatoms.data')
LAMMPSDUMP = resource_filename(__name__, "data/lammps/wat.lammpstrj.bz2")
LAMMPSDUMP_long = resource_filename(__name__, "data/lammps/wat.lammpstrj_long.bz2")
LAMMPSDUMP_allcoords = resource_filename(__name__, "data/lammps/spce_all_coords.lammpstrj.bz2")
LAMMPSDUMP_nocoords = resource_filename(__name__, "data/lammps/spce_no_coords.lammpstrj.bz2")


unordered_res = resource_filename(__name__, "data/unordered_res.pdb")

Expand Down
7 changes: 7 additions & 0 deletions testsuite/MDAnalysisTests/topology/test_lammpsdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
LAMMPShyd2,
LAMMPSdata_deletedatoms,
LAMMPSDUMP,
LAMMPSDUMP_long,
)


Expand Down Expand Up @@ -268,3 +269,9 @@ def test_id_ordering(self):
u = mda.Universe(self.ref_filename, format='LAMMPSDUMP')
# the 4th in file has id==13, but should have been sorted
assert u.atoms[3].id == 4

# this tests that topology can still be constructed if non-standard or uneven
# column present.
class TestDumpParserLong(TestDumpParser):

ref_filename = LAMMPSDUMP_long