From dec7179fb11d72ef4d3d11617efd4cb5b344e451 Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sun, 1 Aug 2021 10:12:26 +0100 Subject: [PATCH 1/3] Ensure that GRO reader treats lack of unit cells consistently --- package/CHANGELOG | 2 + package/MDAnalysis/coordinates/GRO.py | 43 ++++++++++++++----- .../MDAnalysisTests/coordinates/test_gro.py | 39 +++++++++++++++++ 3 files changed, 74 insertions(+), 10 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 979aa607a16..9780c2677ae 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -23,6 +23,8 @@ The rules for this file: * 2.0.0 Fixes + * Ensures that reading and writing of GRO files with missing unit cell + information is consistently treated (Issue #3305) * Fixed sometimes wrong sorting of atoms into fragments when unwrapping (Issue #3352, PR #3353) * Fixed missing array flatten preventing PCA from running when mean positions diff --git a/package/MDAnalysis/coordinates/GRO.py b/package/MDAnalysis/coordinates/GRO.py index 2d354a07832..532fe1b9462 100644 --- a/package/MDAnalysis/coordinates/GRO.py +++ b/package/MDAnalysis/coordinates/GRO.py @@ -155,8 +155,19 @@ class GROReader(base.SingleFrameReaderBase): .. note:: This Reader will only read the first frame present in a file. + + .. note:: + GRO files with zeroed 3 entry unit cells (i.e. ``0.0 0.0 0.0``) + are read as missing unit cell information. In these cases ``dimensions`` + will be set to ``None``. + .. versionchanged:: 0.11.0 Frames now 0-based instead of 1-based + .. versionchanged:: 2.0.0 + Reader now raises error if unitcell has neither 3 or 9 entries. + Reader now reads a 3 entry zero unit cell (i.e. ``[0, 0, 0]``) as a + being without dimension information (i.e. will the timestep dimension + to ``None``). """ format = 'GRO' units = {'time': None, 'length': 'nm', 'velocity': 'nm/ps'} @@ -214,15 +225,19 @@ def _read_first_frame(self): if len(unitcell) == 3: # special case: a b c --> (a 0 0) (b 0 0) (c 0 0) # see docstring above for format (!) - self.ts.dimensions = np.r_[unitcell, [90., 90., 90.]] + # Treat empty 3 entry boxes as not having a unit cell + if np.allclose(unitcell, [0., 0., 0.]): + wmsg = ("Empty box [0., 0., 0.] found - treating as missing " + "unit cell. Dimensions set to `None`.") + warnings.warn(wmsg) + self.ts.dimensions = None + else: + self.ts.dimensions = np.r_[unitcell, [90., 90., 90.]] elif len(unitcell) == 9: self.ts.dimensions = _gmx_to_dimensions(unitcell) - else: # or maybe raise an error for wrong format?? - warnings.warn("GRO unitcell has neither 3 nor 9 entries --- might be wrong.") - # fill linearly ... not sure about this - unitcell2 = np.zeros(9) - unitcell2[:len(unitcell)] = unitcell - self.ts.dimensions = _gmx_to_dimensions(unitcell2) + else: # raise an error for wrong format + errmsg = "GRO unitcell has neither 3 nor 9 entries." + raise ValueError(errmsg) if self.convert_units: self.convert_pos_from_native(self.ts._pos) # in-place ! @@ -258,9 +273,11 @@ class GROWriter(base.WriterBase): - resnames (defaults to 'UNK') - resids (defaults to '1') - Note - ---- - The precision is hard coded to three decimal places + Notes + ----- + * The precision is hard coded to three decimal places. + * When dimensions are missing (i.e. set to `None`), a zero width + unit cell box will be written (i.e. [0.0, 0.0, 0.0]). .. versionchanged:: 0.11.0 @@ -274,6 +291,9 @@ class GROWriter(base.WriterBase): .. versionchanged:: 0.18.0 Added `reindex` keyword argument to allow original atom ids to be kept. + .. versionchanged:: 2.0.0 + Raises a warning when writing timestep with missing dimension + information (i.e. set to ``None``). """ format = 'GRO' @@ -455,6 +475,9 @@ def write(self, obj): if (ag.dimensions is None or np.allclose(ag.dimensions[3:], [90., 90., 90.])): if ag.dimensions is None: + wmsg = ("missing dimension - setting unit cell to zeroed " + "box [0., 0., 0.]") + warnings.warn(wmsg) box = np.zeros(3) else: box = self.convert_pos_to_native( diff --git a/testsuite/MDAnalysisTests/coordinates/test_gro.py b/testsuite/MDAnalysisTests/coordinates/test_gro.py index 0b01daca585..e25bf969fc5 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_gro.py +++ b/testsuite/MDAnalysisTests/coordinates/test_gro.py @@ -20,6 +20,8 @@ # MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # +from io import StringIO + import MDAnalysis as mda import numpy as np from MDAnalysis.coordinates.GRO import GROReader, GROWriter @@ -37,6 +39,7 @@ GRO_large, two_water_gro_multiframe, GRO_huge_box, + PDB_closed, ) from numpy.testing import ( assert_almost_equal, @@ -485,8 +488,44 @@ def test_multiframe_gro(): assert len(u.trajectory) == 1 assert_equal(u.dimensions, np.array([100, 100, 100, 90, 90, 90], dtype=np.float32)) + def test_huge_box_gro(): u = mda.Universe(GRO_huge_box) assert_equal(u.dimensions, np.array([4.e+05, 4.e+05, 4.e+05, 90, 90, 90], dtype=np.float32)) + + +gro_no_dims = """\ +Single Atom no dims GRO +1 + 1SOL OW 1 1.000 1.000 1.000 +""" + + +@pytest.mark.parametrize('dims', [1, 2, 4, 5, 6, 7, 8]) +def test_bad_box(dims): + cell = ' '.join([str(float(i)) for i in range(dims)]) + grofile = gro_no_dims + cell + + errmsg = "GRO unitcell has neither 3 nor 9 entries." + with pytest.raises(ValueError, match=errmsg): + u = mda.Universe(StringIO(grofile), format='gro') + + +def test_gro_empty_box_write_read(tmpdir): + # Issue #3305 - ensure that read/write deals with None dimensions the same + u = mda.Universe(PDB_closed) + + # Check lack of dimensions + assert u.dimensions is None + + with tmpdir.as_cwd(): + wmsg = " setting unit cell to zeroed box" + with pytest.warns(UserWarning, match=wmsg): + u.atoms.write('test.gro') + + wmsg = "treating as missing unit cell" + with pytest.warns(UserWarning, match=wmsg): + u2 = mda.Universe('test.gro') + assert u2.dimensions is None From e2380f3010799b180c8376e6f22471079796364c Mon Sep 17 00:00:00 2001 From: IAlibay Date: Sun, 1 Aug 2021 10:17:21 +0100 Subject: [PATCH 2/3] Clarify docs and add changelog entry --- package/CHANGELOG | 2 ++ package/MDAnalysis/coordinates/GRO.py | 13 ++++++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 9780c2677ae..aa1514bbeb2 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -23,6 +23,8 @@ The rules for this file: * 2.0.0 Fixes + * GRO files with unit cells with neither 3 of 9 entries are no longer + supported (Issue #3305) * Ensures that reading and writing of GRO files with missing unit cell information is consistently treated (Issue #3305) * Fixed sometimes wrong sorting of atoms into fragments when unwrapping diff --git a/package/MDAnalysis/coordinates/GRO.py b/package/MDAnalysis/coordinates/GRO.py index 532fe1b9462..439a1f7d2a0 100644 --- a/package/MDAnalysis/coordinates/GRO.py +++ b/package/MDAnalysis/coordinates/GRO.py @@ -273,11 +273,14 @@ class GROWriter(base.WriterBase): - resnames (defaults to 'UNK') - resids (defaults to '1') - Notes - ----- - * The precision is hard coded to three decimal places. - * When dimensions are missing (i.e. set to `None`), a zero width - unit cell box will be written (i.e. [0.0, 0.0, 0.0]). + + .. note:: + The precision is hard coded to three decimal places. + + + .. note:: + When dimensions are missing (i.e. set to `None`), a zero width + unit cell box will be written (i.e. [0.0, 0.0, 0.0]). .. versionchanged:: 0.11.0 From 0d1d2344b061777771f7ac745fe9a5d9195dc04c Mon Sep 17 00:00:00 2001 From: Irfan Alibay Date: Sun, 1 Aug 2021 17:57:24 +0100 Subject: [PATCH 3/3] Apply suggestions from code review Co-authored-by: Lily Wang <31115101+lilyminium@users.noreply.github.com> --- package/CHANGELOG | 2 +- package/MDAnalysis/coordinates/GRO.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index aa1514bbeb2..91613de6c17 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -23,7 +23,7 @@ The rules for this file: * 2.0.0 Fixes - * GRO files with unit cells with neither 3 of 9 entries are no longer + * Only GRO files with unit cells defined with 3 or 9 entries are now supported (Issue #3305) * Ensures that reading and writing of GRO files with missing unit cell information is consistently treated (Issue #3305) diff --git a/package/MDAnalysis/coordinates/GRO.py b/package/MDAnalysis/coordinates/GRO.py index 439a1f7d2a0..9a4a12f95be 100644 --- a/package/MDAnalysis/coordinates/GRO.py +++ b/package/MDAnalysis/coordinates/GRO.py @@ -164,7 +164,7 @@ class GROReader(base.SingleFrameReaderBase): .. versionchanged:: 0.11.0 Frames now 0-based instead of 1-based .. versionchanged:: 2.0.0 - Reader now raises error if unitcell has neither 3 or 9 entries. + Reader now only parses boxes defined with 3 or 9 fields. Reader now reads a 3 entry zero unit cell (i.e. ``[0, 0, 0]``) as a being without dimension information (i.e. will the timestep dimension to ``None``).