From 5bcd6c12bdcbdab57f4cc350c5df3cda95fc450d Mon Sep 17 00:00:00 2001 From: Edis Date: Mon, 10 Aug 2020 01:21:43 -0700 Subject: [PATCH 01/15] added H5PYPicklable class --- package/MDAnalysis/coordinates/H5MD.py | 125 +++++++++++++++++++++---- 1 file changed, 108 insertions(+), 17 deletions(-) diff --git a/package/MDAnalysis/coordinates/H5MD.py b/package/MDAnalysis/coordinates/H5MD.py index bf33eec169b..f6b5cf33252 100644 --- a/package/MDAnalysis/coordinates/H5MD.py +++ b/package/MDAnalysis/coordinates/H5MD.py @@ -31,7 +31,7 @@ interface of the HDF5 library to improve parallel reads and writes. The HDF5 library and `h5py`_ must be installed; otherwise, H5MD files -cannot be read by MDAnalysis. If `h5py`_ is not installed, a +cannot be read by MDAnalysis. If `h5py`_ is not installed, a :exc:`RuntimeError` is raised. Units @@ -94,10 +94,10 @@ Building parallel h5py and HDF5 on Linux ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Building a working parallel HDF5/h5py/mpi4py environment can be +Building a working parallel HDF5/h5py/mpi4py environment can be challenging and is often specific to your local computing resources, e.g., the supercomputer that you're running on typically already has -its preferred MPI installation. As a starting point we provide +its preferred MPI installation. As a starting point we provide instructions that worked in a specific, fairly generic environment. These instructions successfully built parallel HDF5/h5py @@ -133,8 +133,8 @@ CC=mpicc HDF5_DIR=$HDF5_PATH python setup.py build python setup.py install -If you have questions or want to share how you managed to build -parallel hdf5/h5py/mpi4py please let everyone know on the +If you have questions or want to share how you managed to build +parallel hdf5/h5py/mpi4py please let everyone know on the `MDAnalysis forums`_. .. _`H5MD`: https://nongnu.org/h5md/index.html @@ -228,23 +228,23 @@ class H5MDReader(base.ReaderBase): See `h5md documentation `_ for a detailed overview of the H5MD file format. - - The reader attempts to convert units in the trajectory file to - the standard MDAnalysis units (:mod:`MDAnalysis.units`) if + + The reader attempts to convert units in the trajectory file to + the standard MDAnalysis units (:mod:`MDAnalysis.units`) if `convert_units` is set to ``True``. - + Additional data in the *observables* group of the H5MD file are loaded into the :attr:`Timestep.data` dictionary. - + Only 3D-periodic boxes or no periodicity are supported; for no periodicity, :attr:`Timestep.dimensions` will return ``None``. - + Although H5MD can store varying numbers of particles per time step as produced by, e.g., GCMC simulations, MDAnalysis can currently only process a fixed number of particles per step. If the number of particles changes a :exc:`ValueError` is raised. - The :class:`H5MDReader` reads .h5md files with the following + The :class:`H5MDReader` reads .h5md files with the following HDF5 hierarchy: .. code-block:: text @@ -557,13 +557,14 @@ def open_trajectory(self): if self._comm is not None: # can only pass comm argument to h5py.File if driver='mpio' assert self._driver == 'mpio' - self._file = h5py.File(self.filename, 'r', # pragma: no cover - driver=self._driver, - comm=self._comm) + self._file = H5PYPicklable(self.filename, 'r', # pragma: no cover + driver=self._driver, + comm=self._comm) elif self._driver is not None: - self._file = h5py.File(self.filename, 'r', driver=self._driver) + self._file = H5PYPicklable(self.filename, 'r', + driver=self._driver) else: - self._file = h5py.File(self.filename, 'r') + self._file = H5PYPicklable(self.filename, 'r') # pulls first key out of 'particles' # allows for arbitrary name of group1 in 'particles' self._particle_group = self._file['particles'][ @@ -727,3 +728,93 @@ def has_forces(self): @has_forces.setter def has_forces(self, value: bool): self._has['force'] = value + + def __getstate__(self): + state = self.__dict__.copy() + del state['_particle_group'] + return state + + def __setstate__(self, state): + self.__dict__ = state + self._particle_group = self._file['particles'][ + list(self._file['particles'])[0]] + + +class H5PYPicklable(h5py.File): + """H5PY file object (read-only) that can be pickled. + + This class provides a file-like object (as returned by + :class:`h5py.File`) that, + unlike standard Python file objects, + can be pickled. Only read mode is supported. + + When the file is pickled, filename, mode, driver, and comm of + :class:`h5py.File` in the file are saved. On unpickling, the file + is opened by filename, mode, driver, and comm (driver and comm are + optional arguments). This means that for a successful unpickle, the + original file still has to be accessible with its filename. + + Parameters + ---------- + filename : str or file-like + a filename given a text or byte string. + driver : str (optional) + H5PY file driver used to open H5MD file + comm : :class:`MPI.Comm` (optional) + MPI communicator used to open H5MD file + Must be passed with `'mpio'` file driver + + Example + ------- + :: + + f = H5PYPicklable('filename', 'r') + print(f['particles/trajectory/position/value'][0]) + f.close() + + can also be used as context manager:: + + with H5PYPicklable('filename', 'r'): + print(f['particles/trajectory/position/value'][0]) + + See Also + --------- + :class:`MDAnalysis.lib.picklable_file_io.FileIOPicklable` + :class:`MDAnalysis.lib.picklable_file_io.BufferIOPicklable` + :class:`MDAnalysis.lib.picklable_file_io.TextIOPicklable` + :class:`MDAnalysis.lib.picklable_file_io.GzipPicklable` + :class:`MDAnalysis.lib.picklable_file_io.BZ2Picklable` + + + .. versionadded:: 2.0.0 + """ + def __getstate__(self): + try: + driver = self._driver + comm = self._comm + except AttributeError: # is this error necessary? + driver = None + comm = None + + return { + 'name': self.filename, + 'mode': self.mode, + 'driver': driver, + 'comm': comm + } + + def __setstate__(self, state): + if state['driver'] == 'mpio': + self.__init__(name=state['name'], + mode=state['mode'], + driver=state['driver'], + comm=state['comm']) + + else: + self.__init__(name=state['name'], + mode=state['mode'], + driver=state['driver']) + + def __getnewargs__(self): + """Override the h5py getnewargs to skip its error message""" + return () From d220dc5d7ad0bcd25d06d89cfd50458ce69ee595 Mon Sep 17 00:00:00 2001 From: Edis Date: Mon, 10 Aug 2020 13:30:31 -0700 Subject: [PATCH 02/15] added mock h5py and changed __getstate__ attribute error --- package/MDAnalysis/coordinates/H5MD.py | 20 +++++++++++++++---- .../coordinates/pickle_readers.rst | 13 ++++++------ 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/package/MDAnalysis/coordinates/H5MD.py b/package/MDAnalysis/coordinates/H5MD.py index f6b5cf33252..2b32afd9c1d 100644 --- a/package/MDAnalysis/coordinates/H5MD.py +++ b/package/MDAnalysis/coordinates/H5MD.py @@ -180,6 +180,9 @@ .. automethod:: H5MDReader._reopen +.. autoclass:: H5PYPicklable + :members: + """ import numpy as np @@ -190,6 +193,15 @@ import h5py except ImportError: HAS_H5PY = False + + # Allow building documentation even if h5py is not installed + import imp + + class MockH5pyFile: + pass + h5py = imp.new_module("h5py") + h5py.File = MockH5pyFile + else: HAS_H5PY = True @@ -737,7 +749,8 @@ def __getstate__(self): def __setstate__(self, state): self.__dict__ = state self._particle_group = self._file['particles'][ - list(self._file['particles'])[0]] + list(self._file['particles'])[0]] + self[self.ts.frame] class H5PYPicklable(h5py.File): @@ -792,9 +805,8 @@ def __getstate__(self): try: driver = self._driver comm = self._comm - except AttributeError: # is this error necessary? - driver = None - comm = None + except AttributeError: + comm and not (driver == 'mpio') return { 'name': self.filename, diff --git a/package/doc/sphinx/source/documentation_pages/coordinates/pickle_readers.rst b/package/doc/sphinx/source/documentation_pages/coordinates/pickle_readers.rst index 0866590297f..fbcc881ab0c 100644 --- a/package/doc/sphinx/source/documentation_pages/coordinates/pickle_readers.rst +++ b/package/doc/sphinx/source/documentation_pages/coordinates/pickle_readers.rst @@ -1,4 +1,4 @@ -.. Contains the formatted docstrings for the serialization of universe located +.. Contains the formatted docstrings for the serialization of universe located .. mainly in 'MDAnalysis/libs/pickle_file_io.py' .. _serialization: @@ -12,7 +12,7 @@ and what developers should do to serialize a new reader. To make sure every Trajectory reader can be successfully serialized, we implement picklable I/O classes (see :ref:`implemented-fileio`). -When the file is pickled, filename and other necessary attributes of the open +When the file is pickled, filename and other necessary attributes of the open file handle are saved. On unpickling, the file is opened by filename. This means that for a successful unpickle, the original file still has to be accessible with its filename. To retain the current frame of the trajectory, @@ -25,12 +25,12 @@ How to serialize a new reader File Access ^^^^^^^^^^^ -If the new reader uses :func:`util.anyopen()` +If the new reader uses :func:`util.anyopen()` (e.g. :class:`MDAnalysis.coordinates.PDB.PDBReader`), the reading handler can be pickled without modification. If the new reader uses I/O classes from other package (e.g. :class:`MDAnalysis.coordinates.GSD.GSDReader`), -and cannot be pickled natively, create a new picklable class inherited from +and cannot be pickled natively, create a new picklable class inherited from the file class in that package (e.g. :class:`MDAnalysis.coordinates.GSD.GSDPicklable`), adding :func:`__getstate__`, @@ -40,10 +40,10 @@ to allow file handler serialization. To seek or not to seek ^^^^^^^^^^^^^^^^^^^^^^ -Some I/O classes support :func:`seek` and :func:`tell` functions to allow the file +Some I/O classes support :func:`seek` and :func:`tell` functions to allow the file to be pickled with an offset. It is normally not needed for MDAnalysis with random access. But if error occurs during testing, find a way to make the offset work. -Maybe this I/O class supports frame indexing? Maybe the file handler inside this I/O +Maybe this I/O class supports frame indexing? Maybe the file handler inside this I/O class supports offset? For example, in :class:`MDAnalysis.coordinates.TRZ.TRZReader`, @@ -99,3 +99,4 @@ Currently implemented picklable IO Formats * :class:`MDAnalysis.coordinates.GSD.GSDPicklable` * :class:`MDAnalysis.coordinates.TRJ.NCDFPicklable` * :class:`MDAnalysis.coordinates.chemfiles.ChemfilesPicklable` +* :class:`MDAnalysis.coordinates.H5MD.H5PYPicklable` From ddfa3659482872613611d5a22bdcea2176c3616d Mon Sep 17 00:00:00 2001 From: Edis Date: Mon, 10 Aug 2020 13:33:07 -0700 Subject: [PATCH 03/15] reverted attribute error --- package/MDAnalysis/coordinates/H5MD.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/H5MD.py b/package/MDAnalysis/coordinates/H5MD.py index 2b32afd9c1d..7321aa36ed1 100644 --- a/package/MDAnalysis/coordinates/H5MD.py +++ b/package/MDAnalysis/coordinates/H5MD.py @@ -806,7 +806,8 @@ def __getstate__(self): driver = self._driver comm = self._comm except AttributeError: - comm and not (driver == 'mpio') + driver = None + comm = None return { 'name': self.filename, From b29e675765c71ee0e015b3690e6b38a902a86188 Mon Sep 17 00:00:00 2001 From: Edis Date: Mon, 10 Aug 2020 13:42:53 -0700 Subject: [PATCH 04/15] removed comm from __getstate__ --- package/MDAnalysis/coordinates/H5MD.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/package/MDAnalysis/coordinates/H5MD.py b/package/MDAnalysis/coordinates/H5MD.py index 7321aa36ed1..4114e4f2f0a 100644 --- a/package/MDAnalysis/coordinates/H5MD.py +++ b/package/MDAnalysis/coordinates/H5MD.py @@ -803,18 +803,13 @@ class H5PYPicklable(h5py.File): """ def __getstate__(self): try: - driver = self._driver - comm = self._comm + driver = self.driver except AttributeError: driver = None - comm = None - - return { - 'name': self.filename, - 'mode': self.mode, - 'driver': driver, - 'comm': comm - } + + return {'name': self.filename, + 'mode': self.mode, + 'driver': driver} def __setstate__(self, state): if state['driver'] == 'mpio': From c7087b8cb1473b98ca943057d1697b1a3f91a46d Mon Sep 17 00:00:00 2001 From: Edis Date: Mon, 10 Aug 2020 13:57:42 -0700 Subject: [PATCH 05/15] updated changelog --- package/CHANGELOG | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index d4c412247e0..ce83b67aed8 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -40,7 +40,8 @@ Fixes was thrown when finding D-H pairs via the topology if `hydrogens` was an empty AtomGroup (Issue #2848) * Fixed the DMSParser, allowing the creation of multiple segids sharing - residues with identical resids (Issue #1387, PR #2872) + residues with identical resids (Issue #1387, PR #2872) + * H5MD files are now pickleable with H5PYPicklable (Issue #2890, PR #2894) Enhancements * Refactored analysis.helanal into analysis.helix_analysis From a40b298412859ce447e3732145086b47af0f8b68 Mon Sep 17 00:00:00 2001 From: Edis Date: Mon, 10 Aug 2020 17:42:42 -0700 Subject: [PATCH 06/15] testing copyreg MPI things --- package/MDAnalysis/coordinates/H5MD.py | 53 +++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/coordinates/H5MD.py b/package/MDAnalysis/coordinates/H5MD.py index 4114e4f2f0a..a831a0c5b1e 100644 --- a/package/MDAnalysis/coordinates/H5MD.py +++ b/package/MDAnalysis/coordinates/H5MD.py @@ -186,6 +186,7 @@ """ import numpy as np +import copyreg import MDAnalysis as mda from . import base, core from ..exceptions import NoDataError @@ -195,16 +196,32 @@ HAS_H5PY = False # Allow building documentation even if h5py is not installed - import imp + import types class MockH5pyFile: pass - h5py = imp.new_module("h5py") + h5py = types.ModuleType("h5py") h5py.File = MockH5pyFile else: HAS_H5PY = True +try: + from mpi4py import MPI +except ImportError: + HAS_MPI = False + + # Allow building documentation even if mpi4py is not installed + import types + + class MockMPI: + pass + mpi4py = types.ModuleType("mpi4py") + MPI = MockMPI + +else: + HAS_MPI = True + class Timestep(base.Timestep): """H5MD Timestep @@ -801,17 +818,49 @@ class H5PYPicklable(h5py.File): .. versionadded:: 2.0.0 """ + + if HAS_MPI: + def comm_unpickle(self, name): + return getattr(MPI, name) + + def comm_pickle(self, obj): + if obj == MPI.COMM_NULL: + return self.comm_unpickle('COMM_NULL',) + if obj == MPI.COMM_SELF: + return self.comm_unpickle('COMM_SELF',) + if obj == MPI.COMM_WORLD: + return self.comm_unpickle('COMM_WORLD',) + raise TypeError("cannot pickle MPI.Comm object") + + def __getstate__(self): try: driver = self.driver except AttributeError: driver = None + if driver == 'mpio': + if HAS_MPI: + try: + # find comm object from h5py.File + comm = self.id.get_access_plist().get_fapl_mpio()[0] + except AttributeError: + comm = None + + copyreg.pickle(MPI.Intracomm, self.comm_pickle, self.comm_unpickle) + copyreg.pickle(MPI.Comm, self.comm_pickle, self.comm_unpickle) + + return {'name': self.filename, + 'mode': self.mode, + 'driver': driver, + 'comm': comm} + return {'name': self.filename, 'mode': self.mode, 'driver': driver} def __setstate__(self, state): + if state['driver'] == 'mpio': self.__init__(name=state['name'], mode=state['mode'], From 94e6d632e9acab9579d86c1cf1038d0c7b24e8df Mon Sep 17 00:00:00 2001 From: Edis Date: Mon, 10 Aug 2020 17:46:05 -0700 Subject: [PATCH 07/15] moved 2 lines --- package/MDAnalysis/coordinates/H5MD.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/package/MDAnalysis/coordinates/H5MD.py b/package/MDAnalysis/coordinates/H5MD.py index a831a0c5b1e..e8ee8c5aca4 100644 --- a/package/MDAnalysis/coordinates/H5MD.py +++ b/package/MDAnalysis/coordinates/H5MD.py @@ -832,6 +832,8 @@ def comm_pickle(self, obj): return self.comm_unpickle('COMM_WORLD',) raise TypeError("cannot pickle MPI.Comm object") + copyreg.pickle(MPI.Intracomm, comm_pickle, comm_unpickle) + copyreg.pickle(MPI.Comm, comm_pickle, comm_unpickle) def __getstate__(self): try: @@ -847,9 +849,6 @@ def __getstate__(self): except AttributeError: comm = None - copyreg.pickle(MPI.Intracomm, self.comm_pickle, self.comm_unpickle) - copyreg.pickle(MPI.Comm, self.comm_pickle, self.comm_unpickle) - return {'name': self.filename, 'mode': self.mode, 'driver': driver, From cfcbc181e907c08aab5855e992c14900582a892f Mon Sep 17 00:00:00 2001 From: Edis Date: Mon, 10 Aug 2020 18:06:53 -0700 Subject: [PATCH 08/15] removed comm_pickle and comm_unpickle for testing --- package/MDAnalysis/coordinates/H5MD.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/package/MDAnalysis/coordinates/H5MD.py b/package/MDAnalysis/coordinates/H5MD.py index e8ee8c5aca4..b71df6b07b9 100644 --- a/package/MDAnalysis/coordinates/H5MD.py +++ b/package/MDAnalysis/coordinates/H5MD.py @@ -819,22 +819,6 @@ class H5PYPicklable(h5py.File): .. versionadded:: 2.0.0 """ - if HAS_MPI: - def comm_unpickle(self, name): - return getattr(MPI, name) - - def comm_pickle(self, obj): - if obj == MPI.COMM_NULL: - return self.comm_unpickle('COMM_NULL',) - if obj == MPI.COMM_SELF: - return self.comm_unpickle('COMM_SELF',) - if obj == MPI.COMM_WORLD: - return self.comm_unpickle('COMM_WORLD',) - raise TypeError("cannot pickle MPI.Comm object") - - copyreg.pickle(MPI.Intracomm, comm_pickle, comm_unpickle) - copyreg.pickle(MPI.Comm, comm_pickle, comm_unpickle) - def __getstate__(self): try: driver = self.driver From 24c609065a94579e0281c74ff45b2c82f7d9238e Mon Sep 17 00:00:00 2001 From: Edis Date: Mon, 10 Aug 2020 23:02:10 -0700 Subject: [PATCH 09/15] testing MPI pickle-able --- package/MDAnalysis/coordinates/H5MD.py | 44 +++++++++++++++----------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/package/MDAnalysis/coordinates/H5MD.py b/package/MDAnalysis/coordinates/H5MD.py index b71df6b07b9..3de599a921e 100644 --- a/package/MDAnalysis/coordinates/H5MD.py +++ b/package/MDAnalysis/coordinates/H5MD.py @@ -586,14 +586,14 @@ def open_trajectory(self): if self._comm is not None: # can only pass comm argument to h5py.File if driver='mpio' assert self._driver == 'mpio' - self._file = H5PYPicklable(self.filename, 'r', # pragma: no cover + self._file = H5PYPicklable(name = self.filename, mode='r', # pragma: no cover driver=self._driver, comm=self._comm) elif self._driver is not None: - self._file = H5PYPicklable(self.filename, 'r', + self._file = H5PYPicklable(name=self.filename, mode='r', driver=self._driver) else: - self._file = H5PYPicklable(self.filename, 'r') + self._file = H5PYPicklable(name=self.filename, mode='r') # pulls first key out of 'particles' # allows for arbitrary name of group1 in 'particles' self._particle_group = self._file['particles'][ @@ -820,36 +820,23 @@ class H5PYPicklable(h5py.File): """ def __getstate__(self): - try: - driver = self.driver - except AttributeError: - driver = None - + driver = self.driver if driver == 'mpio': - if HAS_MPI: - try: - # find comm object from h5py.File - comm = self.id.get_access_plist().get_fapl_mpio()[0] - except AttributeError: - comm = None - + comm = self.id.get_access_plist().get_fapl_mpio()[0] return {'name': self.filename, 'mode': self.mode, 'driver': driver, 'comm': comm} - return {'name': self.filename, 'mode': self.mode, 'driver': driver} def __setstate__(self, state): - if state['driver'] == 'mpio': self.__init__(name=state['name'], mode=state['mode'], driver=state['driver'], comm=state['comm']) - else: self.__init__(name=state['name'], mode=state['mode'], @@ -858,3 +845,24 @@ def __setstate__(self, state): def __getnewargs__(self): """Override the h5py getnewargs to skip its error message""" return () + + +def comm_unpickle(name): + return getattr(MPI, name) + + +def comm_pickle(obj): + if obj == MPI.COMM_NULL: + return comm_unpickle, ('COMM_NULL',) + if obj == MPI.COMM_SELF: + return comm_unpickle, ('COMM_SELF',) + if obj == MPI.COMM_WORLD: + return comm_unpickle, ('COMM_WORLD',) + if isinstance(obj, MPI.Comm): + return comm_unpickle, ('COMM_WORLD') + raise TypeError("cannot pickle object") + + +if HAS_MPI: + copyreg.pickle(MPI.Intracomm, comm_pickle, comm_unpickle) + copyreg.pickle(MPI.Comm, comm_pickle, comm_unpickle) From 9f28df660e872924b6303e3979cd7d825e011ce7 Mon Sep 17 00:00:00 2001 From: Edis Date: Mon, 10 Aug 2020 23:07:56 -0700 Subject: [PATCH 10/15] added comma for tuple --- package/MDAnalysis/coordinates/H5MD.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/H5MD.py b/package/MDAnalysis/coordinates/H5MD.py index 3de599a921e..a38f3d8b284 100644 --- a/package/MDAnalysis/coordinates/H5MD.py +++ b/package/MDAnalysis/coordinates/H5MD.py @@ -859,7 +859,7 @@ def comm_pickle(obj): if obj == MPI.COMM_WORLD: return comm_unpickle, ('COMM_WORLD',) if isinstance(obj, MPI.Comm): - return comm_unpickle, ('COMM_WORLD') + return comm_unpickle, ('COMM_WORLD',) raise TypeError("cannot pickle object") From d34628d67090075acd68191859dde374c9076ac3 Mon Sep 17 00:00:00 2001 From: Edis Date: Tue, 11 Aug 2020 10:27:31 -0700 Subject: [PATCH 11/15] removed parallel pickling and added tests --- package/MDAnalysis/coordinates/H5MD.py | 71 ++++--------------- .../MDAnalysisTests/utils/test_pickleio.py | 24 ++++++- 2 files changed, 36 insertions(+), 59 deletions(-) diff --git a/package/MDAnalysis/coordinates/H5MD.py b/package/MDAnalysis/coordinates/H5MD.py index a38f3d8b284..4b056a64a7d 100644 --- a/package/MDAnalysis/coordinates/H5MD.py +++ b/package/MDAnalysis/coordinates/H5MD.py @@ -186,7 +186,6 @@ """ import numpy as np -import copyreg import MDAnalysis as mda from . import base, core from ..exceptions import NoDataError @@ -206,22 +205,6 @@ class MockH5pyFile: else: HAS_H5PY = True -try: - from mpi4py import MPI -except ImportError: - HAS_MPI = False - - # Allow building documentation even if mpi4py is not installed - import types - - class MockMPI: - pass - mpi4py = types.ModuleType("mpi4py") - MPI = MockMPI - -else: - HAS_MPI = True - class Timestep(base.Timestep): """H5MD Timestep @@ -780,9 +763,8 @@ class H5PYPicklable(h5py.File): When the file is pickled, filename, mode, driver, and comm of :class:`h5py.File` in the file are saved. On unpickling, the file - is opened by filename, mode, driver, and comm (driver and comm are - optional arguments). This means that for a successful unpickle, the - original file still has to be accessible with its filename. + is opened by filename, mode, driver. This means that for a successful + unpickle, the original file still has to be accessible with its filename. Parameters ---------- @@ -790,9 +772,6 @@ class H5PYPicklable(h5py.File): a filename given a text or byte string. driver : str (optional) H5PY file driver used to open H5MD file - comm : :class:`MPI.Comm` (optional) - MPI communicator used to open H5MD file - Must be passed with `'mpio'` file driver Example ------- @@ -807,6 +786,11 @@ class H5PYPicklable(h5py.File): with H5PYPicklable('filename', 'r'): print(f['particles/trajectory/position/value'][0]) + Note + ---- + Pickling of an `h5py.File` opened with `driver="mpio"` and an MPI + communicator is currently not supported + See Also --------- :class:`MDAnalysis.lib.picklable_file_io.FileIOPicklable` @@ -822,47 +806,18 @@ class H5PYPicklable(h5py.File): def __getstate__(self): driver = self.driver if driver == 'mpio': - comm = self.id.get_access_plist().get_fapl_mpio()[0] - return {'name': self.filename, - 'mode': self.mode, - 'driver': driver, - 'comm': comm} + raise TypeError("Parallel pickling of `h5py.File` with" + " 'mpio' driver is currently not supported.") + return {'name': self.filename, 'mode': self.mode, 'driver': driver} def __setstate__(self, state): - if state['driver'] == 'mpio': - self.__init__(name=state['name'], - mode=state['mode'], - driver=state['driver'], - comm=state['comm']) - else: - self.__init__(name=state['name'], - mode=state['mode'], - driver=state['driver']) + self.__init__(name=state['name'], + mode=state['mode'], + driver=state['driver']) def __getnewargs__(self): """Override the h5py getnewargs to skip its error message""" return () - - -def comm_unpickle(name): - return getattr(MPI, name) - - -def comm_pickle(obj): - if obj == MPI.COMM_NULL: - return comm_unpickle, ('COMM_NULL',) - if obj == MPI.COMM_SELF: - return comm_unpickle, ('COMM_SELF',) - if obj == MPI.COMM_WORLD: - return comm_unpickle, ('COMM_WORLD',) - if isinstance(obj, MPI.Comm): - return comm_unpickle, ('COMM_WORLD',) - raise TypeError("cannot pickle object") - - -if HAS_MPI: - copyreg.pickle(MPI.Intracomm, comm_pickle, comm_unpickle) - copyreg.pickle(MPI.Comm, comm_pickle, comm_unpickle) diff --git a/testsuite/MDAnalysisTests/utils/test_pickleio.py b/testsuite/MDAnalysisTests/utils/test_pickleio.py index b005f478c12..bbc7a5a5f23 100644 --- a/testsuite/MDAnalysisTests/utils/test_pickleio.py +++ b/testsuite/MDAnalysisTests/utils/test_pickleio.py @@ -50,6 +50,10 @@ from MDAnalysis.coordinates.chemfiles import ( ChemfilesPicklable ) +from MDAnalysis.coordinates.H5MD import HAS_H5PY +from MDAnalysis.coordinates.H5MD import ( + H5PYPicklable +) from MDAnalysis.tests.datafiles import ( PDB, @@ -58,7 +62,9 @@ MMTF_gz, GMS_ASYMOPT, GSD, - NCDF + NCDF, + TPR_xvf, + H5MD_xvf ) @@ -200,3 +206,19 @@ def test_Chemfiles_with_write_mode(tmpdir): with pytest.raises(ValueError, match=r"Only read mode"): chemfiles_io = ChemfilesPicklable(tmpdir.mkdir("xyz").join('t.xyz'), mode='w') + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_H5MD_pickle(): + h5md_io = H5PYPicklable(H5MD_xvf, 'r') + h5md_io_pickled = pickle.loads(pickle.dumps(h5md_io)) + assert_equal(h5md_io['particles/trajectory/position/value'][0], + h5md_io_pickled['particles/trajectory/position/value'][0]) + + +@pytest.mark.skipif(not HAS_H5PY, reason="h5py not installed") +def test_H5MD_pickle_with_driver(): + h5md_io = H5PYPicklable(H5MD_xvf, 'r', driver='core') + h5md_io_pickled = pickle.loads(pickle.dumps(h5md_io)) + assert_equal(h5md_io['particles/trajectory/position/value'][0], + h5md_io_pickled['particles/trajectory/position/value'][0]) From b64501f2a98bb78f797ebf0a596674b9d658cb05 Mon Sep 17 00:00:00 2001 From: Edis Date: Tue, 11 Aug 2020 12:10:50 -0700 Subject: [PATCH 12/15] added pragma: no cover to 'mpio' driver line with comments --- package/MDAnalysis/coordinates/H5MD.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/H5MD.py b/package/MDAnalysis/coordinates/H5MD.py index 4b056a64a7d..194ee0dfa56 100644 --- a/package/MDAnalysis/coordinates/H5MD.py +++ b/package/MDAnalysis/coordinates/H5MD.py @@ -805,7 +805,11 @@ class H5PYPicklable(h5py.File): def __getstate__(self): driver = self.driver - if driver == 'mpio': + # Current issues: Need a way to retrieve MPI communicator object + # from self and pickle MPI.Comm object. Parallel driver is excluded + # from test because h5py call for an MPI configuration when driver is + # 'mpio', so this will need to be patched in the test function. + if driver == 'mpio': # pragma: no cover raise TypeError("Parallel pickling of `h5py.File` with" " 'mpio' driver is currently not supported.") From 3eb97ae09b6d05dfc701346942dc39a4f1291a7e Mon Sep 17 00:00:00 2001 From: Edis Date: Tue, 11 Aug 2020 12:12:15 -0700 Subject: [PATCH 13/15] added extra pragma --- package/MDAnalysis/coordinates/H5MD.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/H5MD.py b/package/MDAnalysis/coordinates/H5MD.py index 194ee0dfa56..3aa38beac24 100644 --- a/package/MDAnalysis/coordinates/H5MD.py +++ b/package/MDAnalysis/coordinates/H5MD.py @@ -810,7 +810,7 @@ def __getstate__(self): # from test because h5py call for an MPI configuration when driver is # 'mpio', so this will need to be patched in the test function. if driver == 'mpio': # pragma: no cover - raise TypeError("Parallel pickling of `h5py.File` with" + raise TypeError("Parallel pickling of `h5py.File` with" # pragma: no cover " 'mpio' driver is currently not supported.") return {'name': self.filename, From fdcd0dc959331201cc3ef90e41e0440bcaf5c427 Mon Sep 17 00:00:00 2001 From: Edis Date: Tue, 11 Aug 2020 15:47:14 -0700 Subject: [PATCH 14/15] fixed a couple typos --- package/MDAnalysis/coordinates/H5MD.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/package/MDAnalysis/coordinates/H5MD.py b/package/MDAnalysis/coordinates/H5MD.py index 3aa38beac24..2b30aae8df0 100644 --- a/package/MDAnalysis/coordinates/H5MD.py +++ b/package/MDAnalysis/coordinates/H5MD.py @@ -289,21 +289,21 @@ class H5MDReader(base.ReaderBase): \-- (position) \-- [step] , gives frame \-- [time] , gives time - +-- units + +-- unit \-- [value] , gives numpy arrary of positions with shape (n_atoms, 3) +-- unit \-- (velocity) \-- [step] , gives frame \-- [time] , gives time - +-- units + +-- unit \-- [value] , gives numpy arrary of velocities with shape (n_atoms, 3) +-- unit \-- (force) \-- [step] , gives frame \-- [time] , gives time - +-- units + +-- unit \-- [value] , gives numpy arrary of forces with shape (n_atoms, 3) +-- unit @@ -807,7 +807,7 @@ def __getstate__(self): driver = self.driver # Current issues: Need a way to retrieve MPI communicator object # from self and pickle MPI.Comm object. Parallel driver is excluded - # from test because h5py call for an MPI configuration when driver is + # from test because h5py calls for an MPI configuration when driver is # 'mpio', so this will need to be patched in the test function. if driver == 'mpio': # pragma: no cover raise TypeError("Parallel pickling of `h5py.File` with" # pragma: no cover From 1966ee74239f6b723b40fb650623b2e122d395f0 Mon Sep 17 00:00:00 2001 From: Edis Date: Tue, 11 Aug 2020 15:53:38 -0700 Subject: [PATCH 15/15] pep8 changes --- package/MDAnalysis/coordinates/H5MD.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/H5MD.py b/package/MDAnalysis/coordinates/H5MD.py index 2b30aae8df0..a49594282d5 100644 --- a/package/MDAnalysis/coordinates/H5MD.py +++ b/package/MDAnalysis/coordinates/H5MD.py @@ -569,7 +569,8 @@ def open_trajectory(self): if self._comm is not None: # can only pass comm argument to h5py.File if driver='mpio' assert self._driver == 'mpio' - self._file = H5PYPicklable(name = self.filename, mode='r', # pragma: no cover + self._file = H5PYPicklable(name=self.filename, # pragma: no cover + mode='r', driver=self._driver, comm=self._comm) elif self._driver is not None: